diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q10.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q10.sf100/explain.txt new file mode 100644 index 0000000000000..12a8ec439eab4 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q10.sf100/explain.txt @@ -0,0 +1,286 @@ +== Physical Plan == +TakeOrderedAndProject (52) ++- * HashAggregate (51) + +- Exchange (50) + +- * HashAggregate (49) + +- * Project (48) + +- * BroadcastHashJoin Inner BuildLeft (47) + :- BroadcastExchange (43) + : +- * Project (42) + : +- * BroadcastHashJoin Inner BuildRight (41) + : :- * Project (35) + : : +- SortMergeJoin LeftSemi (34) + : : :- SortMergeJoin LeftSemi (25) + : : : :- * Sort (5) + : : : : +- Exchange (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.customer (1) + : : : +- * Sort (24) + : : : +- Exchange (23) + : : : +- Union (22) + : : : :- * Project (15) + : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : :- * Filter (8) + : : : : : +- * ColumnarToRow (7) + : : : : : +- Scan parquet default.web_sales (6) + : : : : +- BroadcastExchange (13) + : : : : +- * Project (12) + : : : : +- * Filter (11) + : : : : +- * ColumnarToRow (10) + : : : : +- Scan parquet default.date_dim (9) + : : : +- * Project (21) + : : : +- * BroadcastHashJoin Inner BuildRight (20) + : : : :- * Filter (18) + : : : : +- * ColumnarToRow (17) + : : : : +- Scan parquet default.catalog_sales (16) + : : : +- ReusedExchange (19) + : : +- * Sort (33) + : : +- Exchange (32) + : : +- * Project (31) + : : +- * BroadcastHashJoin Inner BuildRight (30) + : : :- * Filter (28) + : : : +- * ColumnarToRow (27) + : : : +- Scan parquet default.store_sales (26) + : : +- ReusedExchange (29) + : +- BroadcastExchange (40) + : +- * Project (39) + : +- * Filter (38) + : +- * ColumnarToRow (37) + : +- Scan parquet default.customer_address (36) + +- * 
Filter (46) + +- * ColumnarToRow (45) + +- Scan parquet default.customer_demographics (44) + + +(1) Scan parquet default.customer +Output [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(3) Filter [codegen id : 1] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Condition : ((isnotnull(c_customer_sk#1) AND isnotnull(c_current_addr_sk#3)) AND isnotnull(c_current_cdemo_sk#2)) + +(4) Exchange +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Arguments: hashpartitioning(c_customer_sk#1, 5), true, [id=#4] + +(5) Sort [codegen id : 2] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Arguments: [c_customer_sk#1 ASC NULLS FIRST], false, 0 + +(6) Scan parquet default.web_sales +Output [2]: [ws_sold_date_sk#5, ws_bill_customer_sk#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 4] +Input [2]: [ws_sold_date_sk#5, ws_bill_customer_sk#6] + +(8) Filter [codegen id : 4] +Input [2]: [ws_sold_date_sk#5, ws_bill_customer_sk#6] +Condition : (isnotnull(ws_sold_date_sk#5) AND isnotnull(ws_bill_customer_sk#6)) + +(9) Scan parquet default.date_dim +Output [3]: [d_date_sk#7, d_year#8, d_moy#9] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_year,2002), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,7), IsNotNull(d_date_sk)] +ReadSchema: struct + +(10) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#7, d_year#8, d_moy#9] + +(11) Filter [codegen id : 3] +Input [3]: [d_date_sk#7, d_year#8, d_moy#9] +Condition : (((((isnotnull(d_moy#9) AND isnotnull(d_year#8)) AND (d_year#8 = 2002)) AND (d_moy#9 >= 4)) AND (d_moy#9 <= 7)) AND isnotnull(d_date_sk#7)) + +(12) Project [codegen id : 3] +Output [1]: [d_date_sk#7] +Input [3]: [d_date_sk#7, d_year#8, d_moy#9] + +(13) BroadcastExchange +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_sold_date_sk#5] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(15) Project [codegen id : 4] +Output [1]: [ws_bill_customer_sk#6 AS customer_sk#11] +Input [3]: [ws_sold_date_sk#5, ws_bill_customer_sk#6, d_date_sk#7] + +(16) Scan parquet default.catalog_sales +Output [2]: [cs_sold_date_sk#12, cs_ship_customer_sk#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_ship_customer_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 6] +Input [2]: [cs_sold_date_sk#12, cs_ship_customer_sk#13] + +(18) Filter [codegen id : 6] +Input [2]: [cs_sold_date_sk#12, cs_ship_customer_sk#13] +Condition : (isnotnull(cs_sold_date_sk#12) AND isnotnull(cs_ship_customer_sk#13)) + +(19) ReusedExchange [Reuses operator id: 13] +Output [1]: [d_date_sk#7] + +(20) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_sold_date_sk#12] +Right keys [1]: 
[d_date_sk#7] +Join condition: None + +(21) Project [codegen id : 6] +Output [1]: [cs_ship_customer_sk#13 AS customer_sk#14] +Input [3]: [cs_sold_date_sk#12, cs_ship_customer_sk#13, d_date_sk#7] + +(22) Union + +(23) Exchange +Input [1]: [customer_sk#11] +Arguments: hashpartitioning(customer_sk#11, 5), true, [id=#15] + +(24) Sort [codegen id : 7] +Input [1]: [customer_sk#11] +Arguments: [customer_sk#11 ASC NULLS FIRST], false, 0 + +(25) SortMergeJoin +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [customer_sk#11] +Join condition: None + +(26) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#16, ss_customer_sk#17] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 9] +Input [2]: [ss_sold_date_sk#16, ss_customer_sk#17] + +(28) Filter [codegen id : 9] +Input [2]: [ss_sold_date_sk#16, ss_customer_sk#17] +Condition : (isnotnull(ss_sold_date_sk#16) AND isnotnull(ss_customer_sk#17)) + +(29) ReusedExchange [Reuses operator id: 13] +Output [1]: [d_date_sk#7] + +(30) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_sold_date_sk#16] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(31) Project [codegen id : 9] +Output [1]: [ss_customer_sk#17 AS customer_sk#18] +Input [3]: [ss_sold_date_sk#16, ss_customer_sk#17, d_date_sk#7] + +(32) Exchange +Input [1]: [customer_sk#18] +Arguments: hashpartitioning(customer_sk#18, 5), true, [id=#19] + +(33) Sort [codegen id : 10] +Input [1]: [customer_sk#18] +Arguments: [customer_sk#18 ASC NULLS FIRST], false, 0 + +(34) SortMergeJoin +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [customer_sk#18] +Join condition: None + +(35) Project [codegen id : 12] +Output [2]: [c_current_cdemo_sk#2, c_current_addr_sk#3] +Input [3]: [c_customer_sk#1, 
c_current_cdemo_sk#2, c_current_addr_sk#3] + +(36) Scan parquet default.customer_address +Output [2]: [ca_address_sk#20, ca_county#21] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/customer_address] +PushedFilters: [In(ca_county, [Walker County,Richland County,Gaines County,Douglas County,Dona Ana County]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(37) ColumnarToRow [codegen id : 11] +Input [2]: [ca_address_sk#20, ca_county#21] + +(38) Filter [codegen id : 11] +Input [2]: [ca_address_sk#20, ca_county#21] +Condition : (ca_county#21 IN (Walker County,Richland County,Gaines County,Douglas County,Dona Ana County) AND isnotnull(ca_address_sk#20)) + +(39) Project [codegen id : 11] +Output [1]: [ca_address_sk#20] +Input [2]: [ca_address_sk#20, ca_county#21] + +(40) BroadcastExchange +Input [1]: [ca_address_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] + +(41) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [c_current_addr_sk#3] +Right keys [1]: [ca_address_sk#20] +Join condition: None + +(42) Project [codegen id : 12] +Output [1]: [c_current_cdemo_sk#2] +Input [3]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#20] + +(43) BroadcastExchange +Input [1]: [c_current_cdemo_sk#2] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] + +(44) Scan parquet default.customer_demographics +Output [9]: [cd_demo_sk#24, cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + 
+(45) ColumnarToRow +Input [9]: [cd_demo_sk#24, cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32] + +(46) Filter +Input [9]: [cd_demo_sk#24, cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32] +Condition : isnotnull(cd_demo_sk#24) + +(47) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [c_current_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#24] +Join condition: None + +(48) Project [codegen id : 13] +Output [8]: [cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32] +Input [10]: [c_current_cdemo_sk#2, cd_demo_sk#24, cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32] + +(49) HashAggregate [codegen id : 13] +Input [8]: [cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32] +Keys [8]: [cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#33] +Results [9]: [cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32, count#34] + +(50) Exchange +Input [9]: [cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32, count#34] +Arguments: 
hashpartitioning(cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32, 5), true, [id=#35] + +(51) HashAggregate [codegen id : 14] +Input [9]: [cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32, count#34] +Keys [8]: [cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#36] +Results [14]: [cd_gender#25, cd_marital_status#26, cd_education_status#27, count(1)#36 AS cnt1#37, cd_purchase_estimate#28, count(1)#36 AS cnt2#38, cd_credit_rating#29, count(1)#36 AS cnt3#39, cd_dep_count#30, count(1)#36 AS cnt4#40, cd_dep_employed_count#31, count(1)#36 AS cnt5#41, cd_dep_college_count#32, count(1)#36 AS cnt6#42] + +(52) TakeOrderedAndProject +Input [14]: [cd_gender#25, cd_marital_status#26, cd_education_status#27, cnt1#37, cd_purchase_estimate#28, cnt2#38, cd_credit_rating#29, cnt3#39, cd_dep_count#30, cnt4#40, cd_dep_employed_count#31, cnt5#41, cd_dep_college_count#32, cnt6#42] +Arguments: 100, [cd_gender#25 ASC NULLS FIRST, cd_marital_status#26 ASC NULLS FIRST, cd_education_status#27 ASC NULLS FIRST, cd_purchase_estimate#28 ASC NULLS FIRST, cd_credit_rating#29 ASC NULLS FIRST, cd_dep_count#30 ASC NULLS FIRST, cd_dep_employed_count#31 ASC NULLS FIRST, cd_dep_college_count#32 ASC NULLS FIRST], [cd_gender#25, cd_marital_status#26, cd_education_status#27, cnt1#37, cd_purchase_estimate#28, cnt2#38, cd_credit_rating#29, cnt3#39, cd_dep_count#30, cnt4#40, cd_dep_employed_count#31, cnt5#41, cd_dep_college_count#32, cnt6#42] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q10.sf100/simplified.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q10.sf100/simplified.txt new file mode 100644 index 0000000000000..056eb273bfe50 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q10.sf100/simplified.txt @@ -0,0 +1,81 @@ +TakeOrderedAndProject [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate,cnt1,cnt2,cnt3,cnt4,cnt5,cnt6] + WholeStageCodegen (14) + HashAggregate [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate,count] [cnt1,cnt2,cnt3,cnt4,cnt5,cnt6,count,count(1)] + InputAdapter + Exchange [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] #1 + WholeStageCodegen (13) + HashAggregate [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] [count,count] + Project [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (12) + Project [c_current_cdemo_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_addr_sk,c_current_cdemo_sk] + InputAdapter + SortMergeJoin [c_customer_sk,customer_sk] + SortMergeJoin [c_customer_sk,customer_sk] + WholeStageCodegen (2) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #3 + WholeStageCodegen (1) + Filter [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + WholeStageCodegen (7) + Sort [customer_sk] + InputAdapter + Exchange [customer_sk] #4 + Union + 
WholeStageCodegen (4) + Project [ws_bill_customer_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_bill_customer_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + WholeStageCodegen (6) + Project [cs_ship_customer_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_ship_customer_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 + WholeStageCodegen (10) + Sort [customer_sk] + InputAdapter + Exchange [customer_sk] #6 + WholeStageCodegen (9) + Project [ss_customer_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (11) + Project [ca_address_sk] + Filter [ca_address_sk,ca_county] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_county] + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_credit_rating,cd_demo_sk,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q10/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q10/explain.txt new file mode 100644 index 0000000000000..f6c43526fbe5d --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q10/explain.txt @@ -0,0 +1,266 @@ +== Physical 
Plan == +TakeOrderedAndProject (48) ++- * HashAggregate (47) + +- Exchange (46) + +- * HashAggregate (45) + +- * Project (44) + +- * BroadcastHashJoin Inner BuildRight (43) + :- * Project (38) + : +- * BroadcastHashJoin Inner BuildRight (37) + : :- * Project (31) + : : +- * BroadcastHashJoin LeftSemi BuildRight (30) + : : :- * BroadcastHashJoin LeftSemi BuildRight (22) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.customer (1) + : : : +- BroadcastExchange (21) + : : : +- Union (20) + : : : :- * Project (13) + : : : : +- * BroadcastHashJoin Inner BuildRight (12) + : : : : :- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.web_sales (4) + : : : : +- BroadcastExchange (11) + : : : : +- * Project (10) + : : : : +- * Filter (9) + : : : : +- * ColumnarToRow (8) + : : : : +- Scan parquet default.date_dim (7) + : : : +- * Project (19) + : : : +- * BroadcastHashJoin Inner BuildRight (18) + : : : :- * Filter (16) + : : : : +- * ColumnarToRow (15) + : : : : +- Scan parquet default.catalog_sales (14) + : : : +- ReusedExchange (17) + : : +- BroadcastExchange (29) + : : +- * Project (28) + : : +- * BroadcastHashJoin Inner BuildRight (27) + : : :- * Filter (25) + : : : +- * ColumnarToRow (24) + : : : +- Scan parquet default.store_sales (23) + : : +- ReusedExchange (26) + : +- BroadcastExchange (36) + : +- * Project (35) + : +- * Filter (34) + : +- * ColumnarToRow (33) + : +- Scan parquet default.customer_address (32) + +- BroadcastExchange (42) + +- * Filter (41) + +- * ColumnarToRow (40) + +- Scan parquet default.customer_demographics (39) + + +(1) Scan parquet default.customer +Output [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk), 
IsNotNull(c_current_cdemo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 9] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(3) Filter [codegen id : 9] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Condition : ((isnotnull(c_customer_sk#1) AND isnotnull(c_current_addr_sk#3)) AND isnotnull(c_current_cdemo_sk#2)) + +(4) Scan parquet default.web_sales +Output [2]: [ws_sold_date_sk#4, ws_bill_customer_sk#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 2] +Input [2]: [ws_sold_date_sk#4, ws_bill_customer_sk#5] + +(6) Filter [codegen id : 2] +Input [2]: [ws_sold_date_sk#4, ws_bill_customer_sk#5] +Condition : (isnotnull(ws_sold_date_sk#4) AND isnotnull(ws_bill_customer_sk#5)) + +(7) Scan parquet default.date_dim +Output [3]: [d_date_sk#6, d_year#7, d_moy#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2002), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,7), IsNotNull(d_date_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_moy#8] + +(9) Filter [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_moy#8] +Condition : (((((isnotnull(d_year#7) AND isnotnull(d_moy#8)) AND (d_year#7 = 2002)) AND (d_moy#8 >= 4)) AND (d_moy#8 <= 7)) AND isnotnull(d_date_sk#6)) + +(10) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [3]: [d_date_sk#6, d_year#7, d_moy#8] + +(11) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + 
+(12) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ws_sold_date_sk#4] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(13) Project [codegen id : 2] +Output [1]: [ws_bill_customer_sk#5 AS customer_sk#10] +Input [3]: [ws_sold_date_sk#4, ws_bill_customer_sk#5, d_date_sk#6] + +(14) Scan parquet default.catalog_sales +Output [2]: [cs_sold_date_sk#11, cs_ship_customer_sk#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_ship_customer_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 4] +Input [2]: [cs_sold_date_sk#11, cs_ship_customer_sk#12] + +(16) Filter [codegen id : 4] +Input [2]: [cs_sold_date_sk#11, cs_ship_customer_sk#12] +Condition : (isnotnull(cs_sold_date_sk#11) AND isnotnull(cs_ship_customer_sk#12)) + +(17) ReusedExchange [Reuses operator id: 11] +Output [1]: [d_date_sk#6] + +(18) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#11] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(19) Project [codegen id : 4] +Output [1]: [cs_ship_customer_sk#12 AS customer_sk#13] +Input [3]: [cs_sold_date_sk#11, cs_ship_customer_sk#12, d_date_sk#6] + +(20) Union + +(21) BroadcastExchange +Input [1]: [customer_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] + +(22) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [customer_sk#10] +Join condition: None + +(23) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#15, ss_customer_sk#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 6] +Input 
[2]: [ss_sold_date_sk#15, ss_customer_sk#16] + +(25) Filter [codegen id : 6] +Input [2]: [ss_sold_date_sk#15, ss_customer_sk#16] +Condition : (isnotnull(ss_sold_date_sk#15) AND isnotnull(ss_customer_sk#16)) + +(26) ReusedExchange [Reuses operator id: 11] +Output [1]: [d_date_sk#6] + +(27) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#15] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(28) Project [codegen id : 6] +Output [1]: [ss_customer_sk#16 AS customer_sk#17] +Input [3]: [ss_sold_date_sk#15, ss_customer_sk#16, d_date_sk#6] + +(29) BroadcastExchange +Input [1]: [customer_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] + +(30) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [customer_sk#17] +Join condition: None + +(31) Project [codegen id : 9] +Output [2]: [c_current_cdemo_sk#2, c_current_addr_sk#3] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(32) Scan parquet default.customer_address +Output [2]: [ca_address_sk#19, ca_county#20] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/customer_address] +PushedFilters: [In(ca_county, [Walker County,Richland County,Gaines County,Douglas County,Dona Ana County]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(33) ColumnarToRow [codegen id : 7] +Input [2]: [ca_address_sk#19, ca_county#20] + +(34) Filter [codegen id : 7] +Input [2]: [ca_address_sk#19, ca_county#20] +Condition : (ca_county#20 IN (Walker County,Richland County,Gaines County,Douglas County,Dona Ana County) AND isnotnull(ca_address_sk#19)) + +(35) Project [codegen id : 7] +Output [1]: [ca_address_sk#19] +Input [2]: [ca_address_sk#19, ca_county#20] + +(36) BroadcastExchange +Input [1]: [ca_address_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), 
[id=#21] + +(37) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_addr_sk#3] +Right keys [1]: [ca_address_sk#19] +Join condition: None + +(38) Project [codegen id : 9] +Output [1]: [c_current_cdemo_sk#2] +Input [3]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#19] + +(39) Scan parquet default.customer_demographics +Output [9]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(40) ColumnarToRow [codegen id : 8] +Input [9]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] + +(41) Filter [codegen id : 8] +Input [9]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Condition : isnotnull(cd_demo_sk#22) + +(42) BroadcastExchange +Input [9]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#31] + +(43) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#22] +Join condition: None + +(44) Project [codegen id : 9] +Output [8]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Input [10]: 
[c_current_cdemo_sk#2, cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] + +(45) HashAggregate [codegen id : 9] +Input [8]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Keys [8]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#32] +Results [9]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, count#33] + +(46) Exchange +Input [9]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, count#33] +Arguments: hashpartitioning(cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, 5), true, [id=#34] + +(47) HashAggregate [codegen id : 10] +Input [9]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, count#33] +Keys [8]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#35] +Results [14]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, count(1)#35 AS cnt1#36, cd_purchase_estimate#26, count(1)#35 AS cnt2#37, cd_credit_rating#27, 
count(1)#35 AS cnt3#38, cd_dep_count#28, count(1)#35 AS cnt4#39, cd_dep_employed_count#29, count(1)#35 AS cnt5#40, cd_dep_college_count#30, count(1)#35 AS cnt6#41] + +(48) TakeOrderedAndProject +Input [14]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cnt1#36, cd_purchase_estimate#26, cnt2#37, cd_credit_rating#27, cnt3#38, cd_dep_count#28, cnt4#39, cd_dep_employed_count#29, cnt5#40, cd_dep_college_count#30, cnt6#41] +Arguments: 100, [cd_gender#23 ASC NULLS FIRST, cd_marital_status#24 ASC NULLS FIRST, cd_education_status#25 ASC NULLS FIRST, cd_purchase_estimate#26 ASC NULLS FIRST, cd_credit_rating#27 ASC NULLS FIRST, cd_dep_count#28 ASC NULLS FIRST, cd_dep_employed_count#29 ASC NULLS FIRST, cd_dep_college_count#30 ASC NULLS FIRST], [cd_gender#23, cd_marital_status#24, cd_education_status#25, cnt1#36, cd_purchase_estimate#26, cnt2#37, cd_credit_rating#27, cnt3#38, cd_dep_count#28, cnt4#39, cd_dep_employed_count#29, cnt5#40, cd_dep_college_count#30, cnt6#41] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q10/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q10/simplified.txt new file mode 100644 index 0000000000000..41685e1f311a0 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q10/simplified.txt @@ -0,0 +1,71 @@ +TakeOrderedAndProject [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate,cnt1,cnt2,cnt3,cnt4,cnt5,cnt6] + WholeStageCodegen (10) + HashAggregate [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate,count] [cnt1,cnt2,cnt3,cnt4,cnt5,cnt6,count,count(1)] + InputAdapter + Exchange [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] #1 + WholeStageCodegen 
(9) + HashAggregate [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] [count,count] + Project [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_addr_sk,c_current_cdemo_sk] + BroadcastHashJoin [c_customer_sk,customer_sk] + BroadcastHashJoin [c_customer_sk,customer_sk] + Filter [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + InputAdapter + BroadcastExchange #2 + Union + WholeStageCodegen (2) + Project [ws_bill_customer_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_bill_customer_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + WholeStageCodegen (4) + Project [cs_ship_customer_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_ship_customer_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (6) + Project [ss_customer_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (7) + Project 
[ca_address_sk] + Filter [ca_address_sk,ca_county] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_county] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (8) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_credit_rating,cd_demo_sk,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q19.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q19.sf100/explain.txt new file mode 100644 index 0000000000000..1239699c0e839 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q19.sf100/explain.txt @@ -0,0 +1,221 @@ +== Physical Plan == +TakeOrderedAndProject (39) ++- * HashAggregate (38) + +- Exchange (37) + +- * HashAggregate (36) + +- * Project (35) + +- * BroadcastHashJoin Inner BuildRight (34) + :- * Project (28) + : +- * BroadcastHashJoin Inner BuildLeft (27) + : :- BroadcastExchange (23) + : : +- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildLeft (15) + : : : :- BroadcastExchange (11) + : : : : +- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildLeft (9) + : : : : :- BroadcastExchange (5) + : : : : : +- * Project (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.date_dim (1) + : : : : +- * Filter (8) + : : : : +- * ColumnarToRow (7) + : : : : +- Scan parquet default.store_sales (6) + : : : +- * Filter (14) + : : : +- * ColumnarToRow (13) + : : : +- Scan parquet default.customer (12) + : : +- BroadcastExchange (20) + : : +- * Filter (19) + : : +- * ColumnarToRow (18) + : : +- Scan parquet default.store (17) + : +- * Filter (26) + : +- * ColumnarToRow (25) + : +- Scan parquet 
default.customer_address (24) + +- BroadcastExchange (33) + +- * Project (32) + +- * Filter (31) + +- * ColumnarToRow (30) + +- Scan parquet default.item (29) + + +(1) Scan parquet default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1999), GreaterThanOrEqual(d_date_sk,2451484), LessThanOrEqual(d_date_sk,2451513), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(3) Filter [codegen id : 1] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 1999)) AND (d_date_sk#1 >= 2451484)) AND (d_date_sk#1 <= 2451513)) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 1] +Output [1]: [d_date_sk#1] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(5) BroadcastExchange +Input [1]: [d_date_sk#1] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#4] + +(6) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#5, ss_item_sk#6, ss_customer_sk#7, ss_store_sk#8, ss_ext_sales_price#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2451484), LessThanOrEqual(ss_sold_date_sk,2451513), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(7) ColumnarToRow +Input [5]: [ss_sold_date_sk#5, ss_item_sk#6, ss_customer_sk#7, ss_store_sk#8, ss_ext_sales_price#9] + +(8) Filter +Input [5]: [ss_sold_date_sk#5, ss_item_sk#6, ss_customer_sk#7, 
ss_store_sk#8, ss_ext_sales_price#9] +Condition : (((((isnotnull(ss_sold_date_sk#5) AND (ss_sold_date_sk#5 >= 2451484)) AND (ss_sold_date_sk#5 <= 2451513)) AND isnotnull(ss_item_sk#6)) AND isnotnull(ss_customer_sk#7)) AND isnotnull(ss_store_sk#8)) + +(9) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#5] +Join condition: None + +(10) Project [codegen id : 2] +Output [4]: [ss_item_sk#6, ss_customer_sk#7, ss_store_sk#8, ss_ext_sales_price#9] +Input [6]: [d_date_sk#1, ss_sold_date_sk#5, ss_item_sk#6, ss_customer_sk#7, ss_store_sk#8, ss_ext_sales_price#9] + +(11) BroadcastExchange +Input [4]: [ss_item_sk#6, ss_customer_sk#7, ss_store_sk#8, ss_ext_sales_price#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#10] + +(12) Scan parquet default.customer +Output [2]: [c_customer_sk#11, c_current_addr_sk#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(13) ColumnarToRow +Input [2]: [c_customer_sk#11, c_current_addr_sk#12] + +(14) Filter +Input [2]: [c_customer_sk#11, c_current_addr_sk#12] +Condition : (isnotnull(c_customer_sk#11) AND isnotnull(c_current_addr_sk#12)) + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_customer_sk#7] +Right keys [1]: [c_customer_sk#11] +Join condition: None + +(16) Project [codegen id : 4] +Output [4]: [ss_item_sk#6, ss_store_sk#8, ss_ext_sales_price#9, c_current_addr_sk#12] +Input [6]: [ss_item_sk#6, ss_customer_sk#7, ss_store_sk#8, ss_ext_sales_price#9, c_customer_sk#11, c_current_addr_sk#12] + +(17) Scan parquet default.store +Output [2]: [s_store_sk#13, s_zip#14] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_zip), IsNotNull(s_store_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [2]: [s_store_sk#13, s_zip#14] + +(19) Filter [codegen id : 3] +Input [2]: [s_store_sk#13, s_zip#14] +Condition : (isnotnull(s_zip#14) AND isnotnull(s_store_sk#13)) + +(20) BroadcastExchange +Input [2]: [s_store_sk#13, s_zip#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#15] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#8] +Right keys [1]: [s_store_sk#13] +Join condition: None + +(22) Project [codegen id : 4] +Output [4]: [ss_item_sk#6, ss_ext_sales_price#9, c_current_addr_sk#12, s_zip#14] +Input [6]: [ss_item_sk#6, ss_store_sk#8, ss_ext_sales_price#9, c_current_addr_sk#12, s_store_sk#13, s_zip#14] + +(23) BroadcastExchange +Input [4]: [ss_item_sk#6, ss_ext_sales_price#9, c_current_addr_sk#12, s_zip#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [id=#16] + +(24) Scan parquet default.customer_address +Output [2]: [ca_address_sk#17, ca_zip#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_zip)] +ReadSchema: struct + +(25) ColumnarToRow +Input [2]: [ca_address_sk#17, ca_zip#18] + +(26) Filter +Input [2]: [ca_address_sk#17, ca_zip#18] +Condition : (isnotnull(ca_address_sk#17) AND isnotnull(ca_zip#18)) + +(27) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [c_current_addr_sk#12] +Right keys [1]: [ca_address_sk#17] +Join condition: NOT (substr(ca_zip#18, 1, 5) = substr(s_zip#14, 1, 5)) + +(28) Project [codegen id : 6] +Output [2]: [ss_item_sk#6, ss_ext_sales_price#9] +Input [6]: [ss_item_sk#6, 
ss_ext_sales_price#9, c_current_addr_sk#12, s_zip#14, ca_address_sk#17, ca_zip#18] + +(29) Scan parquet default.item +Output [6]: [i_item_sk#19, i_brand_id#20, i_brand#21, i_manufact_id#22, i_manufact#23, i_manager_id#24] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,7), IsNotNull(i_item_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 5] +Input [6]: [i_item_sk#19, i_brand_id#20, i_brand#21, i_manufact_id#22, i_manufact#23, i_manager_id#24] + +(31) Filter [codegen id : 5] +Input [6]: [i_item_sk#19, i_brand_id#20, i_brand#21, i_manufact_id#22, i_manufact#23, i_manager_id#24] +Condition : ((isnotnull(i_manager_id#24) AND (i_manager_id#24 = 7)) AND isnotnull(i_item_sk#19)) + +(32) Project [codegen id : 5] +Output [5]: [i_item_sk#19, i_brand_id#20, i_brand#21, i_manufact_id#22, i_manufact#23] +Input [6]: [i_item_sk#19, i_brand_id#20, i_brand#21, i_manufact_id#22, i_manufact#23, i_manager_id#24] + +(33) BroadcastExchange +Input [5]: [i_item_sk#19, i_brand_id#20, i_brand#21, i_manufact_id#22, i_manufact#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] + +(34) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_item_sk#6] +Right keys [1]: [i_item_sk#19] +Join condition: None + +(35) Project [codegen id : 6] +Output [5]: [ss_ext_sales_price#9, i_brand_id#20, i_brand#21, i_manufact_id#22, i_manufact#23] +Input [7]: [ss_item_sk#6, ss_ext_sales_price#9, i_item_sk#19, i_brand_id#20, i_brand#21, i_manufact_id#22, i_manufact#23] + +(36) HashAggregate [codegen id : 6] +Input [5]: [ss_ext_sales_price#9, i_brand_id#20, i_brand#21, i_manufact_id#22, i_manufact#23] +Keys [4]: [i_brand#21, i_brand_id#20, i_manufact_id#22, i_manufact#23] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#9))] +Aggregate Attributes [1]: 
[sum#26] +Results [5]: [i_brand#21, i_brand_id#20, i_manufact_id#22, i_manufact#23, sum#27] + +(37) Exchange +Input [5]: [i_brand#21, i_brand_id#20, i_manufact_id#22, i_manufact#23, sum#27] +Arguments: hashpartitioning(i_brand#21, i_brand_id#20, i_manufact_id#22, i_manufact#23, 5), true, [id=#28] + +(38) HashAggregate [codegen id : 7] +Input [5]: [i_brand#21, i_brand_id#20, i_manufact_id#22, i_manufact#23, sum#27] +Keys [4]: [i_brand#21, i_brand_id#20, i_manufact_id#22, i_manufact#23] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#9))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#9))#29] +Results [5]: [i_brand_id#20 AS brand_id#30, i_brand#21 AS brand#31, i_manufact_id#22, i_manufact#23, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#9))#29,17,2) AS ext_price#32] + +(39) TakeOrderedAndProject +Input [5]: [brand_id#30, brand#31, i_manufact_id#22, i_manufact#23, ext_price#32] +Arguments: 100, [ext_price#32 DESC NULLS LAST, brand#31 ASC NULLS FIRST, brand_id#30 ASC NULLS FIRST, i_manufact_id#22 ASC NULLS FIRST, i_manufact#23 ASC NULLS FIRST], [brand_id#30, brand#31, i_manufact_id#22, i_manufact#23, ext_price#32] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q19.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q19.sf100/simplified.txt new file mode 100644 index 0000000000000..f0e796970f414 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q19.sf100/simplified.txt @@ -0,0 +1,58 @@ +TakeOrderedAndProject [brand,brand_id,ext_price,i_manufact,i_manufact_id] + WholeStageCodegen (7) + HashAggregate [i_brand,i_brand_id,i_manufact,i_manufact_id,sum] [brand,brand_id,ext_price,sum,sum(UnscaledValue(ss_ext_sales_price))] + InputAdapter + Exchange [i_brand,i_brand_id,i_manufact,i_manufact_id] #1 + WholeStageCodegen (6) + HashAggregate [i_brand,i_brand_id,i_manufact,i_manufact_id,ss_ext_sales_price] [sum,sum] + Project 
[i_brand,i_brand_id,i_manufact,i_manufact_id,ss_ext_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk,ca_zip,s_zip] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (4) + Project [c_current_addr_sk,s_zip,ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [c_current_addr_sk,ss_ext_sales_price,ss_item_sk,ss_store_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] + Filter [c_current_addr_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_customer_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [s_store_sk,s_zip] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_zip] + Filter [ca_address_sk,ca_zip] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_zip] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Project [i_brand,i_brand_id,i_item_sk,i_manufact,i_manufact_id] + Filter [i_item_sk,i_manager_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_brand_id,i_item_sk,i_manager_id,i_manufact,i_manufact_id] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q19/explain.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q19/explain.txt new file mode 100644 index 0000000000000..5a404d7719934 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q19/explain.txt @@ -0,0 +1,221 @@ +== Physical Plan == +TakeOrderedAndProject (39) ++- * HashAggregate (38) + +- Exchange (37) + +- * HashAggregate (36) + +- * Project (35) + +- * BroadcastHashJoin Inner BuildRight (34) + :- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (23) + : : +- * BroadcastHashJoin Inner BuildRight (22) + : : :- * Project (17) + : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Project (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.date_dim (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Filter (7) + : : : : +- * ColumnarToRow (6) + : : : : +- Scan parquet default.store_sales (5) + : : : +- BroadcastExchange (15) + : : : +- * Project (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet default.item (11) + : : +- BroadcastExchange (21) + : : +- * Filter (20) + : : +- * ColumnarToRow (19) + : : +- Scan parquet default.customer (18) + : +- BroadcastExchange (27) + : +- * Filter (26) + : +- * ColumnarToRow (25) + : +- Scan parquet default.customer_address (24) + +- BroadcastExchange (33) + +- * Filter (32) + +- * ColumnarToRow (31) + +- Scan parquet default.store (30) + + +(1) Scan parquet default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1999), GreaterThanOrEqual(d_date_sk,2451484), LessThanOrEqual(d_date_sk,2451513), 
IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 6] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(3) Filter [codegen id : 6] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 1999)) AND (d_date_sk#1 >= 2451484)) AND (d_date_sk#1 <= 2451513)) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 6] +Output [1]: [d_date_sk#1] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(5) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#4, ss_item_sk#5, ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2451484), LessThanOrEqual(ss_sold_date_sk,2451513), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [5]: [ss_sold_date_sk#4, ss_item_sk#5, ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8] + +(7) Filter [codegen id : 1] +Input [5]: [ss_sold_date_sk#4, ss_item_sk#5, ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8] +Condition : (((((isnotnull(ss_sold_date_sk#4) AND (ss_sold_date_sk#4 >= 2451484)) AND (ss_sold_date_sk#4 <= 2451513)) AND isnotnull(ss_item_sk#5)) AND isnotnull(ss_customer_sk#6)) AND isnotnull(ss_store_sk#7)) + +(8) BroadcastExchange +Input [5]: [ss_sold_date_sk#4, ss_item_sk#5, ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] + +(9) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#4] +Join condition: None + +(10) Project [codegen id : 6] +Output [4]: [ss_item_sk#5, ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8] 
+Input [6]: [d_date_sk#1, ss_sold_date_sk#4, ss_item_sk#5, ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8] + +(11) Scan parquet default.item +Output [6]: [i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, i_manager_id#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,7), IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [6]: [i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, i_manager_id#15] + +(13) Filter [codegen id : 2] +Input [6]: [i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, i_manager_id#15] +Condition : ((isnotnull(i_manager_id#15) AND (i_manager_id#15 = 7)) AND isnotnull(i_item_sk#10)) + +(14) Project [codegen id : 2] +Output [5]: [i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14] +Input [6]: [i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, i_manager_id#15] + +(15) BroadcastExchange +Input [5]: [i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(16) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_item_sk#5] +Right keys [1]: [i_item_sk#10] +Join condition: None + +(17) Project [codegen id : 6] +Output [7]: [ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14] +Input [9]: [ss_item_sk#5, ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8, i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14] + +(18) Scan parquet default.customer +Output [2]: [c_customer_sk#17, c_current_addr_sk#18] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [c_customer_sk#17, c_current_addr_sk#18] + +(20) Filter [codegen id : 3] +Input [2]: [c_customer_sk#17, c_current_addr_sk#18] +Condition : (isnotnull(c_customer_sk#17) AND isnotnull(c_current_addr_sk#18)) + +(21) BroadcastExchange +Input [2]: [c_customer_sk#17, c_current_addr_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] + +(22) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#6] +Right keys [1]: [c_customer_sk#17] +Join condition: None + +(23) Project [codegen id : 6] +Output [7]: [ss_store_sk#7, ss_ext_sales_price#8, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, c_current_addr_sk#18] +Input [9]: [ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, c_customer_sk#17, c_current_addr_sk#18] + +(24) Scan parquet default.customer_address +Output [2]: [ca_address_sk#20, ca_zip#21] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_zip)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 4] +Input [2]: [ca_address_sk#20, ca_zip#21] + +(26) Filter [codegen id : 4] +Input [2]: [ca_address_sk#20, ca_zip#21] +Condition : (isnotnull(ca_address_sk#20) AND isnotnull(ca_zip#21)) + +(27) BroadcastExchange +Input [2]: [ca_address_sk#20, ca_zip#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] + +(28) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [c_current_addr_sk#18] +Right keys [1]: [ca_address_sk#20] +Join condition: 
None + +(29) Project [codegen id : 6] +Output [7]: [ss_store_sk#7, ss_ext_sales_price#8, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, ca_zip#21] +Input [9]: [ss_store_sk#7, ss_ext_sales_price#8, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, c_current_addr_sk#18, ca_address_sk#20, ca_zip#21] + +(30) Scan parquet default.store +Output [2]: [s_store_sk#23, s_zip#24] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_zip), IsNotNull(s_store_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 5] +Input [2]: [s_store_sk#23, s_zip#24] + +(32) Filter [codegen id : 5] +Input [2]: [s_store_sk#23, s_zip#24] +Condition : (isnotnull(s_zip#24) AND isnotnull(s_store_sk#23)) + +(33) BroadcastExchange +Input [2]: [s_store_sk#23, s_zip#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#25] + +(34) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_store_sk#7] +Right keys [1]: [s_store_sk#23] +Join condition: NOT (substr(ca_zip#21, 1, 5) = substr(s_zip#24, 1, 5)) + +(35) Project [codegen id : 6] +Output [5]: [ss_ext_sales_price#8, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14] +Input [9]: [ss_store_sk#7, ss_ext_sales_price#8, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, ca_zip#21, s_store_sk#23, s_zip#24] + +(36) HashAggregate [codegen id : 6] +Input [5]: [ss_ext_sales_price#8, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14] +Keys [4]: [i_brand#12, i_brand_id#11, i_manufact_id#13, i_manufact#14] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#8))] +Aggregate Attributes [1]: [sum#26] +Results [5]: [i_brand#12, i_brand_id#11, i_manufact_id#13, i_manufact#14, sum#27] + +(37) Exchange +Input [5]: [i_brand#12, i_brand_id#11, i_manufact_id#13, i_manufact#14, sum#27] +Arguments: 
hashpartitioning(i_brand#12, i_brand_id#11, i_manufact_id#13, i_manufact#14, 5), true, [id=#28] + +(38) HashAggregate [codegen id : 7] +Input [5]: [i_brand#12, i_brand_id#11, i_manufact_id#13, i_manufact#14, sum#27] +Keys [4]: [i_brand#12, i_brand_id#11, i_manufact_id#13, i_manufact#14] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#8))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#8))#29] +Results [5]: [i_brand_id#11 AS brand_id#30, i_brand#12 AS brand#31, i_manufact_id#13, i_manufact#14, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#8))#29,17,2) AS ext_price#32] + +(39) TakeOrderedAndProject +Input [5]: [brand_id#30, brand#31, i_manufact_id#13, i_manufact#14, ext_price#32] +Arguments: 100, [ext_price#32 DESC NULLS LAST, brand#31 ASC NULLS FIRST, brand_id#30 ASC NULLS FIRST, i_manufact_id#13 ASC NULLS FIRST, i_manufact#14 ASC NULLS FIRST], [brand_id#30, brand#31, i_manufact_id#13, i_manufact#14, ext_price#32] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q19/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q19/simplified.txt new file mode 100644 index 0000000000000..9217520556863 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q19/simplified.txt @@ -0,0 +1,58 @@ +TakeOrderedAndProject [brand,brand_id,ext_price,i_manufact,i_manufact_id] + WholeStageCodegen (7) + HashAggregate [i_brand,i_brand_id,i_manufact,i_manufact_id,sum] [brand,brand_id,ext_price,sum,sum(UnscaledValue(ss_ext_sales_price))] + InputAdapter + Exchange [i_brand,i_brand_id,i_manufact,i_manufact_id] #1 + WholeStageCodegen (6) + HashAggregate [i_brand,i_brand_id,i_manufact,i_manufact_id,ss_ext_sales_price] [sum,sum] + Project [i_brand,i_brand_id,i_manufact,i_manufact_id,ss_ext_sales_price] + BroadcastHashJoin [ca_zip,s_store_sk,s_zip,ss_store_sk] + Project [ca_zip,i_brand,i_brand_id,i_manufact,i_manufact_id,ss_ext_sales_price,ss_store_sk] + 
BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_addr_sk,i_brand,i_brand_id,i_manufact,i_manufact_id,ss_ext_sales_price,ss_store_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [i_brand,i_brand_id,i_manufact,i_manufact_id,ss_customer_sk,ss_ext_sales_price,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [i_brand,i_brand_id,i_item_sk,i_manufact,i_manufact_id] + Filter [i_item_sk,i_manager_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_brand_id,i_item_sk,i_manager_id,i_manufact,i_manufact_id] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [c_current_addr_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_customer_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [ca_address_sk,ca_zip] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_zip] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [s_store_sk,s_zip] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_zip] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q27.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q27.sf100/explain.txt new file mode 100644 index 0000000000000..5cec6f18579e2 --- 
/dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q27.sf100/explain.txt @@ -0,0 +1,428 @@ +== Physical Plan == +TakeOrderedAndProject (77) ++- Union (76) + :- * HashAggregate (32) + : +- Exchange (31) + : +- * HashAggregate (30) + : +- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (23) + : : +- * BroadcastHashJoin Inner BuildRight (22) + : : :- * Project (17) + : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildLeft (9) + : : : : :- BroadcastExchange (5) + : : : : : +- * Project (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.date_dim (1) + : : : : +- * Filter (8) + : : : : +- * ColumnarToRow (7) + : : : : +- Scan parquet default.store_sales (6) + : : : +- BroadcastExchange (15) + : : : +- * Project (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet default.customer_demographics (11) + : : +- BroadcastExchange (21) + : : +- * Filter (20) + : : +- * ColumnarToRow (19) + : : +- Scan parquet default.store (18) + : +- BroadcastExchange (27) + : +- * Filter (26) + : +- * ColumnarToRow (25) + : +- Scan parquet default.item (24) + :- * HashAggregate (54) + : +- Exchange (53) + : +- * HashAggregate (52) + : +- * Project (51) + : +- * BroadcastHashJoin Inner BuildRight (50) + : :- * Project (48) + : : +- * BroadcastHashJoin Inner BuildRight (47) + : : :- * Project (45) + : : : +- * BroadcastHashJoin Inner BuildRight (44) + : : : :- * Project (38) + : : : : +- * BroadcastHashJoin Inner BuildLeft (37) + : : : : :- ReusedExchange (33) + : : : : +- * Filter (36) + : : : : +- * ColumnarToRow (35) + : : : : +- Scan parquet default.store_sales (34) + : : : +- BroadcastExchange (43) + : : : +- * Project (42) + : : : +- * Filter (41) + : : : +- * ColumnarToRow (40) + : : : +- Scan parquet default.store (39) + : : +- ReusedExchange (46) + : +- 
ReusedExchange (49) + +- * HashAggregate (75) + +- Exchange (74) + +- * HashAggregate (73) + +- * Project (72) + +- * BroadcastHashJoin Inner BuildRight (71) + :- * Project (66) + : +- * BroadcastHashJoin Inner BuildRight (65) + : :- * Project (63) + : : +- * BroadcastHashJoin Inner BuildRight (62) + : : :- * Project (60) + : : : +- * BroadcastHashJoin Inner BuildLeft (59) + : : : :- ReusedExchange (55) + : : : +- * Filter (58) + : : : +- * ColumnarToRow (57) + : : : +- Scan parquet default.store_sales (56) + : : +- ReusedExchange (61) + : +- ReusedExchange (64) + +- BroadcastExchange (70) + +- * Filter (69) + +- * ColumnarToRow (68) + +- Scan parquet default.item (67) + + +(1) Scan parquet default.date_dim +Output [2]: [d_date_sk#1, d_year#2] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), LessThanOrEqual(d_date_sk,2451910), GreaterThanOrEqual(d_date_sk,2451545), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#1, d_year#2] + +(3) Filter [codegen id : 1] +Input [2]: [d_date_sk#1, d_year#2] +Condition : ((((isnotnull(d_year#2) AND (d_year#2 = 2000)) AND (d_date_sk#1 <= 2451910)) AND (d_date_sk#1 >= 2451545)) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 1] +Output [1]: [d_date_sk#1] +Input [2]: [d_date_sk#1, d_year#2] + +(5) BroadcastExchange +Input [1]: [d_date_sk#1] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#3] + +(6) Scan parquet default.store_sales +Output [8]: [ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2451545), LessThanOrEqual(ss_sold_date_sk,2451910), IsNotNull(ss_cdemo_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(7) ColumnarToRow +Input [8]: [ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] + +(8) Filter +Input [8]: [ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +Condition : (((((isnotnull(ss_sold_date_sk#4) AND (ss_sold_date_sk#4 >= 2451545)) AND (ss_sold_date_sk#4 <= 2451910)) AND isnotnull(ss_cdemo_sk#6)) AND isnotnull(ss_store_sk#7)) AND isnotnull(ss_item_sk#5)) + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#4] +Join condition: None + +(10) Project [codegen id : 5] +Output [7]: [ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +Input [9]: [d_date_sk#1, ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] + +(11) Scan parquet default.customer_demographics +Output [4]: [cd_demo_sk#12, cd_gender#13, cd_marital_status#14, cd_education_status#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/customer_demographics] +PushedFilters: [IsNotNull(cd_education_status), IsNotNull(cd_gender), IsNotNull(cd_marital_status), EqualTo(cd_gender,F), EqualTo(cd_marital_status,D), EqualTo(cd_education_status,Primary), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [4]: [cd_demo_sk#12, cd_gender#13, 
cd_marital_status#14, cd_education_status#15] + +(13) Filter [codegen id : 2] +Input [4]: [cd_demo_sk#12, cd_gender#13, cd_marital_status#14, cd_education_status#15] +Condition : ((((((isnotnull(cd_education_status#15) AND isnotnull(cd_gender#13)) AND isnotnull(cd_marital_status#14)) AND (cd_gender#13 = F)) AND (cd_marital_status#14 = D)) AND (cd_education_status#15 = Primary)) AND isnotnull(cd_demo_sk#12)) + +(14) Project [codegen id : 2] +Output [1]: [cd_demo_sk#12] +Input [4]: [cd_demo_sk#12, cd_gender#13, cd_marital_status#14, cd_education_status#15] + +(15) BroadcastExchange +Input [1]: [cd_demo_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_cdemo_sk#6] +Right keys [1]: [cd_demo_sk#12] +Join condition: None + +(17) Project [codegen id : 5] +Output [6]: [ss_item_sk#5, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +Input [8]: [ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, cd_demo_sk#12] + +(18) Scan parquet default.store +Output [2]: [s_store_sk#17, s_state#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store] +PushedFilters: [In(s_state, [TN,AL,SD]), IsNotNull(s_store_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [s_store_sk#17, s_state#18] + +(20) Filter [codegen id : 3] +Input [2]: [s_store_sk#17, s_state#18] +Condition : (s_state#18 IN (TN,AL,SD) AND isnotnull(s_store_sk#17)) + +(21) BroadcastExchange +Input [2]: [s_store_sk#17, s_state#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] + +(22) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_store_sk#7] +Right keys [1]: [s_store_sk#17] +Join condition: None + +(23) Project 
[codegen id : 5] +Output [6]: [ss_item_sk#5, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, s_state#18] +Input [8]: [ss_item_sk#5, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, s_store_sk#17, s_state#18] + +(24) Scan parquet default.item +Output [2]: [i_item_sk#20, i_item_id#21] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#20, i_item_id#21] + +(26) Filter [codegen id : 4] +Input [2]: [i_item_sk#20, i_item_id#21] +Condition : isnotnull(i_item_sk#20) + +(27) BroadcastExchange +Input [2]: [i_item_sk#20, i_item_id#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#5] +Right keys [1]: [i_item_sk#20] +Join condition: None + +(29) Project [codegen id : 5] +Output [6]: [i_item_id#21, s_state#18, ss_quantity#8 AS agg1#23, ss_list_price#9 AS agg2#24, ss_coupon_amt#11 AS agg3#25, ss_sales_price#10 AS agg4#26] +Input [8]: [ss_item_sk#5, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, s_state#18, i_item_sk#20, i_item_id#21] + +(30) HashAggregate [codegen id : 5] +Input [6]: [i_item_id#21, s_state#18, agg1#23, agg2#24, agg3#25, agg4#26] +Keys [2]: [i_item_id#21, s_state#18] +Functions [4]: [partial_avg(cast(agg1#23 as bigint)), partial_avg(UnscaledValue(agg2#24)), partial_avg(UnscaledValue(agg3#25)), partial_avg(UnscaledValue(agg4#26))] +Aggregate Attributes [8]: [sum#27, count#28, sum#29, count#30, sum#31, count#32, sum#33, count#34] +Results [10]: [i_item_id#21, s_state#18, sum#35, count#36, sum#37, count#38, sum#39, count#40, sum#41, count#42] + +(31) Exchange +Input [10]: [i_item_id#21, s_state#18, sum#35, count#36, sum#37, 
count#38, sum#39, count#40, sum#41, count#42] +Arguments: hashpartitioning(i_item_id#21, s_state#18, 5), true, [id=#43] + +(32) HashAggregate [codegen id : 6] +Input [10]: [i_item_id#21, s_state#18, sum#35, count#36, sum#37, count#38, sum#39, count#40, sum#41, count#42] +Keys [2]: [i_item_id#21, s_state#18] +Functions [4]: [avg(cast(agg1#23 as bigint)), avg(UnscaledValue(agg2#24)), avg(UnscaledValue(agg3#25)), avg(UnscaledValue(agg4#26))] +Aggregate Attributes [4]: [avg(cast(agg1#23 as bigint))#44, avg(UnscaledValue(agg2#24))#45, avg(UnscaledValue(agg3#25))#46, avg(UnscaledValue(agg4#26))#47] +Results [7]: [i_item_id#21, s_state#18, 0 AS g_state#48, avg(cast(agg1#23 as bigint))#44 AS agg1#49, cast((avg(UnscaledValue(agg2#24))#45 / 100.0) as decimal(11,6)) AS agg2#50, cast((avg(UnscaledValue(agg3#25))#46 / 100.0) as decimal(11,6)) AS agg3#51, cast((avg(UnscaledValue(agg4#26))#47 / 100.0) as decimal(11,6)) AS agg4#52] + +(33) ReusedExchange [Reuses operator id: 5] +Output [1]: [d_date_sk#1] + +(34) Scan parquet default.store_sales +Output [8]: [ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2451545), LessThanOrEqual(ss_sold_date_sk,2451910), IsNotNull(ss_cdemo_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(35) ColumnarToRow +Input [8]: [ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] + +(36) Filter +Input [8]: [ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +Condition : (((((isnotnull(ss_sold_date_sk#4) AND (ss_sold_date_sk#4 >= 
2451545)) AND (ss_sold_date_sk#4 <= 2451910)) AND isnotnull(ss_cdemo_sk#6)) AND isnotnull(ss_store_sk#7)) AND isnotnull(ss_item_sk#5)) + +(37) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#4] +Join condition: None + +(38) Project [codegen id : 11] +Output [7]: [ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +Input [9]: [d_date_sk#1, ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] + +(39) Scan parquet default.store +Output [2]: [s_store_sk#17, s_state#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store] +PushedFilters: [In(s_state, [TN,AL,SD]), IsNotNull(s_store_sk)] +ReadSchema: struct + +(40) ColumnarToRow [codegen id : 8] +Input [2]: [s_store_sk#17, s_state#18] + +(41) Filter [codegen id : 8] +Input [2]: [s_store_sk#17, s_state#18] +Condition : (s_state#18 IN (TN,AL,SD) AND isnotnull(s_store_sk#17)) + +(42) Project [codegen id : 8] +Output [1]: [s_store_sk#17] +Input [2]: [s_store_sk#17, s_state#18] + +(43) BroadcastExchange +Input [1]: [s_store_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#53] + +(44) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ss_store_sk#7] +Right keys [1]: [s_store_sk#17] +Join condition: None + +(45) Project [codegen id : 11] +Output [6]: [ss_item_sk#5, ss_cdemo_sk#6, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +Input [8]: [ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, s_store_sk#17] + +(46) ReusedExchange [Reuses operator id: 15] +Output [1]: [cd_demo_sk#12] + +(47) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ss_cdemo_sk#6] +Right keys [1]: 
[cd_demo_sk#12] +Join condition: None + +(48) Project [codegen id : 11] +Output [5]: [ss_item_sk#5, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +Input [7]: [ss_item_sk#5, ss_cdemo_sk#6, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, cd_demo_sk#12] + +(49) ReusedExchange [Reuses operator id: 27] +Output [2]: [i_item_sk#20, i_item_id#21] + +(50) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ss_item_sk#5] +Right keys [1]: [i_item_sk#20] +Join condition: None + +(51) Project [codegen id : 11] +Output [5]: [i_item_id#21, ss_quantity#8 AS agg1#23, ss_list_price#9 AS agg2#24, ss_coupon_amt#11 AS agg3#25, ss_sales_price#10 AS agg4#26] +Input [7]: [ss_item_sk#5, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, i_item_sk#20, i_item_id#21] + +(52) HashAggregate [codegen id : 11] +Input [5]: [i_item_id#21, agg1#23, agg2#24, agg3#25, agg4#26] +Keys [1]: [i_item_id#21] +Functions [4]: [partial_avg(cast(agg1#23 as bigint)), partial_avg(UnscaledValue(agg2#24)), partial_avg(UnscaledValue(agg3#25)), partial_avg(UnscaledValue(agg4#26))] +Aggregate Attributes [8]: [sum#54, count#55, sum#56, count#57, sum#58, count#59, sum#60, count#61] +Results [9]: [i_item_id#21, sum#62, count#63, sum#64, count#65, sum#66, count#67, sum#68, count#69] + +(53) Exchange +Input [9]: [i_item_id#21, sum#62, count#63, sum#64, count#65, sum#66, count#67, sum#68, count#69] +Arguments: hashpartitioning(i_item_id#21, 5), true, [id=#70] + +(54) HashAggregate [codegen id : 12] +Input [9]: [i_item_id#21, sum#62, count#63, sum#64, count#65, sum#66, count#67, sum#68, count#69] +Keys [1]: [i_item_id#21] +Functions [4]: [avg(cast(agg1#23 as bigint)), avg(UnscaledValue(agg2#24)), avg(UnscaledValue(agg3#25)), avg(UnscaledValue(agg4#26))] +Aggregate Attributes [4]: [avg(cast(agg1#23 as bigint))#71, avg(UnscaledValue(agg2#24))#72, avg(UnscaledValue(agg3#25))#73, avg(UnscaledValue(agg4#26))#74] +Results [7]: [i_item_id#21, null AS s_state#75, 1 AS 
g_state#76, avg(cast(agg1#23 as bigint))#71 AS agg1#77, cast((avg(UnscaledValue(agg2#24))#72 / 100.0) as decimal(11,6)) AS agg2#78, cast((avg(UnscaledValue(agg3#25))#73 / 100.0) as decimal(11,6)) AS agg3#79, cast((avg(UnscaledValue(agg4#26))#74 / 100.0) as decimal(11,6)) AS agg4#80] + +(55) ReusedExchange [Reuses operator id: 5] +Output [1]: [d_date_sk#1] + +(56) Scan parquet default.store_sales +Output [8]: [ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2451545), LessThanOrEqual(ss_sold_date_sk,2451910), IsNotNull(ss_cdemo_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(57) ColumnarToRow +Input [8]: [ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] + +(58) Filter +Input [8]: [ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +Condition : (((((isnotnull(ss_sold_date_sk#4) AND (ss_sold_date_sk#4 >= 2451545)) AND (ss_sold_date_sk#4 <= 2451910)) AND isnotnull(ss_cdemo_sk#6)) AND isnotnull(ss_store_sk#7)) AND isnotnull(ss_item_sk#5)) + +(59) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#4] +Join condition: None + +(60) Project [codegen id : 17] +Output [7]: [ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +Input [9]: [d_date_sk#1, ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] + +(61) ReusedExchange [Reuses operator id: 43] +Output [1]: 
[s_store_sk#17] + +(62) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ss_store_sk#7] +Right keys [1]: [s_store_sk#17] +Join condition: None + +(63) Project [codegen id : 17] +Output [6]: [ss_item_sk#5, ss_cdemo_sk#6, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +Input [8]: [ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, s_store_sk#17] + +(64) ReusedExchange [Reuses operator id: 15] +Output [1]: [cd_demo_sk#12] + +(65) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ss_cdemo_sk#6] +Right keys [1]: [cd_demo_sk#12] +Join condition: None + +(66) Project [codegen id : 17] +Output [5]: [ss_item_sk#5, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +Input [7]: [ss_item_sk#5, ss_cdemo_sk#6, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, cd_demo_sk#12] + +(67) Scan parquet default.item +Output [1]: [i_item_sk#20] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(68) ColumnarToRow [codegen id : 16] +Input [1]: [i_item_sk#20] + +(69) Filter [codegen id : 16] +Input [1]: [i_item_sk#20] +Condition : isnotnull(i_item_sk#20) + +(70) BroadcastExchange +Input [1]: [i_item_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#81] + +(71) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ss_item_sk#5] +Right keys [1]: [i_item_sk#20] +Join condition: None + +(72) Project [codegen id : 17] +Output [4]: [ss_quantity#8 AS agg1#23, ss_list_price#9 AS agg2#24, ss_coupon_amt#11 AS agg3#25, ss_sales_price#10 AS agg4#26] +Input [6]: [ss_item_sk#5, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, i_item_sk#20] + +(73) HashAggregate [codegen id : 17] +Input [4]: [agg1#23, agg2#24, agg3#25, 
agg4#26] +Keys: [] +Functions [4]: [partial_avg(cast(agg1#23 as bigint)), partial_avg(UnscaledValue(agg2#24)), partial_avg(UnscaledValue(agg3#25)), partial_avg(UnscaledValue(agg4#26))] +Aggregate Attributes [8]: [sum#82, count#83, sum#84, count#85, sum#86, count#87, sum#88, count#89] +Results [8]: [sum#90, count#91, sum#92, count#93, sum#94, count#95, sum#96, count#97] + +(74) Exchange +Input [8]: [sum#90, count#91, sum#92, count#93, sum#94, count#95, sum#96, count#97] +Arguments: SinglePartition, true, [id=#98] + +(75) HashAggregate [codegen id : 18] +Input [8]: [sum#90, count#91, sum#92, count#93, sum#94, count#95, sum#96, count#97] +Keys: [] +Functions [4]: [avg(cast(agg1#23 as bigint)), avg(UnscaledValue(agg2#24)), avg(UnscaledValue(agg3#25)), avg(UnscaledValue(agg4#26))] +Aggregate Attributes [4]: [avg(cast(agg1#23 as bigint))#99, avg(UnscaledValue(agg2#24))#100, avg(UnscaledValue(agg3#25))#101, avg(UnscaledValue(agg4#26))#102] +Results [7]: [null AS i_item_id#103, null AS s_state#104, 1 AS g_state#105, avg(cast(agg1#23 as bigint))#99 AS agg1#106, cast((avg(UnscaledValue(agg2#24))#100 / 100.0) as decimal(11,6)) AS agg2#107, cast((avg(UnscaledValue(agg3#25))#101 / 100.0) as decimal(11,6)) AS agg3#108, cast((avg(UnscaledValue(agg4#26))#102 / 100.0) as decimal(11,6)) AS agg4#109] + +(76) Union + +(77) TakeOrderedAndProject +Input [7]: [i_item_id#21, s_state#18, g_state#48, agg1#49, agg2#50, agg3#51, agg4#52] +Arguments: 100, [i_item_id#21 ASC NULLS FIRST, s_state#18 ASC NULLS FIRST], [i_item_id#21, s_state#18, g_state#48, agg1#49, agg2#50, agg3#51, agg4#52] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q27.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q27.sf100/simplified.txt new file mode 100644 index 0000000000000..f2d9cfb0b28af --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q27.sf100/simplified.txt @@ -0,0 +1,113 @@ 
+TakeOrderedAndProject [agg1,agg2,agg3,agg4,g_state,i_item_id,s_state] + Union + WholeStageCodegen (6) + HashAggregate [count,count,count,count,i_item_id,s_state,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,avg(UnscaledValue(agg2)),avg(UnscaledValue(agg3)),avg(UnscaledValue(agg4)),avg(cast(agg1 as bigint)),count,count,count,count,g_state,sum,sum,sum,sum] + InputAdapter + Exchange [i_item_id,s_state] #1 + WholeStageCodegen (5) + HashAggregate [agg1,agg2,agg3,agg4,i_item_id,s_state] [count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] + Project [i_item_id,s_state,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [s_state,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_store_sk] + BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] + Project [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + Filter [ss_cdemo_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [cd_demo_sk] + Filter [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [s_state,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_state,s_store_sk] 
+ InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_id,i_item_sk] + WholeStageCodegen (12) + HashAggregate [count,count,count,count,i_item_id,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,avg(UnscaledValue(agg2)),avg(UnscaledValue(agg3)),avg(UnscaledValue(agg4)),avg(cast(agg1 as bigint)),count,count,count,count,g_state,s_state,sum,sum,sum,sum] + InputAdapter + Exchange [i_item_id] #6 + WholeStageCodegen (11) + HashAggregate [agg1,agg2,agg3,agg4,i_item_id] [count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] + Project [i_item_id,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] + Project [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #2 + Filter [ss_cdemo_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (8) + Project [s_store_sk] + Filter [s_state,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_state,s_store_sk] + InputAdapter + ReusedExchange [cd_demo_sk] #3 + InputAdapter + ReusedExchange [i_item_id,i_item_sk] #5 + WholeStageCodegen (18) + HashAggregate [count,count,count,count,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,avg(UnscaledValue(agg2)),avg(UnscaledValue(agg3)),avg(UnscaledValue(agg4)),avg(cast(agg1 as 
bigint)),count,count,count,count,g_state,i_item_id,s_state,sum,sum,sum,sum] + InputAdapter + Exchange #8 + WholeStageCodegen (17) + HashAggregate [agg1,agg2,agg3,agg4] [count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] + Project [ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] + Project [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #2 + Filter [ss_cdemo_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + ReusedExchange [s_store_sk] #7 + InputAdapter + ReusedExchange [cd_demo_sk] #3 + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (16) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q27/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q27/explain.txt new file mode 100644 index 0000000000000..151e713ff2e3c --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q27/explain.txt @@ -0,0 +1,428 @@ +== Physical Plan == +TakeOrderedAndProject (77) ++- Union (76) + :- * HashAggregate (32) + : +- Exchange (31) + : +- * HashAggregate (30) + : +- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (23) + : : +- * BroadcastHashJoin Inner BuildRight (22) + : : :- * Project (17) + : : : +- * 
BroadcastHashJoin Inner BuildRight (16) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.customer_demographics (4) + : : : +- BroadcastExchange (15) + : : : +- * Project (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet default.date_dim (11) + : : +- BroadcastExchange (21) + : : +- * Filter (20) + : : +- * ColumnarToRow (19) + : : +- Scan parquet default.store (18) + : +- BroadcastExchange (27) + : +- * Filter (26) + : +- * ColumnarToRow (25) + : +- Scan parquet default.item (24) + :- * HashAggregate (54) + : +- Exchange (53) + : +- * HashAggregate (52) + : +- * Project (51) + : +- * BroadcastHashJoin Inner BuildRight (50) + : :- * Project (48) + : : +- * BroadcastHashJoin Inner BuildRight (47) + : : :- * Project (41) + : : : +- * BroadcastHashJoin Inner BuildRight (40) + : : : :- * Project (38) + : : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : : :- * Filter (35) + : : : : : +- * ColumnarToRow (34) + : : : : : +- Scan parquet default.store_sales (33) + : : : : +- ReusedExchange (36) + : : : +- ReusedExchange (39) + : : +- BroadcastExchange (46) + : : +- * Project (45) + : : +- * Filter (44) + : : +- * ColumnarToRow (43) + : : +- Scan parquet default.store (42) + : +- ReusedExchange (49) + +- * HashAggregate (75) + +- Exchange (74) + +- * HashAggregate (73) + +- * Project (72) + +- * BroadcastHashJoin Inner BuildRight (71) + :- * Project (66) + : +- * BroadcastHashJoin Inner BuildRight (65) + : :- * Project (63) + : : +- * BroadcastHashJoin Inner BuildRight (62) + : : :- * Project (60) + : : : +- * BroadcastHashJoin Inner BuildRight (59) + : : : :- * Filter (57) + : : : : +- * ColumnarToRow (56) + : : : : +- Scan parquet 
default.store_sales (55) + : : : +- ReusedExchange (58) + : : +- ReusedExchange (61) + : +- ReusedExchange (64) + +- BroadcastExchange (70) + +- * Filter (69) + +- * ColumnarToRow (68) + +- Scan parquet default.item (67) + + +(1) Scan parquet default.store_sales +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2451545), LessThanOrEqual(ss_sold_date_sk,2451910), IsNotNull(ss_cdemo_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] + +(3) Filter [codegen id : 5] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Condition : (((((isnotnull(ss_sold_date_sk#1) AND (ss_sold_date_sk#1 >= 2451545)) AND (ss_sold_date_sk#1 <= 2451910)) AND isnotnull(ss_cdemo_sk#3)) AND isnotnull(ss_store_sk#4)) AND isnotnull(ss_item_sk#2)) + +(4) Scan parquet default.customer_demographics +Output [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/customer_demographics] +PushedFilters: [IsNotNull(cd_marital_status), IsNotNull(cd_education_status), IsNotNull(cd_gender), EqualTo(cd_gender,F), EqualTo(cd_marital_status,D), EqualTo(cd_education_status,Primary), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, 
cd_marital_status#11, cd_education_status#12] + +(6) Filter [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Condition : ((((((isnotnull(cd_marital_status#11) AND isnotnull(cd_education_status#12)) AND isnotnull(cd_gender#10)) AND (cd_gender#10 = F)) AND (cd_marital_status#11 = D)) AND (cd_education_status#12 = Primary)) AND isnotnull(cd_demo_sk#9)) + +(7) Project [codegen id : 1] +Output [1]: [cd_demo_sk#9] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(8) BroadcastExchange +Input [1]: [cd_demo_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_cdemo_sk#3] +Right keys [1]: [cd_demo_sk#9] +Join condition: None + +(10) Project [codegen id : 5] +Output [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, cd_demo_sk#9] + +(11) Scan parquet default.date_dim +Output [2]: [d_date_sk#14, d_year#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), LessThanOrEqual(d_date_sk,2451910), GreaterThanOrEqual(d_date_sk,2451545), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#14, d_year#15] + +(13) Filter [codegen id : 2] +Input [2]: [d_date_sk#14, d_year#15] +Condition : ((((isnotnull(d_year#15) AND (d_year#15 = 2000)) AND (d_date_sk#14 <= 2451910)) AND (d_date_sk#14 >= 2451545)) AND isnotnull(d_date_sk#14)) + +(14) Project [codegen id : 2] +Output [1]: [d_date_sk#14] +Input [2]: [d_date_sk#14, d_year#15] + +(15) BroadcastExchange 
+Input [1]: [d_date_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#14] +Join condition: None + +(17) Project [codegen id : 5] +Output [6]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, d_date_sk#14] + +(18) Scan parquet default.store +Output [2]: [s_store_sk#17, s_state#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store] +PushedFilters: [In(s_state, [TN,AL,SD]), IsNotNull(s_store_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [s_store_sk#17, s_state#18] + +(20) Filter [codegen id : 3] +Input [2]: [s_store_sk#17, s_state#18] +Condition : (s_state#18 IN (TN,AL,SD) AND isnotnull(s_store_sk#17)) + +(21) BroadcastExchange +Input [2]: [s_store_sk#17, s_state#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] + +(22) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#17] +Join condition: None + +(23) Project [codegen id : 5] +Output [6]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, s_state#18] +Input [8]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, s_store_sk#17, s_state#18] + +(24) Scan parquet default.item +Output [2]: [i_item_sk#20, i_item_id#21] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id 
: 4] +Input [2]: [i_item_sk#20, i_item_id#21] + +(26) Filter [codegen id : 4] +Input [2]: [i_item_sk#20, i_item_id#21] +Condition : isnotnull(i_item_sk#20) + +(27) BroadcastExchange +Input [2]: [i_item_sk#20, i_item_id#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#20] +Join condition: None + +(29) Project [codegen id : 5] +Output [6]: [i_item_id#21, s_state#18, ss_quantity#5 AS agg1#23, ss_list_price#6 AS agg2#24, ss_coupon_amt#8 AS agg3#25, ss_sales_price#7 AS agg4#26] +Input [8]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, s_state#18, i_item_sk#20, i_item_id#21] + +(30) HashAggregate [codegen id : 5] +Input [6]: [i_item_id#21, s_state#18, agg1#23, agg2#24, agg3#25, agg4#26] +Keys [2]: [i_item_id#21, s_state#18] +Functions [4]: [partial_avg(cast(agg1#23 as bigint)), partial_avg(UnscaledValue(agg2#24)), partial_avg(UnscaledValue(agg3#25)), partial_avg(UnscaledValue(agg4#26))] +Aggregate Attributes [8]: [sum#27, count#28, sum#29, count#30, sum#31, count#32, sum#33, count#34] +Results [10]: [i_item_id#21, s_state#18, sum#35, count#36, sum#37, count#38, sum#39, count#40, sum#41, count#42] + +(31) Exchange +Input [10]: [i_item_id#21, s_state#18, sum#35, count#36, sum#37, count#38, sum#39, count#40, sum#41, count#42] +Arguments: hashpartitioning(i_item_id#21, s_state#18, 5), true, [id=#43] + +(32) HashAggregate [codegen id : 6] +Input [10]: [i_item_id#21, s_state#18, sum#35, count#36, sum#37, count#38, sum#39, count#40, sum#41, count#42] +Keys [2]: [i_item_id#21, s_state#18] +Functions [4]: [avg(cast(agg1#23 as bigint)), avg(UnscaledValue(agg2#24)), avg(UnscaledValue(agg3#25)), avg(UnscaledValue(agg4#26))] +Aggregate Attributes [4]: [avg(cast(agg1#23 as bigint))#44, avg(UnscaledValue(agg2#24))#45, avg(UnscaledValue(agg3#25))#46, avg(UnscaledValue(agg4#26))#47] +Results [7]: 
[i_item_id#21, s_state#18, 0 AS g_state#48, avg(cast(agg1#23 as bigint))#44 AS agg1#49, cast((avg(UnscaledValue(agg2#24))#45 / 100.0) as decimal(11,6)) AS agg2#50, cast((avg(UnscaledValue(agg3#25))#46 / 100.0) as decimal(11,6)) AS agg3#51, cast((avg(UnscaledValue(agg4#26))#47 / 100.0) as decimal(11,6)) AS agg4#52] + +(33) Scan parquet default.store_sales +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2451545), LessThanOrEqual(ss_sold_date_sk,2451910), IsNotNull(ss_cdemo_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 11] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] + +(35) Filter [codegen id : 11] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Condition : (((((isnotnull(ss_sold_date_sk#1) AND (ss_sold_date_sk#1 >= 2451545)) AND (ss_sold_date_sk#1 <= 2451910)) AND isnotnull(ss_cdemo_sk#3)) AND isnotnull(ss_store_sk#4)) AND isnotnull(ss_item_sk#2)) + +(36) ReusedExchange [Reuses operator id: 8] +Output [1]: [cd_demo_sk#9] + +(37) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ss_cdemo_sk#3] +Right keys [1]: [cd_demo_sk#9] +Join condition: None + +(38) Project [codegen id : 11] +Output [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, cd_demo_sk#9] + +(39) 
ReusedExchange [Reuses operator id: 15] +Output [1]: [d_date_sk#14] + +(40) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#14] +Join condition: None + +(41) Project [codegen id : 11] +Output [6]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, d_date_sk#14] + +(42) Scan parquet default.store +Output [2]: [s_store_sk#17, s_state#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store] +PushedFilters: [In(s_state, [TN,AL,SD]), IsNotNull(s_store_sk)] +ReadSchema: struct + +(43) ColumnarToRow [codegen id : 9] +Input [2]: [s_store_sk#17, s_state#18] + +(44) Filter [codegen id : 9] +Input [2]: [s_store_sk#17, s_state#18] +Condition : (s_state#18 IN (TN,AL,SD) AND isnotnull(s_store_sk#17)) + +(45) Project [codegen id : 9] +Output [1]: [s_store_sk#17] +Input [2]: [s_store_sk#17, s_state#18] + +(46) BroadcastExchange +Input [1]: [s_store_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#53] + +(47) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#17] +Join condition: None + +(48) Project [codegen id : 11] +Output [5]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [7]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, s_store_sk#17] + +(49) ReusedExchange [Reuses operator id: 27] +Output [2]: [i_item_sk#20, i_item_id#21] + +(50) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#20] +Join condition: None + +(51) Project [codegen id : 11] +Output [5]: [i_item_id#21, ss_quantity#5 AS agg1#23, ss_list_price#6 
AS agg2#24, ss_coupon_amt#8 AS agg3#25, ss_sales_price#7 AS agg4#26] +Input [7]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_sk#20, i_item_id#21] + +(52) HashAggregate [codegen id : 11] +Input [5]: [i_item_id#21, agg1#23, agg2#24, agg3#25, agg4#26] +Keys [1]: [i_item_id#21] +Functions [4]: [partial_avg(cast(agg1#23 as bigint)), partial_avg(UnscaledValue(agg2#24)), partial_avg(UnscaledValue(agg3#25)), partial_avg(UnscaledValue(agg4#26))] +Aggregate Attributes [8]: [sum#54, count#55, sum#56, count#57, sum#58, count#59, sum#60, count#61] +Results [9]: [i_item_id#21, sum#62, count#63, sum#64, count#65, sum#66, count#67, sum#68, count#69] + +(53) Exchange +Input [9]: [i_item_id#21, sum#62, count#63, sum#64, count#65, sum#66, count#67, sum#68, count#69] +Arguments: hashpartitioning(i_item_id#21, 5), true, [id=#70] + +(54) HashAggregate [codegen id : 12] +Input [9]: [i_item_id#21, sum#62, count#63, sum#64, count#65, sum#66, count#67, sum#68, count#69] +Keys [1]: [i_item_id#21] +Functions [4]: [avg(cast(agg1#23 as bigint)), avg(UnscaledValue(agg2#24)), avg(UnscaledValue(agg3#25)), avg(UnscaledValue(agg4#26))] +Aggregate Attributes [4]: [avg(cast(agg1#23 as bigint))#71, avg(UnscaledValue(agg2#24))#72, avg(UnscaledValue(agg3#25))#73, avg(UnscaledValue(agg4#26))#74] +Results [7]: [i_item_id#21, null AS s_state#75, 1 AS g_state#76, avg(cast(agg1#23 as bigint))#71 AS agg1#77, cast((avg(UnscaledValue(agg2#24))#72 / 100.0) as decimal(11,6)) AS agg2#78, cast((avg(UnscaledValue(agg3#25))#73 / 100.0) as decimal(11,6)) AS agg3#79, cast((avg(UnscaledValue(agg4#26))#74 / 100.0) as decimal(11,6)) AS agg4#80] + +(55) Scan parquet default.store_sales +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2451545), LessThanOrEqual(ss_sold_date_sk,2451910), IsNotNull(ss_cdemo_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(56) ColumnarToRow [codegen id : 17] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] + +(57) Filter [codegen id : 17] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Condition : (((((isnotnull(ss_sold_date_sk#1) AND (ss_sold_date_sk#1 >= 2451545)) AND (ss_sold_date_sk#1 <= 2451910)) AND isnotnull(ss_cdemo_sk#3)) AND isnotnull(ss_store_sk#4)) AND isnotnull(ss_item_sk#2)) + +(58) ReusedExchange [Reuses operator id: 8] +Output [1]: [cd_demo_sk#9] + +(59) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ss_cdemo_sk#3] +Right keys [1]: [cd_demo_sk#9] +Join condition: None + +(60) Project [codegen id : 17] +Output [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, cd_demo_sk#9] + +(61) ReusedExchange [Reuses operator id: 15] +Output [1]: [d_date_sk#14] + +(62) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#14] +Join condition: None + +(63) Project [codegen id : 17] +Output [6]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, d_date_sk#14] + +(64) ReusedExchange [Reuses operator id: 46] +Output 
[1]: [s_store_sk#17] + +(65) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#17] +Join condition: None + +(66) Project [codegen id : 17] +Output [5]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [7]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, s_store_sk#17] + +(67) Scan parquet default.item +Output [1]: [i_item_sk#20] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(68) ColumnarToRow [codegen id : 16] +Input [1]: [i_item_sk#20] + +(69) Filter [codegen id : 16] +Input [1]: [i_item_sk#20] +Condition : isnotnull(i_item_sk#20) + +(70) BroadcastExchange +Input [1]: [i_item_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#81] + +(71) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#20] +Join condition: None + +(72) Project [codegen id : 17] +Output [4]: [ss_quantity#5 AS agg1#23, ss_list_price#6 AS agg2#24, ss_coupon_amt#8 AS agg3#25, ss_sales_price#7 AS agg4#26] +Input [6]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_sk#20] + +(73) HashAggregate [codegen id : 17] +Input [4]: [agg1#23, agg2#24, agg3#25, agg4#26] +Keys: [] +Functions [4]: [partial_avg(cast(agg1#23 as bigint)), partial_avg(UnscaledValue(agg2#24)), partial_avg(UnscaledValue(agg3#25)), partial_avg(UnscaledValue(agg4#26))] +Aggregate Attributes [8]: [sum#82, count#83, sum#84, count#85, sum#86, count#87, sum#88, count#89] +Results [8]: [sum#90, count#91, sum#92, count#93, sum#94, count#95, sum#96, count#97] + +(74) Exchange +Input [8]: [sum#90, count#91, sum#92, count#93, sum#94, count#95, sum#96, count#97] +Arguments: SinglePartition, true, 
[id=#98] + +(75) HashAggregate [codegen id : 18] +Input [8]: [sum#90, count#91, sum#92, count#93, sum#94, count#95, sum#96, count#97] +Keys: [] +Functions [4]: [avg(cast(agg1#23 as bigint)), avg(UnscaledValue(agg2#24)), avg(UnscaledValue(agg3#25)), avg(UnscaledValue(agg4#26))] +Aggregate Attributes [4]: [avg(cast(agg1#23 as bigint))#99, avg(UnscaledValue(agg2#24))#100, avg(UnscaledValue(agg3#25))#101, avg(UnscaledValue(agg4#26))#102] +Results [7]: [null AS i_item_id#103, null AS s_state#104, 1 AS g_state#105, avg(cast(agg1#23 as bigint))#99 AS agg1#106, cast((avg(UnscaledValue(agg2#24))#100 / 100.0) as decimal(11,6)) AS agg2#107, cast((avg(UnscaledValue(agg3#25))#101 / 100.0) as decimal(11,6)) AS agg3#108, cast((avg(UnscaledValue(agg4#26))#102 / 100.0) as decimal(11,6)) AS agg4#109] + +(76) Union + +(77) TakeOrderedAndProject +Input [7]: [i_item_id#21, s_state#18, g_state#48, agg1#49, agg2#50, agg3#51, agg4#52] +Arguments: 100, [i_item_id#21 ASC NULLS FIRST, s_state#18 ASC NULLS FIRST], [i_item_id#21, s_state#18, g_state#48, agg1#49, agg2#50, agg3#51, agg4#52] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q27/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q27/simplified.txt new file mode 100644 index 0000000000000..7bfdbae974b51 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q27/simplified.txt @@ -0,0 +1,113 @@ +TakeOrderedAndProject [agg1,agg2,agg3,agg4,g_state,i_item_id,s_state] + Union + WholeStageCodegen (6) + HashAggregate [count,count,count,count,i_item_id,s_state,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,avg(UnscaledValue(agg2)),avg(UnscaledValue(agg3)),avg(UnscaledValue(agg4)),avg(cast(agg1 as bigint)),count,count,count,count,g_state,sum,sum,sum,sum] + InputAdapter + Exchange [i_item_id,s_state] #1 + WholeStageCodegen (5) + HashAggregate [agg1,agg2,agg3,agg4,i_item_id,s_state] 
[count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] + Project [i_item_id,s_state,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [s_state,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] + Filter [ss_cdemo_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [cd_demo_sk] + Filter [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [s_state,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_state,s_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_id,i_item_sk] + WholeStageCodegen (12) + HashAggregate [count,count,count,count,i_item_id,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,avg(UnscaledValue(agg2)),avg(UnscaledValue(agg3)),avg(UnscaledValue(agg4)),avg(cast(agg1 as bigint)),count,count,count,count,g_state,s_state,sum,sum,sum,sum] + InputAdapter + Exchange [i_item_id] 
#6 + WholeStageCodegen (11) + HashAggregate [agg1,agg2,agg3,agg4,i_item_id] [count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] + Project [i_item_id,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] + Filter [ss_cdemo_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + ReusedExchange [cd_demo_sk] #2 + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (9) + Project [s_store_sk] + Filter [s_state,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_state,s_store_sk] + InputAdapter + ReusedExchange [i_item_id,i_item_sk] #5 + WholeStageCodegen (18) + HashAggregate [count,count,count,count,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,avg(UnscaledValue(agg2)),avg(UnscaledValue(agg3)),avg(UnscaledValue(agg4)),avg(cast(agg1 as bigint)),count,count,count,count,g_state,i_item_id,s_state,sum,sum,sum,sum] + InputAdapter + Exchange #8 + WholeStageCodegen (17) + HashAggregate [agg1,agg2,agg3,agg4] [count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] + Project [ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project 
[ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] + Filter [ss_cdemo_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + ReusedExchange [cd_demo_sk] #2 + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [s_store_sk] #7 + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (16) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q3.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q3.sf100/explain.txt new file mode 100644 index 0000000000000..c7212ca883700 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q3.sf100/explain.txt @@ -0,0 +1,122 @@ +== Physical Plan == +TakeOrderedAndProject (21) ++- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Project (17) + +- * BroadcastHashJoin Inner BuildRight (16) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.store_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.item (4) + +- BroadcastExchange (15) + +- * Project (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet default.date_dim (11) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_net_profit#3] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store_sales] +PushedFilters: [Or(Or(Or(Or(Or(Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2415355),LessThanOrEqual(ss_sold_date_sk,2415385)),And(GreaterThanOrEqual(ss_sold_date_sk,2415720),LessThanOrEqual(ss_sold_date_sk,2415750))),Or(And(GreaterThanOrEqual(ss_sold_date_sk,2416085),LessThanOrEqual(ss_sold_date_sk,2416115)),And(GreaterThanOrEqual(ss_sold_date_sk,2416450),LessThanOrEqual(ss_sold_date_sk,2416480)))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2416816),LessThanOrEqual(ss_sold_date_sk,2416846)),And(GreaterThanOrEqual(ss_sold_date_sk,2417181),LessThanOrEqual(ss_sold_date_sk,2417211))),And(GreaterThanOrEqual(ss_sold_date_sk,2417546),LessThanOrEqual(ss_sold_date_sk,2417576)))),Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2417911),LessThanOrEqual(ss_sold_date_sk,2417941)),And(GreaterThanOrEqual(ss_sold_date_sk,2418277),LessThanOrEqual(ss_sold_date_sk,2418307))),And(GreaterThanOrEqual(ss_sold_date_sk,2418642),LessThanOrEqual(ss_sold_date_sk,2418672))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2419007),LessThanOrEqual(ss_sold_date_sk,2419037)),And(GreaterThanOrEqual(ss_sold_date_sk,2419372),LessThanOrEqual(ss_sold_date_sk,2419402))),And(GreaterThanOrEqual(ss_sold_date_sk,2419738),LessThanOrEqual(ss_sold_date_sk,2419768))))),Or(Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2420103),LessThanOrEqual(ss_sold_date_sk,2420133)),And(GreaterThanOrEqual(ss_sold_date_sk,2420468),LessThanOrEqual(ss_sold_date_sk,2420498))),And(GreaterThanOrEqual(ss_sold_date_sk,2420833),LessThanOrEqual(ss_sold_date_sk,2420863))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2421199),LessThanOrEqual(ss_sold_date_sk,2421229)),And(GreaterThanOrEqual(ss_sold_date_sk,2421564),LessThanOrEqual(ss_sold_date_sk,2421594))),And(GreaterThanOrEqual(ss_sold_date_sk,2421929),LessThanOrEqual(ss_sold_date_sk,2421959)))),Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2422
294),LessThanOrEqual(ss_sold_date_sk,2422324)),And(GreaterThanOrEqual(ss_sold_date_sk,2422660),LessThanOrEqual(ss_sold_date_sk,2422690))),And(GreaterThanOrEqual(ss_sold_date_sk,2423025),LessThanOrEqual(ss_sold_date_sk,2423055))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2423390),LessThanOrEqual(ss_sold_date_sk,2423420)),And(GreaterThanOrEqual(ss_sold_date_sk,2423755),LessThanOrEqual(ss_sold_date_sk,2423785))),And(GreaterThanOrEqual(ss_sold_date_sk,2424121),LessThanOrEqual(ss_sold_date_sk,2424151)))))),Or(Or(Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2424486),LessThanOrEqual(ss_sold_date_sk,2424516)),And(GreaterThanOrEqual(ss_sold_date_sk,2424851),LessThanOrEqual(ss_sold_date_sk,2424881))),Or(And(GreaterThanOrEqual(ss_sold_date_sk,2425216),LessThanOrEqual(ss_sold_date_sk,2425246)),And(GreaterThanOrEqual(ss_sold_date_sk,2425582),LessThanOrEqual(ss_sold_date_sk,2425612)))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2425947),LessThanOrEqual(ss_sold_date_sk,2425977)),And(GreaterThanOrEqual(ss_sold_date_sk,2426312),LessThanOrEqual(ss_sold_date_sk,2426342))),And(GreaterThanOrEqual(ss_sold_date_sk,2426677),LessThanOrEqual(ss_sold_date_sk,2426707)))),Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2427043),LessThanOrEqual(ss_sold_date_sk,2427073)),And(GreaterThanOrEqual(ss_sold_date_sk,2427408),LessThanOrEqual(ss_sold_date_sk,2427438))),And(GreaterThanOrEqual(ss_sold_date_sk,2427773),LessThanOrEqual(ss_sold_date_sk,2427803))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2428138),LessThanOrEqual(ss_sold_date_sk,2428168)),And(GreaterThanOrEqual(ss_sold_date_sk,2428504),LessThanOrEqual(ss_sold_date_sk,2428534))),And(GreaterThanOrEqual(ss_sold_date_sk,2428869),LessThanOrEqual(ss_sold_date_sk,2428899))))),Or(Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2429234),LessThanOrEqual(ss_sold_date_sk,2429264)),And(GreaterThanOrEqual(ss_sold_date_sk,2429599),LessThanOrEqual(ss_sold_date_sk,2429629))),And(GreaterThanOrEqual(ss_sold_date_sk,2429965),LessThanOrEqual(ss_sold_date
_sk,2429995))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2430330),LessThanOrEqual(ss_sold_date_sk,2430360)),And(GreaterThanOrEqual(ss_sold_date_sk,2430695),LessThanOrEqual(ss_sold_date_sk,2430725))),And(GreaterThanOrEqual(ss_sold_date_sk,2431060),LessThanOrEqual(ss_sold_date_sk,2431090)))),Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2431426),LessThanOrEqual(ss_sold_date_sk,2431456)),And(GreaterThanOrEqual(ss_sold_date_sk,2431791),LessThanOrEqual(ss_sold_date_sk,2431821))),And(GreaterThanOrEqual(ss_sold_date_sk,2432156),LessThanOrEqual(ss_sold_date_sk,2432186))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2432521),LessThanOrEqual(ss_sold_date_sk,2432551)),And(GreaterThanOrEqual(ss_sold_date_sk,2432887),LessThanOrEqual(ss_sold_date_sk,2432917))),And(GreaterThanOrEqual(ss_sold_date_sk,2433252),LessThanOrEqual(ss_sold_date_sk,2433282))))))),Or(Or(Or(Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2433617),LessThanOrEqual(ss_sold_date_sk,2433647)),And(GreaterThanOrEqual(ss_sold_date_sk,2433982),LessThanOrEqual(ss_sold_date_sk,2434012))),Or(And(GreaterThanOrEqual(ss_sold_date_sk,2434348),LessThanOrEqual(ss_sold_date_sk,2434378)),And(GreaterThanOrEqual(ss_sold_date_sk,2434713),LessThanOrEqual(ss_sold_date_sk,2434743)))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2435078),LessThanOrEqual(ss_sold_date_sk,2435108)),And(GreaterThanOrEqual(ss_sold_date_sk,2435443),LessThanOrEqual(ss_sold_date_sk,2435473))),And(GreaterThanOrEqual(ss_sold_date_sk,2435809),LessThanOrEqual(ss_sold_date_sk,2435839)))),Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2436174),LessThanOrEqual(ss_sold_date_sk,2436204)),And(GreaterThanOrEqual(ss_sold_date_sk,2436539),LessThanOrEqual(ss_sold_date_sk,2436569))),And(GreaterThanOrEqual(ss_sold_date_sk,2436904),LessThanOrEqual(ss_sold_date_sk,2436934))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2437270),LessThanOrEqual(ss_sold_date_sk,2437300)),And(GreaterThanOrEqual(ss_sold_date_sk,2437635),LessThanOrEqual(ss_sold_date_sk,2437665))),And(GreaterT
hanOrEqual(ss_sold_date_sk,2438000),LessThanOrEqual(ss_sold_date_sk,2438030))))),Or(Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2438365),LessThanOrEqual(ss_sold_date_sk,2438395)),And(GreaterThanOrEqual(ss_sold_date_sk,2438731),LessThanOrEqual(ss_sold_date_sk,2438761))),And(GreaterThanOrEqual(ss_sold_date_sk,2439096),LessThanOrEqual(ss_sold_date_sk,2439126))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2439461),LessThanOrEqual(ss_sold_date_sk,2439491)),And(GreaterThanOrEqual(ss_sold_date_sk,2439826),LessThanOrEqual(ss_sold_date_sk,2439856))),And(GreaterThanOrEqual(ss_sold_date_sk,2440192),LessThanOrEqual(ss_sold_date_sk,2440222)))),Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2440557),LessThanOrEqual(ss_sold_date_sk,2440587)),And(GreaterThanOrEqual(ss_sold_date_sk,2440922),LessThanOrEqual(ss_sold_date_sk,2440952))),And(GreaterThanOrEqual(ss_sold_date_sk,2441287),LessThanOrEqual(ss_sold_date_sk,2441317))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2441653),LessThanOrEqual(ss_sold_date_sk,2441683)),And(GreaterThanOrEqual(ss_sold_date_sk,2442018),LessThanOrEqual(ss_sold_date_sk,2442048))),And(GreaterThanOrEqual(ss_sold_date_sk,2442383),LessThanOrEqual(ss_sold_date_sk,2442413)))))),Or(Or(Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2442748),LessThanOrEqual(ss_sold_date_sk,2442778)),And(GreaterThanOrEqual(ss_sold_date_sk,2443114),LessThanOrEqual(ss_sold_date_sk,2443144))),Or(And(GreaterThanOrEqual(ss_sold_date_sk,2443479),LessThanOrEqual(ss_sold_date_sk,2443509)),And(GreaterThanOrEqual(ss_sold_date_sk,2443844),LessThanOrEqual(ss_sold_date_sk,2443874)))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2444209),LessThanOrEqual(ss_sold_date_sk,2444239)),And(GreaterThanOrEqual(ss_sold_date_sk,2444575),LessThanOrEqual(ss_sold_date_sk,2444605))),And(GreaterThanOrEqual(ss_sold_date_sk,2444940),LessThanOrEqual(ss_sold_date_sk,2444970)))),Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2445305),LessThanOrEqual(ss_sold_date_sk,2445335)),And(GreaterThanOrEqual(ss_sold_date
_sk,2445670),LessThanOrEqual(ss_sold_date_sk,2445700))),And(GreaterThanOrEqual(ss_sold_date_sk,2446036),LessThanOrEqual(ss_sold_date_sk,2446066))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2446401),LessThanOrEqual(ss_sold_date_sk,2446431)),And(GreaterThanOrEqual(ss_sold_date_sk,2446766),LessThanOrEqual(ss_sold_date_sk,2446796))),And(GreaterThanOrEqual(ss_sold_date_sk,2447131),LessThanOrEqual(ss_sold_date_sk,2447161))))),Or(Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2447497),LessThanOrEqual(ss_sold_date_sk,2447527)),And(GreaterThanOrEqual(ss_sold_date_sk,2447862),LessThanOrEqual(ss_sold_date_sk,2447892))),And(GreaterThanOrEqual(ss_sold_date_sk,2448227),LessThanOrEqual(ss_sold_date_sk,2448257))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2448592),LessThanOrEqual(ss_sold_date_sk,2448622)),And(GreaterThanOrEqual(ss_sold_date_sk,2448958),LessThanOrEqual(ss_sold_date_sk,2448988))),And(GreaterThanOrEqual(ss_sold_date_sk,2449323),LessThanOrEqual(ss_sold_date_sk,2449353)))),Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2449688),LessThanOrEqual(ss_sold_date_sk,2449718)),And(GreaterThanOrEqual(ss_sold_date_sk,2450053),LessThanOrEqual(ss_sold_date_sk,2450083))),And(GreaterThanOrEqual(ss_sold_date_sk,2450419),LessThanOrEqual(ss_sold_date_sk,2450449))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2450784),LessThanOrEqual(ss_sold_date_sk,2450814)),And(GreaterThanOrEqual(ss_sold_date_sk,2451149),LessThanOrEqual(ss_sold_date_sk,2451179))),And(GreaterThanOrEqual(ss_sold_date_sk,2451514),LessThanOrEqual(ss_sold_date_sk,2451544)))))))),Or(Or(Or(Or(Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2451880),LessThanOrEqual(ss_sold_date_sk,2451910)),And(GreaterThanOrEqual(ss_sold_date_sk,2452245),LessThanOrEqual(ss_sold_date_sk,2452275))),Or(And(GreaterThanOrEqual(ss_sold_date_sk,2452610),LessThanOrEqual(ss_sold_date_sk,2452640)),And(GreaterThanOrEqual(ss_sold_date_sk,2452975),LessThanOrEqual(ss_sold_date_sk,2453005)))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2453341),LessT
hanOrEqual(ss_sold_date_sk,2453371)),And(GreaterThanOrEqual(ss_sold_date_sk,2453706),LessThanOrEqual(ss_sold_date_sk,2453736))),And(GreaterThanOrEqual(ss_sold_date_sk,2454071),LessThanOrEqual(ss_sold_date_sk,2454101)))),Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2454436),LessThanOrEqual(ss_sold_date_sk,2454466)),And(GreaterThanOrEqual(ss_sold_date_sk,2454802),LessThanOrEqual(ss_sold_date_sk,2454832))),And(GreaterThanOrEqual(ss_sold_date_sk,2455167),LessThanOrEqual(ss_sold_date_sk,2455197))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2455532),LessThanOrEqual(ss_sold_date_sk,2455562)),And(GreaterThanOrEqual(ss_sold_date_sk,2455897),LessThanOrEqual(ss_sold_date_sk,2455927))),And(GreaterThanOrEqual(ss_sold_date_sk,2456263),LessThanOrEqual(ss_sold_date_sk,2456293))))),Or(Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2456628),LessThanOrEqual(ss_sold_date_sk,2456658)),And(GreaterThanOrEqual(ss_sold_date_sk,2456993),LessThanOrEqual(ss_sold_date_sk,2457023))),And(GreaterThanOrEqual(ss_sold_date_sk,2457358),LessThanOrEqual(ss_sold_date_sk,2457388))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2457724),LessThanOrEqual(ss_sold_date_sk,2457754)),And(GreaterThanOrEqual(ss_sold_date_sk,2458089),LessThanOrEqual(ss_sold_date_sk,2458119))),And(GreaterThanOrEqual(ss_sold_date_sk,2458454),LessThanOrEqual(ss_sold_date_sk,2458484)))),Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2458819),LessThanOrEqual(ss_sold_date_sk,2458849)),And(GreaterThanOrEqual(ss_sold_date_sk,2459185),LessThanOrEqual(ss_sold_date_sk,2459215))),And(GreaterThanOrEqual(ss_sold_date_sk,2459550),LessThanOrEqual(ss_sold_date_sk,2459580))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2459915),LessThanOrEqual(ss_sold_date_sk,2459945)),And(GreaterThanOrEqual(ss_sold_date_sk,2460280),LessThanOrEqual(ss_sold_date_sk,2460310))),And(GreaterThanOrEqual(ss_sold_date_sk,2460646),LessThanOrEqual(ss_sold_date_sk,2460676)))))),Or(Or(Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2461011),LessThanOrEqual(ss_sold_date_sk
,2461041)),And(GreaterThanOrEqual(ss_sold_date_sk,2461376),LessThanOrEqual(ss_sold_date_sk,2461406))),Or(And(GreaterThanOrEqual(ss_sold_date_sk,2461741),LessThanOrEqual(ss_sold_date_sk,2461771)),And(GreaterThanOrEqual(ss_sold_date_sk,2462107),LessThanOrEqual(ss_sold_date_sk,2462137)))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2462472),LessThanOrEqual(ss_sold_date_sk,2462502)),And(GreaterThanOrEqual(ss_sold_date_sk,2462837),LessThanOrEqual(ss_sold_date_sk,2462867))),And(GreaterThanOrEqual(ss_sold_date_sk,2463202),LessThanOrEqual(ss_sold_date_sk,2463232)))),Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2463568),LessThanOrEqual(ss_sold_date_sk,2463598)),And(GreaterThanOrEqual(ss_sold_date_sk,2463933),LessThanOrEqual(ss_sold_date_sk,2463963))),And(GreaterThanOrEqual(ss_sold_date_sk,2464298),LessThanOrEqual(ss_sold_date_sk,2464328))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2464663),LessThanOrEqual(ss_sold_date_sk,2464693)),And(GreaterThanOrEqual(ss_sold_date_sk,2465029),LessThanOrEqual(ss_sold_date_sk,2465059))),And(GreaterThanOrEqual(ss_sold_date_sk,2465394),LessThanOrEqual(ss_sold_date_sk,2465424))))),Or(Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2465759),LessThanOrEqual(ss_sold_date_sk,2465789)),And(GreaterThanOrEqual(ss_sold_date_sk,2466124),LessThanOrEqual(ss_sold_date_sk,2466154))),And(GreaterThanOrEqual(ss_sold_date_sk,2466490),LessThanOrEqual(ss_sold_date_sk,2466520))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2466855),LessThanOrEqual(ss_sold_date_sk,2466885)),And(GreaterThanOrEqual(ss_sold_date_sk,2467220),LessThanOrEqual(ss_sold_date_sk,2467250))),And(GreaterThanOrEqual(ss_sold_date_sk,2467585),LessThanOrEqual(ss_sold_date_sk,2467615)))),Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2467951),LessThanOrEqual(ss_sold_date_sk,2467981)),And(GreaterThanOrEqual(ss_sold_date_sk,2468316),LessThanOrEqual(ss_sold_date_sk,2468346))),And(GreaterThanOrEqual(ss_sold_date_sk,2468681),LessThanOrEqual(ss_sold_date_sk,2468711))),Or(Or(And(GreaterThanOrEqual(
ss_sold_date_sk,2469046),LessThanOrEqual(ss_sold_date_sk,2469076)),And(GreaterThanOrEqual(ss_sold_date_sk,2469412),LessThanOrEqual(ss_sold_date_sk,2469442))),And(GreaterThanOrEqual(ss_sold_date_sk,2469777),LessThanOrEqual(ss_sold_date_sk,2469807))))))),Or(Or(Or(Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2470142),LessThanOrEqual(ss_sold_date_sk,2470172)),And(GreaterThanOrEqual(ss_sold_date_sk,2470507),LessThanOrEqual(ss_sold_date_sk,2470537))),Or(And(GreaterThanOrEqual(ss_sold_date_sk,2470873),LessThanOrEqual(ss_sold_date_sk,2470903)),And(GreaterThanOrEqual(ss_sold_date_sk,2471238),LessThanOrEqual(ss_sold_date_sk,2471268)))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2471603),LessThanOrEqual(ss_sold_date_sk,2471633)),And(GreaterThanOrEqual(ss_sold_date_sk,2471968),LessThanOrEqual(ss_sold_date_sk,2471998))),And(GreaterThanOrEqual(ss_sold_date_sk,2472334),LessThanOrEqual(ss_sold_date_sk,2472364)))),Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2472699),LessThanOrEqual(ss_sold_date_sk,2472729)),And(GreaterThanOrEqual(ss_sold_date_sk,2473064),LessThanOrEqual(ss_sold_date_sk,2473094))),And(GreaterThanOrEqual(ss_sold_date_sk,2473429),LessThanOrEqual(ss_sold_date_sk,2473459))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2473795),LessThanOrEqual(ss_sold_date_sk,2473825)),And(GreaterThanOrEqual(ss_sold_date_sk,2474160),LessThanOrEqual(ss_sold_date_sk,2474190))),And(GreaterThanOrEqual(ss_sold_date_sk,2474525),LessThanOrEqual(ss_sold_date_sk,2474555))))),Or(Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2474890),LessThanOrEqual(ss_sold_date_sk,2474920)),And(GreaterThanOrEqual(ss_sold_date_sk,2475256),LessThanOrEqual(ss_sold_date_sk,2475286))),And(GreaterThanOrEqual(ss_sold_date_sk,2475621),LessThanOrEqual(ss_sold_date_sk,2475651))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2475986),LessThanOrEqual(ss_sold_date_sk,2476016)),And(GreaterThanOrEqual(ss_sold_date_sk,2476351),LessThanOrEqual(ss_sold_date_sk,2476381))),And(GreaterThanOrEqual(ss_sold_date_sk,2476717),Less
ThanOrEqual(ss_sold_date_sk,2476747)))),Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2477082),LessThanOrEqual(ss_sold_date_sk,2477112)),And(GreaterThanOrEqual(ss_sold_date_sk,2477447),LessThanOrEqual(ss_sold_date_sk,2477477))),And(GreaterThanOrEqual(ss_sold_date_sk,2477812),LessThanOrEqual(ss_sold_date_sk,2477842))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2478178),LessThanOrEqual(ss_sold_date_sk,2478208)),And(GreaterThanOrEqual(ss_sold_date_sk,2478543),LessThanOrEqual(ss_sold_date_sk,2478573))),And(GreaterThanOrEqual(ss_sold_date_sk,2478908),LessThanOrEqual(ss_sold_date_sk,2478938)))))),Or(Or(Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2479273),LessThanOrEqual(ss_sold_date_sk,2479303)),And(GreaterThanOrEqual(ss_sold_date_sk,2479639),LessThanOrEqual(ss_sold_date_sk,2479669))),Or(And(GreaterThanOrEqual(ss_sold_date_sk,2480004),LessThanOrEqual(ss_sold_date_sk,2480034)),And(GreaterThanOrEqual(ss_sold_date_sk,2480369),LessThanOrEqual(ss_sold_date_sk,2480399)))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2480734),LessThanOrEqual(ss_sold_date_sk,2480764)),And(GreaterThanOrEqual(ss_sold_date_sk,2481100),LessThanOrEqual(ss_sold_date_sk,2481130))),And(GreaterThanOrEqual(ss_sold_date_sk,2481465),LessThanOrEqual(ss_sold_date_sk,2481495)))),Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2481830),LessThanOrEqual(ss_sold_date_sk,2481860)),And(GreaterThanOrEqual(ss_sold_date_sk,2482195),LessThanOrEqual(ss_sold_date_sk,2482225))),And(GreaterThanOrEqual(ss_sold_date_sk,2482561),LessThanOrEqual(ss_sold_date_sk,2482591))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2482926),LessThanOrEqual(ss_sold_date_sk,2482956)),And(GreaterThanOrEqual(ss_sold_date_sk,2483291),LessThanOrEqual(ss_sold_date_sk,2483321))),And(GreaterThanOrEqual(ss_sold_date_sk,2483656),LessThanOrEqual(ss_sold_date_sk,2483686))))),Or(Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2484022),LessThanOrEqual(ss_sold_date_sk,2484052)),And(GreaterThanOrEqual(ss_sold_date_sk,2484387),LessThanOrEqual(ss_sold_dat
e_sk,2484417))),And(GreaterThanOrEqual(ss_sold_date_sk,2484752),LessThanOrEqual(ss_sold_date_sk,2484782))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2485117),LessThanOrEqual(ss_sold_date_sk,2485147)),And(GreaterThanOrEqual(ss_sold_date_sk,2485483),LessThanOrEqual(ss_sold_date_sk,2485513))),And(GreaterThanOrEqual(ss_sold_date_sk,2485848),LessThanOrEqual(ss_sold_date_sk,2485878)))),Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2486213),LessThanOrEqual(ss_sold_date_sk,2486243)),And(GreaterThanOrEqual(ss_sold_date_sk,2486578),LessThanOrEqual(ss_sold_date_sk,2486608))),And(GreaterThanOrEqual(ss_sold_date_sk,2486944),LessThanOrEqual(ss_sold_date_sk,2486974))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2487309),LessThanOrEqual(ss_sold_date_sk,2487339)),And(GreaterThanOrEqual(ss_sold_date_sk,2487674),LessThanOrEqual(ss_sold_date_sk,2487704))),And(GreaterThanOrEqual(ss_sold_date_sk,2488039),LessThanOrEqual(ss_sold_date_sk,2488069))))))))), IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_net_profit#3] + +(3) Filter [codegen id : 3] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_net_profit#3] +Condition : ((((((((((((ss_sold_date_sk#1 >= 2415355) AND (ss_sold_date_sk#1 <= 2415385)) OR ((ss_sold_date_sk#1 >= 2415720) AND (ss_sold_date_sk#1 <= 2415750))) OR (((ss_sold_date_sk#1 >= 2416085) AND (ss_sold_date_sk#1 <= 2416115)) OR ((ss_sold_date_sk#1 >= 2416450) AND (ss_sold_date_sk#1 <= 2416480)))) OR ((((ss_sold_date_sk#1 >= 2416816) AND (ss_sold_date_sk#1 <= 2416846)) OR ((ss_sold_date_sk#1 >= 2417181) AND (ss_sold_date_sk#1 <= 2417211))) OR ((ss_sold_date_sk#1 >= 2417546) AND (ss_sold_date_sk#1 <= 2417576)))) OR (((((ss_sold_date_sk#1 >= 2417911) AND (ss_sold_date_sk#1 <= 2417941)) OR ((ss_sold_date_sk#1 >= 2418277) AND (ss_sold_date_sk#1 <= 2418307))) OR ((ss_sold_date_sk#1 >= 2418642) AND (ss_sold_date_sk#1 <= 2418672))) OR ((((ss_sold_date_sk#1 >= 2419007) 
AND (ss_sold_date_sk#1 <= 2419037)) OR ((ss_sold_date_sk#1 >= 2419372) AND (ss_sold_date_sk#1 <= 2419402))) OR ((ss_sold_date_sk#1 >= 2419738) AND (ss_sold_date_sk#1 <= 2419768))))) OR ((((((ss_sold_date_sk#1 >= 2420103) AND (ss_sold_date_sk#1 <= 2420133)) OR ((ss_sold_date_sk#1 >= 2420468) AND (ss_sold_date_sk#1 <= 2420498))) OR ((ss_sold_date_sk#1 >= 2420833) AND (ss_sold_date_sk#1 <= 2420863))) OR ((((ss_sold_date_sk#1 >= 2421199) AND (ss_sold_date_sk#1 <= 2421229)) OR ((ss_sold_date_sk#1 >= 2421564) AND (ss_sold_date_sk#1 <= 2421594))) OR ((ss_sold_date_sk#1 >= 2421929) AND (ss_sold_date_sk#1 <= 2421959)))) OR (((((ss_sold_date_sk#1 >= 2422294) AND (ss_sold_date_sk#1 <= 2422324)) OR ((ss_sold_date_sk#1 >= 2422660) AND (ss_sold_date_sk#1 <= 2422690))) OR ((ss_sold_date_sk#1 >= 2423025) AND (ss_sold_date_sk#1 <= 2423055))) OR ((((ss_sold_date_sk#1 >= 2423390) AND (ss_sold_date_sk#1 <= 2423420)) OR ((ss_sold_date_sk#1 >= 2423755) AND (ss_sold_date_sk#1 <= 2423785))) OR ((ss_sold_date_sk#1 >= 2424121) AND (ss_sold_date_sk#1 <= 2424151)))))) OR (((((((ss_sold_date_sk#1 >= 2424486) AND (ss_sold_date_sk#1 <= 2424516)) OR ((ss_sold_date_sk#1 >= 2424851) AND (ss_sold_date_sk#1 <= 2424881))) OR (((ss_sold_date_sk#1 >= 2425216) AND (ss_sold_date_sk#1 <= 2425246)) OR ((ss_sold_date_sk#1 >= 2425582) AND (ss_sold_date_sk#1 <= 2425612)))) OR ((((ss_sold_date_sk#1 >= 2425947) AND (ss_sold_date_sk#1 <= 2425977)) OR ((ss_sold_date_sk#1 >= 2426312) AND (ss_sold_date_sk#1 <= 2426342))) OR ((ss_sold_date_sk#1 >= 2426677) AND (ss_sold_date_sk#1 <= 2426707)))) OR (((((ss_sold_date_sk#1 >= 2427043) AND (ss_sold_date_sk#1 <= 2427073)) OR ((ss_sold_date_sk#1 >= 2427408) AND (ss_sold_date_sk#1 <= 2427438))) OR ((ss_sold_date_sk#1 >= 2427773) AND (ss_sold_date_sk#1 <= 2427803))) OR ((((ss_sold_date_sk#1 >= 2428138) AND (ss_sold_date_sk#1 <= 2428168)) OR ((ss_sold_date_sk#1 >= 2428504) AND (ss_sold_date_sk#1 <= 2428534))) OR ((ss_sold_date_sk#1 >= 2428869) AND (ss_sold_date_sk#1 <= 
2428899))))) OR ((((((ss_sold_date_sk#1 >= 2429234) AND (ss_sold_date_sk#1 <= 2429264)) OR ((ss_sold_date_sk#1 >= 2429599) AND (ss_sold_date_sk#1 <= 2429629))) OR ((ss_sold_date_sk#1 >= 2429965) AND (ss_sold_date_sk#1 <= 2429995))) OR ((((ss_sold_date_sk#1 >= 2430330) AND (ss_sold_date_sk#1 <= 2430360)) OR ((ss_sold_date_sk#1 >= 2430695) AND (ss_sold_date_sk#1 <= 2430725))) OR ((ss_sold_date_sk#1 >= 2431060) AND (ss_sold_date_sk#1 <= 2431090)))) OR (((((ss_sold_date_sk#1 >= 2431426) AND (ss_sold_date_sk#1 <= 2431456)) OR ((ss_sold_date_sk#1 >= 2431791) AND (ss_sold_date_sk#1 <= 2431821))) OR ((ss_sold_date_sk#1 >= 2432156) AND (ss_sold_date_sk#1 <= 2432186))) OR ((((ss_sold_date_sk#1 >= 2432521) AND (ss_sold_date_sk#1 <= 2432551)) OR ((ss_sold_date_sk#1 >= 2432887) AND (ss_sold_date_sk#1 <= 2432917))) OR ((ss_sold_date_sk#1 >= 2433252) AND (ss_sold_date_sk#1 <= 2433282))))))) OR ((((((((ss_sold_date_sk#1 >= 2433617) AND (ss_sold_date_sk#1 <= 2433647)) OR ((ss_sold_date_sk#1 >= 2433982) AND (ss_sold_date_sk#1 <= 2434012))) OR (((ss_sold_date_sk#1 >= 2434348) AND (ss_sold_date_sk#1 <= 2434378)) OR ((ss_sold_date_sk#1 >= 2434713) AND (ss_sold_date_sk#1 <= 2434743)))) OR ((((ss_sold_date_sk#1 >= 2435078) AND (ss_sold_date_sk#1 <= 2435108)) OR ((ss_sold_date_sk#1 >= 2435443) AND (ss_sold_date_sk#1 <= 2435473))) OR ((ss_sold_date_sk#1 >= 2435809) AND (ss_sold_date_sk#1 <= 2435839)))) OR (((((ss_sold_date_sk#1 >= 2436174) AND (ss_sold_date_sk#1 <= 2436204)) OR ((ss_sold_date_sk#1 >= 2436539) AND (ss_sold_date_sk#1 <= 2436569))) OR ((ss_sold_date_sk#1 >= 2436904) AND (ss_sold_date_sk#1 <= 2436934))) OR ((((ss_sold_date_sk#1 >= 2437270) AND (ss_sold_date_sk#1 <= 2437300)) OR ((ss_sold_date_sk#1 >= 2437635) AND (ss_sold_date_sk#1 <= 2437665))) OR ((ss_sold_date_sk#1 >= 2438000) AND (ss_sold_date_sk#1 <= 2438030))))) OR ((((((ss_sold_date_sk#1 >= 2438365) AND (ss_sold_date_sk#1 <= 2438395)) OR ((ss_sold_date_sk#1 >= 2438731) AND (ss_sold_date_sk#1 <= 2438761))) OR 
((ss_sold_date_sk#1 >= 2439096) AND (ss_sold_date_sk#1 <= 2439126))) OR ((((ss_sold_date_sk#1 >= 2439461) AND (ss_sold_date_sk#1 <= 2439491)) OR ((ss_sold_date_sk#1 >= 2439826) AND (ss_sold_date_sk#1 <= 2439856))) OR ((ss_sold_date_sk#1 >= 2440192) AND (ss_sold_date_sk#1 <= 2440222)))) OR (((((ss_sold_date_sk#1 >= 2440557) AND (ss_sold_date_sk#1 <= 2440587)) OR ((ss_sold_date_sk#1 >= 2440922) AND (ss_sold_date_sk#1 <= 2440952))) OR ((ss_sold_date_sk#1 >= 2441287) AND (ss_sold_date_sk#1 <= 2441317))) OR ((((ss_sold_date_sk#1 >= 2441653) AND (ss_sold_date_sk#1 <= 2441683)) OR ((ss_sold_date_sk#1 >= 2442018) AND (ss_sold_date_sk#1 <= 2442048))) OR ((ss_sold_date_sk#1 >= 2442383) AND (ss_sold_date_sk#1 <= 2442413)))))) OR (((((((ss_sold_date_sk#1 >= 2442748) AND (ss_sold_date_sk#1 <= 2442778)) OR ((ss_sold_date_sk#1 >= 2443114) AND (ss_sold_date_sk#1 <= 2443144))) OR (((ss_sold_date_sk#1 >= 2443479) AND (ss_sold_date_sk#1 <= 2443509)) OR ((ss_sold_date_sk#1 >= 2443844) AND (ss_sold_date_sk#1 <= 2443874)))) OR ((((ss_sold_date_sk#1 >= 2444209) AND (ss_sold_date_sk#1 <= 2444239)) OR ((ss_sold_date_sk#1 >= 2444575) AND (ss_sold_date_sk#1 <= 2444605))) OR ((ss_sold_date_sk#1 >= 2444940) AND (ss_sold_date_sk#1 <= 2444970)))) OR (((((ss_sold_date_sk#1 >= 2445305) AND (ss_sold_date_sk#1 <= 2445335)) OR ((ss_sold_date_sk#1 >= 2445670) AND (ss_sold_date_sk#1 <= 2445700))) OR ((ss_sold_date_sk#1 >= 2446036) AND (ss_sold_date_sk#1 <= 2446066))) OR ((((ss_sold_date_sk#1 >= 2446401) AND (ss_sold_date_sk#1 <= 2446431)) OR ((ss_sold_date_sk#1 >= 2446766) AND (ss_sold_date_sk#1 <= 2446796))) OR ((ss_sold_date_sk#1 >= 2447131) AND (ss_sold_date_sk#1 <= 2447161))))) OR ((((((ss_sold_date_sk#1 >= 2447497) AND (ss_sold_date_sk#1 <= 2447527)) OR ((ss_sold_date_sk#1 >= 2447862) AND (ss_sold_date_sk#1 <= 2447892))) OR ((ss_sold_date_sk#1 >= 2448227) AND (ss_sold_date_sk#1 <= 2448257))) OR ((((ss_sold_date_sk#1 >= 2448592) AND (ss_sold_date_sk#1 <= 2448622)) OR ((ss_sold_date_sk#1 >= 2448958) 
AND (ss_sold_date_sk#1 <= 2448988))) OR ((ss_sold_date_sk#1 >= 2449323) AND (ss_sold_date_sk#1 <= 2449353)))) OR (((((ss_sold_date_sk#1 >= 2449688) AND (ss_sold_date_sk#1 <= 2449718)) OR ((ss_sold_date_sk#1 >= 2450053) AND (ss_sold_date_sk#1 <= 2450083))) OR ((ss_sold_date_sk#1 >= 2450419) AND (ss_sold_date_sk#1 <= 2450449))) OR ((((ss_sold_date_sk#1 >= 2450784) AND (ss_sold_date_sk#1 <= 2450814)) OR ((ss_sold_date_sk#1 >= 2451149) AND (ss_sold_date_sk#1 <= 2451179))) OR ((ss_sold_date_sk#1 >= 2451514) AND (ss_sold_date_sk#1 <= 2451544)))))))) OR (((((((((ss_sold_date_sk#1 >= 2451880) AND (ss_sold_date_sk#1 <= 2451910)) OR ((ss_sold_date_sk#1 >= 2452245) AND (ss_sold_date_sk#1 <= 2452275))) OR (((ss_sold_date_sk#1 >= 2452610) AND (ss_sold_date_sk#1 <= 2452640)) OR ((ss_sold_date_sk#1 >= 2452975) AND (ss_sold_date_sk#1 <= 2453005)))) OR ((((ss_sold_date_sk#1 >= 2453341) AND (ss_sold_date_sk#1 <= 2453371)) OR ((ss_sold_date_sk#1 >= 2453706) AND (ss_sold_date_sk#1 <= 2453736))) OR ((ss_sold_date_sk#1 >= 2454071) AND (ss_sold_date_sk#1 <= 2454101)))) OR (((((ss_sold_date_sk#1 >= 2454436) AND (ss_sold_date_sk#1 <= 2454466)) OR ((ss_sold_date_sk#1 >= 2454802) AND (ss_sold_date_sk#1 <= 2454832))) OR ((ss_sold_date_sk#1 >= 2455167) AND (ss_sold_date_sk#1 <= 2455197))) OR ((((ss_sold_date_sk#1 >= 2455532) AND (ss_sold_date_sk#1 <= 2455562)) OR ((ss_sold_date_sk#1 >= 2455897) AND (ss_sold_date_sk#1 <= 2455927))) OR ((ss_sold_date_sk#1 >= 2456263) AND (ss_sold_date_sk#1 <= 2456293))))) OR ((((((ss_sold_date_sk#1 >= 2456628) AND (ss_sold_date_sk#1 <= 2456658)) OR ((ss_sold_date_sk#1 >= 2456993) AND (ss_sold_date_sk#1 <= 2457023))) OR ((ss_sold_date_sk#1 >= 2457358) AND (ss_sold_date_sk#1 <= 2457388))) OR ((((ss_sold_date_sk#1 >= 2457724) AND (ss_sold_date_sk#1 <= 2457754)) OR ((ss_sold_date_sk#1 >= 2458089) AND (ss_sold_date_sk#1 <= 2458119))) OR ((ss_sold_date_sk#1 >= 2458454) AND (ss_sold_date_sk#1 <= 2458484)))) OR (((((ss_sold_date_sk#1 >= 2458819) AND (ss_sold_date_sk#1 
<= 2458849)) OR ((ss_sold_date_sk#1 >= 2459185) AND (ss_sold_date_sk#1 <= 2459215))) OR ((ss_sold_date_sk#1 >= 2459550) AND (ss_sold_date_sk#1 <= 2459580))) OR ((((ss_sold_date_sk#1 >= 2459915) AND (ss_sold_date_sk#1 <= 2459945)) OR ((ss_sold_date_sk#1 >= 2460280) AND (ss_sold_date_sk#1 <= 2460310))) OR ((ss_sold_date_sk#1 >= 2460646) AND (ss_sold_date_sk#1 <= 2460676)))))) OR (((((((ss_sold_date_sk#1 >= 2461011) AND (ss_sold_date_sk#1 <= 2461041)) OR ((ss_sold_date_sk#1 >= 2461376) AND (ss_sold_date_sk#1 <= 2461406))) OR (((ss_sold_date_sk#1 >= 2461741) AND (ss_sold_date_sk#1 <= 2461771)) OR ((ss_sold_date_sk#1 >= 2462107) AND (ss_sold_date_sk#1 <= 2462137)))) OR ((((ss_sold_date_sk#1 >= 2462472) AND (ss_sold_date_sk#1 <= 2462502)) OR ((ss_sold_date_sk#1 >= 2462837) AND (ss_sold_date_sk#1 <= 2462867))) OR ((ss_sold_date_sk#1 >= 2463202) AND (ss_sold_date_sk#1 <= 2463232)))) OR (((((ss_sold_date_sk#1 >= 2463568) AND (ss_sold_date_sk#1 <= 2463598)) OR ((ss_sold_date_sk#1 >= 2463933) AND (ss_sold_date_sk#1 <= 2463963))) OR ((ss_sold_date_sk#1 >= 2464298) AND (ss_sold_date_sk#1 <= 2464328))) OR ((((ss_sold_date_sk#1 >= 2464663) AND (ss_sold_date_sk#1 <= 2464693)) OR ((ss_sold_date_sk#1 >= 2465029) AND (ss_sold_date_sk#1 <= 2465059))) OR ((ss_sold_date_sk#1 >= 2465394) AND (ss_sold_date_sk#1 <= 2465424))))) OR ((((((ss_sold_date_sk#1 >= 2465759) AND (ss_sold_date_sk#1 <= 2465789)) OR ((ss_sold_date_sk#1 >= 2466124) AND (ss_sold_date_sk#1 <= 2466154))) OR ((ss_sold_date_sk#1 >= 2466490) AND (ss_sold_date_sk#1 <= 2466520))) OR ((((ss_sold_date_sk#1 >= 2466855) AND (ss_sold_date_sk#1 <= 2466885)) OR ((ss_sold_date_sk#1 >= 2467220) AND (ss_sold_date_sk#1 <= 2467250))) OR ((ss_sold_date_sk#1 >= 2467585) AND (ss_sold_date_sk#1 <= 2467615)))) OR (((((ss_sold_date_sk#1 >= 2467951) AND (ss_sold_date_sk#1 <= 2467981)) OR ((ss_sold_date_sk#1 >= 2468316) AND (ss_sold_date_sk#1 <= 2468346))) OR ((ss_sold_date_sk#1 >= 2468681) AND (ss_sold_date_sk#1 <= 2468711))) OR 
((((ss_sold_date_sk#1 >= 2469046) AND (ss_sold_date_sk#1 <= 2469076)) OR ((ss_sold_date_sk#1 >= 2469412) AND (ss_sold_date_sk#1 <= 2469442))) OR ((ss_sold_date_sk#1 >= 2469777) AND (ss_sold_date_sk#1 <= 2469807))))))) OR ((((((((ss_sold_date_sk#1 >= 2470142) AND (ss_sold_date_sk#1 <= 2470172)) OR ((ss_sold_date_sk#1 >= 2470507) AND (ss_sold_date_sk#1 <= 2470537))) OR (((ss_sold_date_sk#1 >= 2470873) AND (ss_sold_date_sk#1 <= 2470903)) OR ((ss_sold_date_sk#1 >= 2471238) AND (ss_sold_date_sk#1 <= 2471268)))) OR ((((ss_sold_date_sk#1 >= 2471603) AND (ss_sold_date_sk#1 <= 2471633)) OR ((ss_sold_date_sk#1 >= 2471968) AND (ss_sold_date_sk#1 <= 2471998))) OR ((ss_sold_date_sk#1 >= 2472334) AND (ss_sold_date_sk#1 <= 2472364)))) OR (((((ss_sold_date_sk#1 >= 2472699) AND (ss_sold_date_sk#1 <= 2472729)) OR ((ss_sold_date_sk#1 >= 2473064) AND (ss_sold_date_sk#1 <= 2473094))) OR ((ss_sold_date_sk#1 >= 2473429) AND (ss_sold_date_sk#1 <= 2473459))) OR ((((ss_sold_date_sk#1 >= 2473795) AND (ss_sold_date_sk#1 <= 2473825)) OR ((ss_sold_date_sk#1 >= 2474160) AND (ss_sold_date_sk#1 <= 2474190))) OR ((ss_sold_date_sk#1 >= 2474525) AND (ss_sold_date_sk#1 <= 2474555))))) OR ((((((ss_sold_date_sk#1 >= 2474890) AND (ss_sold_date_sk#1 <= 2474920)) OR ((ss_sold_date_sk#1 >= 2475256) AND (ss_sold_date_sk#1 <= 2475286))) OR ((ss_sold_date_sk#1 >= 2475621) AND (ss_sold_date_sk#1 <= 2475651))) OR ((((ss_sold_date_sk#1 >= 2475986) AND (ss_sold_date_sk#1 <= 2476016)) OR ((ss_sold_date_sk#1 >= 2476351) AND (ss_sold_date_sk#1 <= 2476381))) OR ((ss_sold_date_sk#1 >= 2476717) AND (ss_sold_date_sk#1 <= 2476747)))) OR (((((ss_sold_date_sk#1 >= 2477082) AND (ss_sold_date_sk#1 <= 2477112)) OR ((ss_sold_date_sk#1 >= 2477447) AND (ss_sold_date_sk#1 <= 2477477))) OR ((ss_sold_date_sk#1 >= 2477812) AND (ss_sold_date_sk#1 <= 2477842))) OR ((((ss_sold_date_sk#1 >= 2478178) AND (ss_sold_date_sk#1 <= 2478208)) OR ((ss_sold_date_sk#1 >= 2478543) AND (ss_sold_date_sk#1 <= 2478573))) OR ((ss_sold_date_sk#1 >= 
2478908) AND (ss_sold_date_sk#1 <= 2478938)))))) OR (((((((ss_sold_date_sk#1 >= 2479273) AND (ss_sold_date_sk#1 <= 2479303)) OR ((ss_sold_date_sk#1 >= 2479639) AND (ss_sold_date_sk#1 <= 2479669))) OR (((ss_sold_date_sk#1 >= 2480004) AND (ss_sold_date_sk#1 <= 2480034)) OR ((ss_sold_date_sk#1 >= 2480369) AND (ss_sold_date_sk#1 <= 2480399)))) OR ((((ss_sold_date_sk#1 >= 2480734) AND (ss_sold_date_sk#1 <= 2480764)) OR ((ss_sold_date_sk#1 >= 2481100) AND (ss_sold_date_sk#1 <= 2481130))) OR ((ss_sold_date_sk#1 >= 2481465) AND (ss_sold_date_sk#1 <= 2481495)))) OR (((((ss_sold_date_sk#1 >= 2481830) AND (ss_sold_date_sk#1 <= 2481860)) OR ((ss_sold_date_sk#1 >= 2482195) AND (ss_sold_date_sk#1 <= 2482225))) OR ((ss_sold_date_sk#1 >= 2482561) AND (ss_sold_date_sk#1 <= 2482591))) OR ((((ss_sold_date_sk#1 >= 2482926) AND (ss_sold_date_sk#1 <= 2482956)) OR ((ss_sold_date_sk#1 >= 2483291) AND (ss_sold_date_sk#1 <= 2483321))) OR ((ss_sold_date_sk#1 >= 2483656) AND (ss_sold_date_sk#1 <= 2483686))))) OR ((((((ss_sold_date_sk#1 >= 2484022) AND (ss_sold_date_sk#1 <= 2484052)) OR ((ss_sold_date_sk#1 >= 2484387) AND (ss_sold_date_sk#1 <= 2484417))) OR ((ss_sold_date_sk#1 >= 2484752) AND (ss_sold_date_sk#1 <= 2484782))) OR ((((ss_sold_date_sk#1 >= 2485117) AND (ss_sold_date_sk#1 <= 2485147)) OR ((ss_sold_date_sk#1 >= 2485483) AND (ss_sold_date_sk#1 <= 2485513))) OR ((ss_sold_date_sk#1 >= 2485848) AND (ss_sold_date_sk#1 <= 2485878)))) OR (((((ss_sold_date_sk#1 >= 2486213) AND (ss_sold_date_sk#1 <= 2486243)) OR ((ss_sold_date_sk#1 >= 2486578) AND (ss_sold_date_sk#1 <= 2486608))) OR ((ss_sold_date_sk#1 >= 2486944) AND (ss_sold_date_sk#1 <= 2486974))) OR ((((ss_sold_date_sk#1 >= 2487309) AND (ss_sold_date_sk#1 <= 2487339)) OR ((ss_sold_date_sk#1 >= 2487674) AND (ss_sold_date_sk#1 <= 2487704))) OR ((ss_sold_date_sk#1 >= 2488039) AND (ss_sold_date_sk#1 <= 2488069))))))))) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_item_sk#2)) + +(4) Scan parquet default.item +Output [4]: [i_item_sk#4, 
i_brand_id#5, i_brand#6, i_manufact_id#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,436), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [i_item_sk#4, i_brand_id#5, i_brand#6, i_manufact_id#7] + +(6) Filter [codegen id : 1] +Input [4]: [i_item_sk#4, i_brand_id#5, i_brand#6, i_manufact_id#7] +Condition : ((isnotnull(i_manufact_id#7) AND (i_manufact_id#7 = 436)) AND isnotnull(i_item_sk#4)) + +(7) Project [codegen id : 1] +Output [3]: [i_item_sk#4, i_brand_id#5, i_brand#6] +Input [4]: [i_item_sk#4, i_brand_id#5, i_brand#6, i_manufact_id#7] + +(8) BroadcastExchange +Input [3]: [i_item_sk#4, i_brand_id#5, i_brand#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#4] +Join condition: None + +(10) Project [codegen id : 3] +Output [4]: [ss_sold_date_sk#1, ss_net_profit#3, i_brand_id#5, i_brand#6] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_net_profit#3, i_item_sk#4, i_brand_id#5, i_brand#6] + +(11) Scan parquet default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_moy#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_moy), EqualTo(d_moy,12), 
Or(Or(Or(Or(Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2415355),LessThanOrEqual(d_date_sk,2415385)),And(GreaterThanOrEqual(d_date_sk,2415720),LessThanOrEqual(d_date_sk,2415750))),Or(And(GreaterThanOrEqual(d_date_sk,2416085),LessThanOrEqual(d_date_sk,2416115)),And(GreaterThanOrEqual(d_date_sk,2416450),LessThanOrEqual(d_date_sk,2416480)))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2416816),LessThanOrEqual(d_date_sk,2416846)),And(GreaterThanOrEqual(d_date_sk,2417181),LessThanOrEqual(d_date_sk,2417211))),And(GreaterThanOrEqual(d_date_sk,2417546),LessThanOrEqual(d_date_sk,2417576)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2417911),LessThanOrEqual(d_date_sk,2417941)),And(GreaterThanOrEqual(d_date_sk,2418277),LessThanOrEqual(d_date_sk,2418307))),And(GreaterThanOrEqual(d_date_sk,2418642),LessThanOrEqual(d_date_sk,2418672))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2419007),LessThanOrEqual(d_date_sk,2419037)),And(GreaterThanOrEqual(d_date_sk,2419372),LessThanOrEqual(d_date_sk,2419402))),And(GreaterThanOrEqual(d_date_sk,2419738),LessThanOrEqual(d_date_sk,2419768))))),Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2420103),LessThanOrEqual(d_date_sk,2420133)),And(GreaterThanOrEqual(d_date_sk,2420468),LessThanOrEqual(d_date_sk,2420498))),And(GreaterThanOrEqual(d_date_sk,2420833),LessThanOrEqual(d_date_sk,2420863))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2421199),LessThanOrEqual(d_date_sk,2421229)),And(GreaterThanOrEqual(d_date_sk,2421564),LessThanOrEqual(d_date_sk,2421594))),And(GreaterThanOrEqual(d_date_sk,2421929),LessThanOrEqual(d_date_sk,2421959)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2422294),LessThanOrEqual(d_date_sk,2422324)),And(GreaterThanOrEqual(d_date_sk,2422660),LessThanOrEqual(d_date_sk,2422690))),And(GreaterThanOrEqual(d_date_sk,2423025),LessThanOrEqual(d_date_sk,2423055))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2423390),LessThanOrEqual(d_date_sk,2423420)),And(GreaterThanOrEqual(d_date_sk,2423755),LessThanOrEqual(d_date_sk,2423785))),And(GreaterThanOrEqual(d_da
te_sk,2424121),LessThanOrEqual(d_date_sk,2424151)))))),Or(Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2424486),LessThanOrEqual(d_date_sk,2424516)),And(GreaterThanOrEqual(d_date_sk,2424851),LessThanOrEqual(d_date_sk,2424881))),Or(And(GreaterThanOrEqual(d_date_sk,2425216),LessThanOrEqual(d_date_sk,2425246)),And(GreaterThanOrEqual(d_date_sk,2425582),LessThanOrEqual(d_date_sk,2425612)))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2425947),LessThanOrEqual(d_date_sk,2425977)),And(GreaterThanOrEqual(d_date_sk,2426312),LessThanOrEqual(d_date_sk,2426342))),And(GreaterThanOrEqual(d_date_sk,2426677),LessThanOrEqual(d_date_sk,2426707)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2427043),LessThanOrEqual(d_date_sk,2427073)),And(GreaterThanOrEqual(d_date_sk,2427408),LessThanOrEqual(d_date_sk,2427438))),And(GreaterThanOrEqual(d_date_sk,2427773),LessThanOrEqual(d_date_sk,2427803))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2428138),LessThanOrEqual(d_date_sk,2428168)),And(GreaterThanOrEqual(d_date_sk,2428504),LessThanOrEqual(d_date_sk,2428534))),And(GreaterThanOrEqual(d_date_sk,2428869),LessThanOrEqual(d_date_sk,2428899))))),Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2429234),LessThanOrEqual(d_date_sk,2429264)),And(GreaterThanOrEqual(d_date_sk,2429599),LessThanOrEqual(d_date_sk,2429629))),And(GreaterThanOrEqual(d_date_sk,2429965),LessThanOrEqual(d_date_sk,2429995))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2430330),LessThanOrEqual(d_date_sk,2430360)),And(GreaterThanOrEqual(d_date_sk,2430695),LessThanOrEqual(d_date_sk,2430725))),And(GreaterThanOrEqual(d_date_sk,2431060),LessThanOrEqual(d_date_sk,2431090)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2431426),LessThanOrEqual(d_date_sk,2431456)),And(GreaterThanOrEqual(d_date_sk,2431791),LessThanOrEqual(d_date_sk,2431821))),And(GreaterThanOrEqual(d_date_sk,2432156),LessThanOrEqual(d_date_sk,2432186))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2432521),LessThanOrEqual(d_date_sk,2432551)),And(GreaterThanOrEqual(d_date_sk,2432887),LessThanOrEqual(d_
date_sk,2432917))),And(GreaterThanOrEqual(d_date_sk,2433252),LessThanOrEqual(d_date_sk,2433282))))))),Or(Or(Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2433617),LessThanOrEqual(d_date_sk,2433647)),And(GreaterThanOrEqual(d_date_sk,2433982),LessThanOrEqual(d_date_sk,2434012))),Or(And(GreaterThanOrEqual(d_date_sk,2434348),LessThanOrEqual(d_date_sk,2434378)),And(GreaterThanOrEqual(d_date_sk,2434713),LessThanOrEqual(d_date_sk,2434743)))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2435078),LessThanOrEqual(d_date_sk,2435108)),And(GreaterThanOrEqual(d_date_sk,2435443),LessThanOrEqual(d_date_sk,2435473))),And(GreaterThanOrEqual(d_date_sk,2435809),LessThanOrEqual(d_date_sk,2435839)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2436174),LessThanOrEqual(d_date_sk,2436204)),And(GreaterThanOrEqual(d_date_sk,2436539),LessThanOrEqual(d_date_sk,2436569))),And(GreaterThanOrEqual(d_date_sk,2436904),LessThanOrEqual(d_date_sk,2436934))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2437270),LessThanOrEqual(d_date_sk,2437300)),And(GreaterThanOrEqual(d_date_sk,2437635),LessThanOrEqual(d_date_sk,2437665))),And(GreaterThanOrEqual(d_date_sk,2438000),LessThanOrEqual(d_date_sk,2438030))))),Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2438365),LessThanOrEqual(d_date_sk,2438395)),And(GreaterThanOrEqual(d_date_sk,2438731),LessThanOrEqual(d_date_sk,2438761))),And(GreaterThanOrEqual(d_date_sk,2439096),LessThanOrEqual(d_date_sk,2439126))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2439461),LessThanOrEqual(d_date_sk,2439491)),And(GreaterThanOrEqual(d_date_sk,2439826),LessThanOrEqual(d_date_sk,2439856))),And(GreaterThanOrEqual(d_date_sk,2440192),LessThanOrEqual(d_date_sk,2440222)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2440557),LessThanOrEqual(d_date_sk,2440587)),And(GreaterThanOrEqual(d_date_sk,2440922),LessThanOrEqual(d_date_sk,2440952))),And(GreaterThanOrEqual(d_date_sk,2441287),LessThanOrEqual(d_date_sk,2441317))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2441653),LessThanOrEqual(d_date_sk,2441683)),And(Greate
rThanOrEqual(d_date_sk,2442018),LessThanOrEqual(d_date_sk,2442048))),And(GreaterThanOrEqual(d_date_sk,2442383),LessThanOrEqual(d_date_sk,2442413)))))),Or(Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2442748),LessThanOrEqual(d_date_sk,2442778)),And(GreaterThanOrEqual(d_date_sk,2443114),LessThanOrEqual(d_date_sk,2443144))),Or(And(GreaterThanOrEqual(d_date_sk,2443479),LessThanOrEqual(d_date_sk,2443509)),And(GreaterThanOrEqual(d_date_sk,2443844),LessThanOrEqual(d_date_sk,2443874)))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2444209),LessThanOrEqual(d_date_sk,2444239)),And(GreaterThanOrEqual(d_date_sk,2444575),LessThanOrEqual(d_date_sk,2444605))),And(GreaterThanOrEqual(d_date_sk,2444940),LessThanOrEqual(d_date_sk,2444970)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2445305),LessThanOrEqual(d_date_sk,2445335)),And(GreaterThanOrEqual(d_date_sk,2445670),LessThanOrEqual(d_date_sk,2445700))),And(GreaterThanOrEqual(d_date_sk,2446036),LessThanOrEqual(d_date_sk,2446066))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2446401),LessThanOrEqual(d_date_sk,2446431)),And(GreaterThanOrEqual(d_date_sk,2446766),LessThanOrEqual(d_date_sk,2446796))),And(GreaterThanOrEqual(d_date_sk,2447131),LessThanOrEqual(d_date_sk,2447161))))),Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2447497),LessThanOrEqual(d_date_sk,2447527)),And(GreaterThanOrEqual(d_date_sk,2447862),LessThanOrEqual(d_date_sk,2447892))),And(GreaterThanOrEqual(d_date_sk,2448227),LessThanOrEqual(d_date_sk,2448257))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2448592),LessThanOrEqual(d_date_sk,2448622)),And(GreaterThanOrEqual(d_date_sk,2448958),LessThanOrEqual(d_date_sk,2448988))),And(GreaterThanOrEqual(d_date_sk,2449323),LessThanOrEqual(d_date_sk,2449353)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2449688),LessThanOrEqual(d_date_sk,2449718)),And(GreaterThanOrEqual(d_date_sk,2450053),LessThanOrEqual(d_date_sk,2450083))),And(GreaterThanOrEqual(d_date_sk,2450419),LessThanOrEqual(d_date_sk,2450449))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2450784),
LessThanOrEqual(d_date_sk,2450814)),And(GreaterThanOrEqual(d_date_sk,2451149),LessThanOrEqual(d_date_sk,2451179))),And(GreaterThanOrEqual(d_date_sk,2451514),LessThanOrEqual(d_date_sk,2451544)))))))),Or(Or(Or(Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2451880),LessThanOrEqual(d_date_sk,2451910)),And(GreaterThanOrEqual(d_date_sk,2452245),LessThanOrEqual(d_date_sk,2452275))),Or(And(GreaterThanOrEqual(d_date_sk,2452610),LessThanOrEqual(d_date_sk,2452640)),And(GreaterThanOrEqual(d_date_sk,2452975),LessThanOrEqual(d_date_sk,2453005)))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2453341),LessThanOrEqual(d_date_sk,2453371)),And(GreaterThanOrEqual(d_date_sk,2453706),LessThanOrEqual(d_date_sk,2453736))),And(GreaterThanOrEqual(d_date_sk,2454071),LessThanOrEqual(d_date_sk,2454101)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2454436),LessThanOrEqual(d_date_sk,2454466)),And(GreaterThanOrEqual(d_date_sk,2454802),LessThanOrEqual(d_date_sk,2454832))),And(GreaterThanOrEqual(d_date_sk,2455167),LessThanOrEqual(d_date_sk,2455197))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2455532),LessThanOrEqual(d_date_sk,2455562)),And(GreaterThanOrEqual(d_date_sk,2455897),LessThanOrEqual(d_date_sk,2455927))),And(GreaterThanOrEqual(d_date_sk,2456263),LessThanOrEqual(d_date_sk,2456293))))),Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2456628),LessThanOrEqual(d_date_sk,2456658)),And(GreaterThanOrEqual(d_date_sk,2456993),LessThanOrEqual(d_date_sk,2457023))),And(GreaterThanOrEqual(d_date_sk,2457358),LessThanOrEqual(d_date_sk,2457388))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2457724),LessThanOrEqual(d_date_sk,2457754)),And(GreaterThanOrEqual(d_date_sk,2458089),LessThanOrEqual(d_date_sk,2458119))),And(GreaterThanOrEqual(d_date_sk,2458454),LessThanOrEqual(d_date_sk,2458484)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2458819),LessThanOrEqual(d_date_sk,2458849)),And(GreaterThanOrEqual(d_date_sk,2459185),LessThanOrEqual(d_date_sk,2459215))),And(GreaterThanOrEqual(d_date_sk,2459550),LessThanOrEqual(d_date_sk,24595
80))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2459915),LessThanOrEqual(d_date_sk,2459945)),And(GreaterThanOrEqual(d_date_sk,2460280),LessThanOrEqual(d_date_sk,2460310))),And(GreaterThanOrEqual(d_date_sk,2460646),LessThanOrEqual(d_date_sk,2460676)))))),Or(Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2461011),LessThanOrEqual(d_date_sk,2461041)),And(GreaterThanOrEqual(d_date_sk,2461376),LessThanOrEqual(d_date_sk,2461406))),Or(And(GreaterThanOrEqual(d_date_sk,2461741),LessThanOrEqual(d_date_sk,2461771)),And(GreaterThanOrEqual(d_date_sk,2462107),LessThanOrEqual(d_date_sk,2462137)))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2462472),LessThanOrEqual(d_date_sk,2462502)),And(GreaterThanOrEqual(d_date_sk,2462837),LessThanOrEqual(d_date_sk,2462867))),And(GreaterThanOrEqual(d_date_sk,2463202),LessThanOrEqual(d_date_sk,2463232)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2463568),LessThanOrEqual(d_date_sk,2463598)),And(GreaterThanOrEqual(d_date_sk,2463933),LessThanOrEqual(d_date_sk,2463963))),And(GreaterThanOrEqual(d_date_sk,2464298),LessThanOrEqual(d_date_sk,2464328))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2464663),LessThanOrEqual(d_date_sk,2464693)),And(GreaterThanOrEqual(d_date_sk,2465029),LessThanOrEqual(d_date_sk,2465059))),And(GreaterThanOrEqual(d_date_sk,2465394),LessThanOrEqual(d_date_sk,2465424))))),Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2465759),LessThanOrEqual(d_date_sk,2465789)),And(GreaterThanOrEqual(d_date_sk,2466124),LessThanOrEqual(d_date_sk,2466154))),And(GreaterThanOrEqual(d_date_sk,2466490),LessThanOrEqual(d_date_sk,2466520))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2466855),LessThanOrEqual(d_date_sk,2466885)),And(GreaterThanOrEqual(d_date_sk,2467220),LessThanOrEqual(d_date_sk,2467250))),And(GreaterThanOrEqual(d_date_sk,2467585),LessThanOrEqual(d_date_sk,2467615)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2467951),LessThanOrEqual(d_date_sk,2467981)),And(GreaterThanOrEqual(d_date_sk,2468316),LessThanOrEqual(d_date_sk,2468346))),And(GreaterThanOrEqual(d_da
te_sk,2468681),LessThanOrEqual(d_date_sk,2468711))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2469046),LessThanOrEqual(d_date_sk,2469076)),And(GreaterThanOrEqual(d_date_sk,2469412),LessThanOrEqual(d_date_sk,2469442))),And(GreaterThanOrEqual(d_date_sk,2469777),LessThanOrEqual(d_date_sk,2469807))))))),Or(Or(Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2470142),LessThanOrEqual(d_date_sk,2470172)),And(GreaterThanOrEqual(d_date_sk,2470507),LessThanOrEqual(d_date_sk,2470537))),Or(And(GreaterThanOrEqual(d_date_sk,2470873),LessThanOrEqual(d_date_sk,2470903)),And(GreaterThanOrEqual(d_date_sk,2471238),LessThanOrEqual(d_date_sk,2471268)))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2471603),LessThanOrEqual(d_date_sk,2471633)),And(GreaterThanOrEqual(d_date_sk,2471968),LessThanOrEqual(d_date_sk,2471998))),And(GreaterThanOrEqual(d_date_sk,2472334),LessThanOrEqual(d_date_sk,2472364)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2472699),LessThanOrEqual(d_date_sk,2472729)),And(GreaterThanOrEqual(d_date_sk,2473064),LessThanOrEqual(d_date_sk,2473094))),And(GreaterThanOrEqual(d_date_sk,2473429),LessThanOrEqual(d_date_sk,2473459))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2473795),LessThanOrEqual(d_date_sk,2473825)),And(GreaterThanOrEqual(d_date_sk,2474160),LessThanOrEqual(d_date_sk,2474190))),And(GreaterThanOrEqual(d_date_sk,2474525),LessThanOrEqual(d_date_sk,2474555))))),Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2474890),LessThanOrEqual(d_date_sk,2474920)),And(GreaterThanOrEqual(d_date_sk,2475256),LessThanOrEqual(d_date_sk,2475286))),And(GreaterThanOrEqual(d_date_sk,2475621),LessThanOrEqual(d_date_sk,2475651))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2475986),LessThanOrEqual(d_date_sk,2476016)),And(GreaterThanOrEqual(d_date_sk,2476351),LessThanOrEqual(d_date_sk,2476381))),And(GreaterThanOrEqual(d_date_sk,2476717),LessThanOrEqual(d_date_sk,2476747)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2477082),LessThanOrEqual(d_date_sk,2477112)),And(GreaterThanOrEqual(d_date_sk,2477447),LessThanOrEqua
l(d_date_sk,2477477))),And(GreaterThanOrEqual(d_date_sk,2477812),LessThanOrEqual(d_date_sk,2477842))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2478178),LessThanOrEqual(d_date_sk,2478208)),And(GreaterThanOrEqual(d_date_sk,2478543),LessThanOrEqual(d_date_sk,2478573))),And(GreaterThanOrEqual(d_date_sk,2478908),LessThanOrEqual(d_date_sk,2478938)))))),Or(Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2479273),LessThanOrEqual(d_date_sk,2479303)),And(GreaterThanOrEqual(d_date_sk,2479639),LessThanOrEqual(d_date_sk,2479669))),Or(And(GreaterThanOrEqual(d_date_sk,2480004),LessThanOrEqual(d_date_sk,2480034)),And(GreaterThanOrEqual(d_date_sk,2480369),LessThanOrEqual(d_date_sk,2480399)))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2480734),LessThanOrEqual(d_date_sk,2480764)),And(GreaterThanOrEqual(d_date_sk,2481100),LessThanOrEqual(d_date_sk,2481130))),And(GreaterThanOrEqual(d_date_sk,2481465),LessThanOrEqual(d_date_sk,2481495)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2481830),LessThanOrEqual(d_date_sk,2481860)),And(GreaterThanOrEqual(d_date_sk,2482195),LessThanOrEqual(d_date_sk,2482225))),And(GreaterThanOrEqual(d_date_sk,2482561),LessThanOrEqual(d_date_sk,2482591))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2482926),LessThanOrEqual(d_date_sk,2482956)),And(GreaterThanOrEqual(d_date_sk,2483291),LessThanOrEqual(d_date_sk,2483321))),And(GreaterThanOrEqual(d_date_sk,2483656),LessThanOrEqual(d_date_sk,2483686))))),Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2484022),LessThanOrEqual(d_date_sk,2484052)),And(GreaterThanOrEqual(d_date_sk,2484387),LessThanOrEqual(d_date_sk,2484417))),And(GreaterThanOrEqual(d_date_sk,2484752),LessThanOrEqual(d_date_sk,2484782))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2485117),LessThanOrEqual(d_date_sk,2485147)),And(GreaterThanOrEqual(d_date_sk,2485483),LessThanOrEqual(d_date_sk,2485513))),And(GreaterThanOrEqual(d_date_sk,2485848),LessThanOrEqual(d_date_sk,2485878)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2486213),LessThanOrEqual(d_date_sk,2486243)),And(Greate
rThanOrEqual(d_date_sk,2486578),LessThanOrEqual(d_date_sk,2486608))),And(GreaterThanOrEqual(d_date_sk,2486944),LessThanOrEqual(d_date_sk,2486974))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2487309),LessThanOrEqual(d_date_sk,2487339)),And(GreaterThanOrEqual(d_date_sk,2487674),LessThanOrEqual(d_date_sk,2487704))),And(GreaterThanOrEqual(d_date_sk,2488039),LessThanOrEqual(d_date_sk,2488069))))))))), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] + +(13) Filter [codegen id : 2] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Condition : (((isnotnull(d_moy#11) AND (d_moy#11 = 12)) AND ((((((((((d_date_sk#9 >= 2415355) AND (d_date_sk#9 <= 2415385)) OR ((d_date_sk#9 >= 2415720) AND (d_date_sk#9 <= 2415750))) OR (((d_date_sk#9 >= 2416085) AND (d_date_sk#9 <= 2416115)) OR ((d_date_sk#9 >= 2416450) AND (d_date_sk#9 <= 2416480)))) OR ((((d_date_sk#9 >= 2416816) AND (d_date_sk#9 <= 2416846)) OR ((d_date_sk#9 >= 2417181) AND (d_date_sk#9 <= 2417211))) OR ((d_date_sk#9 >= 2417546) AND (d_date_sk#9 <= 2417576)))) OR (((((d_date_sk#9 >= 2417911) AND (d_date_sk#9 <= 2417941)) OR ((d_date_sk#9 >= 2418277) AND (d_date_sk#9 <= 2418307))) OR ((d_date_sk#9 >= 2418642) AND (d_date_sk#9 <= 2418672))) OR ((((d_date_sk#9 >= 2419007) AND (d_date_sk#9 <= 2419037)) OR ((d_date_sk#9 >= 2419372) AND (d_date_sk#9 <= 2419402))) OR ((d_date_sk#9 >= 2419738) AND (d_date_sk#9 <= 2419768))))) OR ((((((d_date_sk#9 >= 2420103) AND (d_date_sk#9 <= 2420133)) OR ((d_date_sk#9 >= 2420468) AND (d_date_sk#9 <= 2420498))) OR ((d_date_sk#9 >= 2420833) AND (d_date_sk#9 <= 2420863))) OR ((((d_date_sk#9 >= 2421199) AND (d_date_sk#9 <= 2421229)) OR ((d_date_sk#9 >= 2421564) AND (d_date_sk#9 <= 2421594))) OR ((d_date_sk#9 >= 2421929) AND (d_date_sk#9 <= 2421959)))) OR (((((d_date_sk#9 >= 2422294) AND (d_date_sk#9 <= 2422324)) OR ((d_date_sk#9 >= 2422660) AND (d_date_sk#9 <= 2422690))) OR ((d_date_sk#9 >= 2423025) AND (d_date_sk#9 <= 
2423055))) OR ((((d_date_sk#9 >= 2423390) AND (d_date_sk#9 <= 2423420)) OR ((d_date_sk#9 >= 2423755) AND (d_date_sk#9 <= 2423785))) OR ((d_date_sk#9 >= 2424121) AND (d_date_sk#9 <= 2424151)))))) OR (((((((d_date_sk#9 >= 2424486) AND (d_date_sk#9 <= 2424516)) OR ((d_date_sk#9 >= 2424851) AND (d_date_sk#9 <= 2424881))) OR (((d_date_sk#9 >= 2425216) AND (d_date_sk#9 <= 2425246)) OR ((d_date_sk#9 >= 2425582) AND (d_date_sk#9 <= 2425612)))) OR ((((d_date_sk#9 >= 2425947) AND (d_date_sk#9 <= 2425977)) OR ((d_date_sk#9 >= 2426312) AND (d_date_sk#9 <= 2426342))) OR ((d_date_sk#9 >= 2426677) AND (d_date_sk#9 <= 2426707)))) OR (((((d_date_sk#9 >= 2427043) AND (d_date_sk#9 <= 2427073)) OR ((d_date_sk#9 >= 2427408) AND (d_date_sk#9 <= 2427438))) OR ((d_date_sk#9 >= 2427773) AND (d_date_sk#9 <= 2427803))) OR ((((d_date_sk#9 >= 2428138) AND (d_date_sk#9 <= 2428168)) OR ((d_date_sk#9 >= 2428504) AND (d_date_sk#9 <= 2428534))) OR ((d_date_sk#9 >= 2428869) AND (d_date_sk#9 <= 2428899))))) OR ((((((d_date_sk#9 >= 2429234) AND (d_date_sk#9 <= 2429264)) OR ((d_date_sk#9 >= 2429599) AND (d_date_sk#9 <= 2429629))) OR ((d_date_sk#9 >= 2429965) AND (d_date_sk#9 <= 2429995))) OR ((((d_date_sk#9 >= 2430330) AND (d_date_sk#9 <= 2430360)) OR ((d_date_sk#9 >= 2430695) AND (d_date_sk#9 <= 2430725))) OR ((d_date_sk#9 >= 2431060) AND (d_date_sk#9 <= 2431090)))) OR (((((d_date_sk#9 >= 2431426) AND (d_date_sk#9 <= 2431456)) OR ((d_date_sk#9 >= 2431791) AND (d_date_sk#9 <= 2431821))) OR ((d_date_sk#9 >= 2432156) AND (d_date_sk#9 <= 2432186))) OR ((((d_date_sk#9 >= 2432521) AND (d_date_sk#9 <= 2432551)) OR ((d_date_sk#9 >= 2432887) AND (d_date_sk#9 <= 2432917))) OR ((d_date_sk#9 >= 2433252) AND (d_date_sk#9 <= 2433282))))))) OR ((((((((d_date_sk#9 >= 2433617) AND (d_date_sk#9 <= 2433647)) OR ((d_date_sk#9 >= 2433982) AND (d_date_sk#9 <= 2434012))) OR (((d_date_sk#9 >= 2434348) AND (d_date_sk#9 <= 2434378)) OR ((d_date_sk#9 >= 2434713) AND (d_date_sk#9 <= 2434743)))) OR ((((d_date_sk#9 >= 2435078) AND 
(d_date_sk#9 <= 2435108)) OR ((d_date_sk#9 >= 2435443) AND (d_date_sk#9 <= 2435473))) OR ((d_date_sk#9 >= 2435809) AND (d_date_sk#9 <= 2435839)))) OR (((((d_date_sk#9 >= 2436174) AND (d_date_sk#9 <= 2436204)) OR ((d_date_sk#9 >= 2436539) AND (d_date_sk#9 <= 2436569))) OR ((d_date_sk#9 >= 2436904) AND (d_date_sk#9 <= 2436934))) OR ((((d_date_sk#9 >= 2437270) AND (d_date_sk#9 <= 2437300)) OR ((d_date_sk#9 >= 2437635) AND (d_date_sk#9 <= 2437665))) OR ((d_date_sk#9 >= 2438000) AND (d_date_sk#9 <= 2438030))))) OR ((((((d_date_sk#9 >= 2438365) AND (d_date_sk#9 <= 2438395)) OR ((d_date_sk#9 >= 2438731) AND (d_date_sk#9 <= 2438761))) OR ((d_date_sk#9 >= 2439096) AND (d_date_sk#9 <= 2439126))) OR ((((d_date_sk#9 >= 2439461) AND (d_date_sk#9 <= 2439491)) OR ((d_date_sk#9 >= 2439826) AND (d_date_sk#9 <= 2439856))) OR ((d_date_sk#9 >= 2440192) AND (d_date_sk#9 <= 2440222)))) OR (((((d_date_sk#9 >= 2440557) AND (d_date_sk#9 <= 2440587)) OR ((d_date_sk#9 >= 2440922) AND (d_date_sk#9 <= 2440952))) OR ((d_date_sk#9 >= 2441287) AND (d_date_sk#9 <= 2441317))) OR ((((d_date_sk#9 >= 2441653) AND (d_date_sk#9 <= 2441683)) OR ((d_date_sk#9 >= 2442018) AND (d_date_sk#9 <= 2442048))) OR ((d_date_sk#9 >= 2442383) AND (d_date_sk#9 <= 2442413)))))) OR (((((((d_date_sk#9 >= 2442748) AND (d_date_sk#9 <= 2442778)) OR ((d_date_sk#9 >= 2443114) AND (d_date_sk#9 <= 2443144))) OR (((d_date_sk#9 >= 2443479) AND (d_date_sk#9 <= 2443509)) OR ((d_date_sk#9 >= 2443844) AND (d_date_sk#9 <= 2443874)))) OR ((((d_date_sk#9 >= 2444209) AND (d_date_sk#9 <= 2444239)) OR ((d_date_sk#9 >= 2444575) AND (d_date_sk#9 <= 2444605))) OR ((d_date_sk#9 >= 2444940) AND (d_date_sk#9 <= 2444970)))) OR (((((d_date_sk#9 >= 2445305) AND (d_date_sk#9 <= 2445335)) OR ((d_date_sk#9 >= 2445670) AND (d_date_sk#9 <= 2445700))) OR ((d_date_sk#9 >= 2446036) AND (d_date_sk#9 <= 2446066))) OR ((((d_date_sk#9 >= 2446401) AND (d_date_sk#9 <= 2446431)) OR ((d_date_sk#9 >= 2446766) AND (d_date_sk#9 <= 2446796))) OR ((d_date_sk#9 >= 
2447131) AND (d_date_sk#9 <= 2447161))))) OR ((((((d_date_sk#9 >= 2447497) AND (d_date_sk#9 <= 2447527)) OR ((d_date_sk#9 >= 2447862) AND (d_date_sk#9 <= 2447892))) OR ((d_date_sk#9 >= 2448227) AND (d_date_sk#9 <= 2448257))) OR ((((d_date_sk#9 >= 2448592) AND (d_date_sk#9 <= 2448622)) OR ((d_date_sk#9 >= 2448958) AND (d_date_sk#9 <= 2448988))) OR ((d_date_sk#9 >= 2449323) AND (d_date_sk#9 <= 2449353)))) OR (((((d_date_sk#9 >= 2449688) AND (d_date_sk#9 <= 2449718)) OR ((d_date_sk#9 >= 2450053) AND (d_date_sk#9 <= 2450083))) OR ((d_date_sk#9 >= 2450419) AND (d_date_sk#9 <= 2450449))) OR ((((d_date_sk#9 >= 2450784) AND (d_date_sk#9 <= 2450814)) OR ((d_date_sk#9 >= 2451149) AND (d_date_sk#9 <= 2451179))) OR ((d_date_sk#9 >= 2451514) AND (d_date_sk#9 <= 2451544)))))))) OR (((((((((d_date_sk#9 >= 2451880) AND (d_date_sk#9 <= 2451910)) OR ((d_date_sk#9 >= 2452245) AND (d_date_sk#9 <= 2452275))) OR (((d_date_sk#9 >= 2452610) AND (d_date_sk#9 <= 2452640)) OR ((d_date_sk#9 >= 2452975) AND (d_date_sk#9 <= 2453005)))) OR ((((d_date_sk#9 >= 2453341) AND (d_date_sk#9 <= 2453371)) OR ((d_date_sk#9 >= 2453706) AND (d_date_sk#9 <= 2453736))) OR ((d_date_sk#9 >= 2454071) AND (d_date_sk#9 <= 2454101)))) OR (((((d_date_sk#9 >= 2454436) AND (d_date_sk#9 <= 2454466)) OR ((d_date_sk#9 >= 2454802) AND (d_date_sk#9 <= 2454832))) OR ((d_date_sk#9 >= 2455167) AND (d_date_sk#9 <= 2455197))) OR ((((d_date_sk#9 >= 2455532) AND (d_date_sk#9 <= 2455562)) OR ((d_date_sk#9 >= 2455897) AND (d_date_sk#9 <= 2455927))) OR ((d_date_sk#9 >= 2456263) AND (d_date_sk#9 <= 2456293))))) OR ((((((d_date_sk#9 >= 2456628) AND (d_date_sk#9 <= 2456658)) OR ((d_date_sk#9 >= 2456993) AND (d_date_sk#9 <= 2457023))) OR ((d_date_sk#9 >= 2457358) AND (d_date_sk#9 <= 2457388))) OR ((((d_date_sk#9 >= 2457724) AND (d_date_sk#9 <= 2457754)) OR ((d_date_sk#9 >= 2458089) AND (d_date_sk#9 <= 2458119))) OR ((d_date_sk#9 >= 2458454) AND (d_date_sk#9 <= 2458484)))) OR (((((d_date_sk#9 >= 2458819) AND (d_date_sk#9 <= 2458849)) OR 
((d_date_sk#9 >= 2459185) AND (d_date_sk#9 <= 2459215))) OR ((d_date_sk#9 >= 2459550) AND (d_date_sk#9 <= 2459580))) OR ((((d_date_sk#9 >= 2459915) AND (d_date_sk#9 <= 2459945)) OR ((d_date_sk#9 >= 2460280) AND (d_date_sk#9 <= 2460310))) OR ((d_date_sk#9 >= 2460646) AND (d_date_sk#9 <= 2460676)))))) OR (((((((d_date_sk#9 >= 2461011) AND (d_date_sk#9 <= 2461041)) OR ((d_date_sk#9 >= 2461376) AND (d_date_sk#9 <= 2461406))) OR (((d_date_sk#9 >= 2461741) AND (d_date_sk#9 <= 2461771)) OR ((d_date_sk#9 >= 2462107) AND (d_date_sk#9 <= 2462137)))) OR ((((d_date_sk#9 >= 2462472) AND (d_date_sk#9 <= 2462502)) OR ((d_date_sk#9 >= 2462837) AND (d_date_sk#9 <= 2462867))) OR ((d_date_sk#9 >= 2463202) AND (d_date_sk#9 <= 2463232)))) OR (((((d_date_sk#9 >= 2463568) AND (d_date_sk#9 <= 2463598)) OR ((d_date_sk#9 >= 2463933) AND (d_date_sk#9 <= 2463963))) OR ((d_date_sk#9 >= 2464298) AND (d_date_sk#9 <= 2464328))) OR ((((d_date_sk#9 >= 2464663) AND (d_date_sk#9 <= 2464693)) OR ((d_date_sk#9 >= 2465029) AND (d_date_sk#9 <= 2465059))) OR ((d_date_sk#9 >= 2465394) AND (d_date_sk#9 <= 2465424))))) OR ((((((d_date_sk#9 >= 2465759) AND (d_date_sk#9 <= 2465789)) OR ((d_date_sk#9 >= 2466124) AND (d_date_sk#9 <= 2466154))) OR ((d_date_sk#9 >= 2466490) AND (d_date_sk#9 <= 2466520))) OR ((((d_date_sk#9 >= 2466855) AND (d_date_sk#9 <= 2466885)) OR ((d_date_sk#9 >= 2467220) AND (d_date_sk#9 <= 2467250))) OR ((d_date_sk#9 >= 2467585) AND (d_date_sk#9 <= 2467615)))) OR (((((d_date_sk#9 >= 2467951) AND (d_date_sk#9 <= 2467981)) OR ((d_date_sk#9 >= 2468316) AND (d_date_sk#9 <= 2468346))) OR ((d_date_sk#9 >= 2468681) AND (d_date_sk#9 <= 2468711))) OR ((((d_date_sk#9 >= 2469046) AND (d_date_sk#9 <= 2469076)) OR ((d_date_sk#9 >= 2469412) AND (d_date_sk#9 <= 2469442))) OR ((d_date_sk#9 >= 2469777) AND (d_date_sk#9 <= 2469807))))))) OR ((((((((d_date_sk#9 >= 2470142) AND (d_date_sk#9 <= 2470172)) OR ((d_date_sk#9 >= 2470507) AND (d_date_sk#9 <= 2470537))) OR (((d_date_sk#9 >= 2470873) AND (d_date_sk#9 <= 
2470903)) OR ((d_date_sk#9 >= 2471238) AND (d_date_sk#9 <= 2471268)))) OR ((((d_date_sk#9 >= 2471603) AND (d_date_sk#9 <= 2471633)) OR ((d_date_sk#9 >= 2471968) AND (d_date_sk#9 <= 2471998))) OR ((d_date_sk#9 >= 2472334) AND (d_date_sk#9 <= 2472364)))) OR (((((d_date_sk#9 >= 2472699) AND (d_date_sk#9 <= 2472729)) OR ((d_date_sk#9 >= 2473064) AND (d_date_sk#9 <= 2473094))) OR ((d_date_sk#9 >= 2473429) AND (d_date_sk#9 <= 2473459))) OR ((((d_date_sk#9 >= 2473795) AND (d_date_sk#9 <= 2473825)) OR ((d_date_sk#9 >= 2474160) AND (d_date_sk#9 <= 2474190))) OR ((d_date_sk#9 >= 2474525) AND (d_date_sk#9 <= 2474555))))) OR ((((((d_date_sk#9 >= 2474890) AND (d_date_sk#9 <= 2474920)) OR ((d_date_sk#9 >= 2475256) AND (d_date_sk#9 <= 2475286))) OR ((d_date_sk#9 >= 2475621) AND (d_date_sk#9 <= 2475651))) OR ((((d_date_sk#9 >= 2475986) AND (d_date_sk#9 <= 2476016)) OR ((d_date_sk#9 >= 2476351) AND (d_date_sk#9 <= 2476381))) OR ((d_date_sk#9 >= 2476717) AND (d_date_sk#9 <= 2476747)))) OR (((((d_date_sk#9 >= 2477082) AND (d_date_sk#9 <= 2477112)) OR ((d_date_sk#9 >= 2477447) AND (d_date_sk#9 <= 2477477))) OR ((d_date_sk#9 >= 2477812) AND (d_date_sk#9 <= 2477842))) OR ((((d_date_sk#9 >= 2478178) AND (d_date_sk#9 <= 2478208)) OR ((d_date_sk#9 >= 2478543) AND (d_date_sk#9 <= 2478573))) OR ((d_date_sk#9 >= 2478908) AND (d_date_sk#9 <= 2478938)))))) OR (((((((d_date_sk#9 >= 2479273) AND (d_date_sk#9 <= 2479303)) OR ((d_date_sk#9 >= 2479639) AND (d_date_sk#9 <= 2479669))) OR (((d_date_sk#9 >= 2480004) AND (d_date_sk#9 <= 2480034)) OR ((d_date_sk#9 >= 2480369) AND (d_date_sk#9 <= 2480399)))) OR ((((d_date_sk#9 >= 2480734) AND (d_date_sk#9 <= 2480764)) OR ((d_date_sk#9 >= 2481100) AND (d_date_sk#9 <= 2481130))) OR ((d_date_sk#9 >= 2481465) AND (d_date_sk#9 <= 2481495)))) OR (((((d_date_sk#9 >= 2481830) AND (d_date_sk#9 <= 2481860)) OR ((d_date_sk#9 >= 2482195) AND (d_date_sk#9 <= 2482225))) OR ((d_date_sk#9 >= 2482561) AND (d_date_sk#9 <= 2482591))) OR ((((d_date_sk#9 >= 2482926) AND 
(d_date_sk#9 <= 2482956)) OR ((d_date_sk#9 >= 2483291) AND (d_date_sk#9 <= 2483321))) OR ((d_date_sk#9 >= 2483656) AND (d_date_sk#9 <= 2483686))))) OR ((((((d_date_sk#9 >= 2484022) AND (d_date_sk#9 <= 2484052)) OR ((d_date_sk#9 >= 2484387) AND (d_date_sk#9 <= 2484417))) OR ((d_date_sk#9 >= 2484752) AND (d_date_sk#9 <= 2484782))) OR ((((d_date_sk#9 >= 2485117) AND (d_date_sk#9 <= 2485147)) OR ((d_date_sk#9 >= 2485483) AND (d_date_sk#9 <= 2485513))) OR ((d_date_sk#9 >= 2485848) AND (d_date_sk#9 <= 2485878)))) OR (((((d_date_sk#9 >= 2486213) AND (d_date_sk#9 <= 2486243)) OR ((d_date_sk#9 >= 2486578) AND (d_date_sk#9 <= 2486608))) OR ((d_date_sk#9 >= 2486944) AND (d_date_sk#9 <= 2486974))) OR ((((d_date_sk#9 >= 2487309) AND (d_date_sk#9 <= 2487339)) OR ((d_date_sk#9 >= 2487674) AND (d_date_sk#9 <= 2487704))) OR ((d_date_sk#9 >= 2488039) AND (d_date_sk#9 <= 2488069)))))))))) AND isnotnull(d_date_sk#9)) + +(14) Project [codegen id : 2] +Output [2]: [d_date_sk#9, d_year#10] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] + +(15) BroadcastExchange +Input [2]: [d_date_sk#9, d_year#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(17) Project [codegen id : 3] +Output [4]: [d_year#10, ss_net_profit#3, i_brand_id#5, i_brand#6] +Input [6]: [ss_sold_date_sk#1, ss_net_profit#3, i_brand_id#5, i_brand#6, d_date_sk#9, d_year#10] + +(18) HashAggregate [codegen id : 3] +Input [4]: [d_year#10, ss_net_profit#3, i_brand_id#5, i_brand#6] +Keys [3]: [d_year#10, i_brand#6, i_brand_id#5] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [sum#13] +Results [4]: [d_year#10, i_brand#6, i_brand_id#5, sum#14] + +(19) Exchange +Input [4]: [d_year#10, i_brand#6, i_brand_id#5, sum#14] +Arguments: hashpartitioning(d_year#10, i_brand#6, i_brand_id#5, 5), true, [id=#15] + +(20) 
HashAggregate [codegen id : 4] +Input [4]: [d_year#10, i_brand#6, i_brand_id#5, sum#14] +Keys [3]: [d_year#10, i_brand#6, i_brand_id#5] +Functions [1]: [sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#3))#16] +Results [4]: [d_year#10, i_brand_id#5 AS brand_id#17, i_brand#6 AS brand#18, MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#16,17,2) AS sum_agg#19] + +(21) TakeOrderedAndProject +Input [4]: [d_year#10, brand_id#17, brand#18, sum_agg#19] +Arguments: 100, [d_year#10 ASC NULLS FIRST, sum_agg#19 DESC NULLS LAST, brand_id#17 ASC NULLS FIRST], [d_year#10, brand_id#17, brand#18, sum_agg#19] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q3.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q3.sf100/simplified.txt new file mode 100644 index 0000000000000..733c9c780db94 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q3.sf100/simplified.txt @@ -0,0 +1,31 @@ +TakeOrderedAndProject [brand,brand_id,d_year,sum_agg] + WholeStageCodegen (4) + HashAggregate [d_year,i_brand,i_brand_id,sum] [brand,brand_id,sum,sum(UnscaledValue(ss_net_profit)),sum_agg] + InputAdapter + Exchange [d_year,i_brand,i_brand_id] #1 + WholeStageCodegen (3) + HashAggregate [d_year,i_brand,i_brand_id,ss_net_profit] [sum,sum] + Project [d_year,i_brand,i_brand_id,ss_net_profit] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand,i_brand_id,ss_net_profit,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_net_profit,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [i_brand,i_brand_id,i_item_sk] + Filter [i_item_sk,i_manufact_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_brand_id,i_item_sk,i_manufact_id] + InputAdapter + 
BroadcastExchange #3 + WholeStageCodegen (2) + Project [d_date_sk,d_year] + Filter [d_date_sk,d_moy] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q3/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q3/explain.txt new file mode 100644 index 0000000000000..6f5ff301fbeac --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q3/explain.txt @@ -0,0 +1,122 @@ +== Physical Plan == +TakeOrderedAndProject (21) ++- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Project (17) + +- * BroadcastHashJoin Inner BuildRight (16) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Project (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.date_dim (1) + : +- BroadcastExchange (8) + : +- * Filter (7) + : +- * ColumnarToRow (6) + : +- Scan parquet default.store_sales (5) + +- BroadcastExchange (15) + +- * Project (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet default.item (11) + + +(1) Scan parquet default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_moy), EqualTo(d_moy,12), 
Or(Or(Or(Or(Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2415355),LessThanOrEqual(d_date_sk,2415385)),And(GreaterThanOrEqual(d_date_sk,2415720),LessThanOrEqual(d_date_sk,2415750))),Or(And(GreaterThanOrEqual(d_date_sk,2416085),LessThanOrEqual(d_date_sk,2416115)),And(GreaterThanOrEqual(d_date_sk,2416450),LessThanOrEqual(d_date_sk,2416480)))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2416816),LessThanOrEqual(d_date_sk,2416846)),And(GreaterThanOrEqual(d_date_sk,2417181),LessThanOrEqual(d_date_sk,2417211))),And(GreaterThanOrEqual(d_date_sk,2417546),LessThanOrEqual(d_date_sk,2417576)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2417911),LessThanOrEqual(d_date_sk,2417941)),And(GreaterThanOrEqual(d_date_sk,2418277),LessThanOrEqual(d_date_sk,2418307))),And(GreaterThanOrEqual(d_date_sk,2418642),LessThanOrEqual(d_date_sk,2418672))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2419007),LessThanOrEqual(d_date_sk,2419037)),And(GreaterThanOrEqual(d_date_sk,2419372),LessThanOrEqual(d_date_sk,2419402))),And(GreaterThanOrEqual(d_date_sk,2419738),LessThanOrEqual(d_date_sk,2419768))))),Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2420103),LessThanOrEqual(d_date_sk,2420133)),And(GreaterThanOrEqual(d_date_sk,2420468),LessThanOrEqual(d_date_sk,2420498))),And(GreaterThanOrEqual(d_date_sk,2420833),LessThanOrEqual(d_date_sk,2420863))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2421199),LessThanOrEqual(d_date_sk,2421229)),And(GreaterThanOrEqual(d_date_sk,2421564),LessThanOrEqual(d_date_sk,2421594))),And(GreaterThanOrEqual(d_date_sk,2421929),LessThanOrEqual(d_date_sk,2421959)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2422294),LessThanOrEqual(d_date_sk,2422324)),And(GreaterThanOrEqual(d_date_sk,2422660),LessThanOrEqual(d_date_sk,2422690))),And(GreaterThanOrEqual(d_date_sk,2423025),LessThanOrEqual(d_date_sk,2423055))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2423390),LessThanOrEqual(d_date_sk,2423420)),And(GreaterThanOrEqual(d_date_sk,2423755),LessThanOrEqual(d_date_sk,2423785))),And(GreaterThanOrEqual(d_da
te_sk,2424121),LessThanOrEqual(d_date_sk,2424151)))))),Or(Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2424486),LessThanOrEqual(d_date_sk,2424516)),And(GreaterThanOrEqual(d_date_sk,2424851),LessThanOrEqual(d_date_sk,2424881))),Or(And(GreaterThanOrEqual(d_date_sk,2425216),LessThanOrEqual(d_date_sk,2425246)),And(GreaterThanOrEqual(d_date_sk,2425582),LessThanOrEqual(d_date_sk,2425612)))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2425947),LessThanOrEqual(d_date_sk,2425977)),And(GreaterThanOrEqual(d_date_sk,2426312),LessThanOrEqual(d_date_sk,2426342))),And(GreaterThanOrEqual(d_date_sk,2426677),LessThanOrEqual(d_date_sk,2426707)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2427043),LessThanOrEqual(d_date_sk,2427073)),And(GreaterThanOrEqual(d_date_sk,2427408),LessThanOrEqual(d_date_sk,2427438))),And(GreaterThanOrEqual(d_date_sk,2427773),LessThanOrEqual(d_date_sk,2427803))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2428138),LessThanOrEqual(d_date_sk,2428168)),And(GreaterThanOrEqual(d_date_sk,2428504),LessThanOrEqual(d_date_sk,2428534))),And(GreaterThanOrEqual(d_date_sk,2428869),LessThanOrEqual(d_date_sk,2428899))))),Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2429234),LessThanOrEqual(d_date_sk,2429264)),And(GreaterThanOrEqual(d_date_sk,2429599),LessThanOrEqual(d_date_sk,2429629))),And(GreaterThanOrEqual(d_date_sk,2429965),LessThanOrEqual(d_date_sk,2429995))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2430330),LessThanOrEqual(d_date_sk,2430360)),And(GreaterThanOrEqual(d_date_sk,2430695),LessThanOrEqual(d_date_sk,2430725))),And(GreaterThanOrEqual(d_date_sk,2431060),LessThanOrEqual(d_date_sk,2431090)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2431426),LessThanOrEqual(d_date_sk,2431456)),And(GreaterThanOrEqual(d_date_sk,2431791),LessThanOrEqual(d_date_sk,2431821))),And(GreaterThanOrEqual(d_date_sk,2432156),LessThanOrEqual(d_date_sk,2432186))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2432521),LessThanOrEqual(d_date_sk,2432551)),And(GreaterThanOrEqual(d_date_sk,2432887),LessThanOrEqual(d_
date_sk,2432917))),And(GreaterThanOrEqual(d_date_sk,2433252),LessThanOrEqual(d_date_sk,2433282))))))),Or(Or(Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2433617),LessThanOrEqual(d_date_sk,2433647)),And(GreaterThanOrEqual(d_date_sk,2433982),LessThanOrEqual(d_date_sk,2434012))),Or(And(GreaterThanOrEqual(d_date_sk,2434348),LessThanOrEqual(d_date_sk,2434378)),And(GreaterThanOrEqual(d_date_sk,2434713),LessThanOrEqual(d_date_sk,2434743)))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2435078),LessThanOrEqual(d_date_sk,2435108)),And(GreaterThanOrEqual(d_date_sk,2435443),LessThanOrEqual(d_date_sk,2435473))),And(GreaterThanOrEqual(d_date_sk,2435809),LessThanOrEqual(d_date_sk,2435839)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2436174),LessThanOrEqual(d_date_sk,2436204)),And(GreaterThanOrEqual(d_date_sk,2436539),LessThanOrEqual(d_date_sk,2436569))),And(GreaterThanOrEqual(d_date_sk,2436904),LessThanOrEqual(d_date_sk,2436934))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2437270),LessThanOrEqual(d_date_sk,2437300)),And(GreaterThanOrEqual(d_date_sk,2437635),LessThanOrEqual(d_date_sk,2437665))),And(GreaterThanOrEqual(d_date_sk,2438000),LessThanOrEqual(d_date_sk,2438030))))),Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2438365),LessThanOrEqual(d_date_sk,2438395)),And(GreaterThanOrEqual(d_date_sk,2438731),LessThanOrEqual(d_date_sk,2438761))),And(GreaterThanOrEqual(d_date_sk,2439096),LessThanOrEqual(d_date_sk,2439126))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2439461),LessThanOrEqual(d_date_sk,2439491)),And(GreaterThanOrEqual(d_date_sk,2439826),LessThanOrEqual(d_date_sk,2439856))),And(GreaterThanOrEqual(d_date_sk,2440192),LessThanOrEqual(d_date_sk,2440222)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2440557),LessThanOrEqual(d_date_sk,2440587)),And(GreaterThanOrEqual(d_date_sk,2440922),LessThanOrEqual(d_date_sk,2440952))),And(GreaterThanOrEqual(d_date_sk,2441287),LessThanOrEqual(d_date_sk,2441317))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2441653),LessThanOrEqual(d_date_sk,2441683)),And(Greate
rThanOrEqual(d_date_sk,2442018),LessThanOrEqual(d_date_sk,2442048))),And(GreaterThanOrEqual(d_date_sk,2442383),LessThanOrEqual(d_date_sk,2442413)))))),Or(Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2442748),LessThanOrEqual(d_date_sk,2442778)),And(GreaterThanOrEqual(d_date_sk,2443114),LessThanOrEqual(d_date_sk,2443144))),Or(And(GreaterThanOrEqual(d_date_sk,2443479),LessThanOrEqual(d_date_sk,2443509)),And(GreaterThanOrEqual(d_date_sk,2443844),LessThanOrEqual(d_date_sk,2443874)))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2444209),LessThanOrEqual(d_date_sk,2444239)),And(GreaterThanOrEqual(d_date_sk,2444575),LessThanOrEqual(d_date_sk,2444605))),And(GreaterThanOrEqual(d_date_sk,2444940),LessThanOrEqual(d_date_sk,2444970)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2445305),LessThanOrEqual(d_date_sk,2445335)),And(GreaterThanOrEqual(d_date_sk,2445670),LessThanOrEqual(d_date_sk,2445700))),And(GreaterThanOrEqual(d_date_sk,2446036),LessThanOrEqual(d_date_sk,2446066))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2446401),LessThanOrEqual(d_date_sk,2446431)),And(GreaterThanOrEqual(d_date_sk,2446766),LessThanOrEqual(d_date_sk,2446796))),And(GreaterThanOrEqual(d_date_sk,2447131),LessThanOrEqual(d_date_sk,2447161))))),Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2447497),LessThanOrEqual(d_date_sk,2447527)),And(GreaterThanOrEqual(d_date_sk,2447862),LessThanOrEqual(d_date_sk,2447892))),And(GreaterThanOrEqual(d_date_sk,2448227),LessThanOrEqual(d_date_sk,2448257))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2448592),LessThanOrEqual(d_date_sk,2448622)),And(GreaterThanOrEqual(d_date_sk,2448958),LessThanOrEqual(d_date_sk,2448988))),And(GreaterThanOrEqual(d_date_sk,2449323),LessThanOrEqual(d_date_sk,2449353)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2449688),LessThanOrEqual(d_date_sk,2449718)),And(GreaterThanOrEqual(d_date_sk,2450053),LessThanOrEqual(d_date_sk,2450083))),And(GreaterThanOrEqual(d_date_sk,2450419),LessThanOrEqual(d_date_sk,2450449))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2450784),
LessThanOrEqual(d_date_sk,2450814)),And(GreaterThanOrEqual(d_date_sk,2451149),LessThanOrEqual(d_date_sk,2451179))),And(GreaterThanOrEqual(d_date_sk,2451514),LessThanOrEqual(d_date_sk,2451544)))))))),Or(Or(Or(Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2451880),LessThanOrEqual(d_date_sk,2451910)),And(GreaterThanOrEqual(d_date_sk,2452245),LessThanOrEqual(d_date_sk,2452275))),Or(And(GreaterThanOrEqual(d_date_sk,2452610),LessThanOrEqual(d_date_sk,2452640)),And(GreaterThanOrEqual(d_date_sk,2452975),LessThanOrEqual(d_date_sk,2453005)))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2453341),LessThanOrEqual(d_date_sk,2453371)),And(GreaterThanOrEqual(d_date_sk,2453706),LessThanOrEqual(d_date_sk,2453736))),And(GreaterThanOrEqual(d_date_sk,2454071),LessThanOrEqual(d_date_sk,2454101)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2454436),LessThanOrEqual(d_date_sk,2454466)),And(GreaterThanOrEqual(d_date_sk,2454802),LessThanOrEqual(d_date_sk,2454832))),And(GreaterThanOrEqual(d_date_sk,2455167),LessThanOrEqual(d_date_sk,2455197))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2455532),LessThanOrEqual(d_date_sk,2455562)),And(GreaterThanOrEqual(d_date_sk,2455897),LessThanOrEqual(d_date_sk,2455927))),And(GreaterThanOrEqual(d_date_sk,2456263),LessThanOrEqual(d_date_sk,2456293))))),Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2456628),LessThanOrEqual(d_date_sk,2456658)),And(GreaterThanOrEqual(d_date_sk,2456993),LessThanOrEqual(d_date_sk,2457023))),And(GreaterThanOrEqual(d_date_sk,2457358),LessThanOrEqual(d_date_sk,2457388))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2457724),LessThanOrEqual(d_date_sk,2457754)),And(GreaterThanOrEqual(d_date_sk,2458089),LessThanOrEqual(d_date_sk,2458119))),And(GreaterThanOrEqual(d_date_sk,2458454),LessThanOrEqual(d_date_sk,2458484)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2458819),LessThanOrEqual(d_date_sk,2458849)),And(GreaterThanOrEqual(d_date_sk,2459185),LessThanOrEqual(d_date_sk,2459215))),And(GreaterThanOrEqual(d_date_sk,2459550),LessThanOrEqual(d_date_sk,24595
80))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2459915),LessThanOrEqual(d_date_sk,2459945)),And(GreaterThanOrEqual(d_date_sk,2460280),LessThanOrEqual(d_date_sk,2460310))),And(GreaterThanOrEqual(d_date_sk,2460646),LessThanOrEqual(d_date_sk,2460676)))))),Or(Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2461011),LessThanOrEqual(d_date_sk,2461041)),And(GreaterThanOrEqual(d_date_sk,2461376),LessThanOrEqual(d_date_sk,2461406))),Or(And(GreaterThanOrEqual(d_date_sk,2461741),LessThanOrEqual(d_date_sk,2461771)),And(GreaterThanOrEqual(d_date_sk,2462107),LessThanOrEqual(d_date_sk,2462137)))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2462472),LessThanOrEqual(d_date_sk,2462502)),And(GreaterThanOrEqual(d_date_sk,2462837),LessThanOrEqual(d_date_sk,2462867))),And(GreaterThanOrEqual(d_date_sk,2463202),LessThanOrEqual(d_date_sk,2463232)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2463568),LessThanOrEqual(d_date_sk,2463598)),And(GreaterThanOrEqual(d_date_sk,2463933),LessThanOrEqual(d_date_sk,2463963))),And(GreaterThanOrEqual(d_date_sk,2464298),LessThanOrEqual(d_date_sk,2464328))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2464663),LessThanOrEqual(d_date_sk,2464693)),And(GreaterThanOrEqual(d_date_sk,2465029),LessThanOrEqual(d_date_sk,2465059))),And(GreaterThanOrEqual(d_date_sk,2465394),LessThanOrEqual(d_date_sk,2465424))))),Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2465759),LessThanOrEqual(d_date_sk,2465789)),And(GreaterThanOrEqual(d_date_sk,2466124),LessThanOrEqual(d_date_sk,2466154))),And(GreaterThanOrEqual(d_date_sk,2466490),LessThanOrEqual(d_date_sk,2466520))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2466855),LessThanOrEqual(d_date_sk,2466885)),And(GreaterThanOrEqual(d_date_sk,2467220),LessThanOrEqual(d_date_sk,2467250))),And(GreaterThanOrEqual(d_date_sk,2467585),LessThanOrEqual(d_date_sk,2467615)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2467951),LessThanOrEqual(d_date_sk,2467981)),And(GreaterThanOrEqual(d_date_sk,2468316),LessThanOrEqual(d_date_sk,2468346))),And(GreaterThanOrEqual(d_da
te_sk,2468681),LessThanOrEqual(d_date_sk,2468711))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2469046),LessThanOrEqual(d_date_sk,2469076)),And(GreaterThanOrEqual(d_date_sk,2469412),LessThanOrEqual(d_date_sk,2469442))),And(GreaterThanOrEqual(d_date_sk,2469777),LessThanOrEqual(d_date_sk,2469807))))))),Or(Or(Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2470142),LessThanOrEqual(d_date_sk,2470172)),And(GreaterThanOrEqual(d_date_sk,2470507),LessThanOrEqual(d_date_sk,2470537))),Or(And(GreaterThanOrEqual(d_date_sk,2470873),LessThanOrEqual(d_date_sk,2470903)),And(GreaterThanOrEqual(d_date_sk,2471238),LessThanOrEqual(d_date_sk,2471268)))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2471603),LessThanOrEqual(d_date_sk,2471633)),And(GreaterThanOrEqual(d_date_sk,2471968),LessThanOrEqual(d_date_sk,2471998))),And(GreaterThanOrEqual(d_date_sk,2472334),LessThanOrEqual(d_date_sk,2472364)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2472699),LessThanOrEqual(d_date_sk,2472729)),And(GreaterThanOrEqual(d_date_sk,2473064),LessThanOrEqual(d_date_sk,2473094))),And(GreaterThanOrEqual(d_date_sk,2473429),LessThanOrEqual(d_date_sk,2473459))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2473795),LessThanOrEqual(d_date_sk,2473825)),And(GreaterThanOrEqual(d_date_sk,2474160),LessThanOrEqual(d_date_sk,2474190))),And(GreaterThanOrEqual(d_date_sk,2474525),LessThanOrEqual(d_date_sk,2474555))))),Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2474890),LessThanOrEqual(d_date_sk,2474920)),And(GreaterThanOrEqual(d_date_sk,2475256),LessThanOrEqual(d_date_sk,2475286))),And(GreaterThanOrEqual(d_date_sk,2475621),LessThanOrEqual(d_date_sk,2475651))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2475986),LessThanOrEqual(d_date_sk,2476016)),And(GreaterThanOrEqual(d_date_sk,2476351),LessThanOrEqual(d_date_sk,2476381))),And(GreaterThanOrEqual(d_date_sk,2476717),LessThanOrEqual(d_date_sk,2476747)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2477082),LessThanOrEqual(d_date_sk,2477112)),And(GreaterThanOrEqual(d_date_sk,2477447),LessThanOrEqua
l(d_date_sk,2477477))),And(GreaterThanOrEqual(d_date_sk,2477812),LessThanOrEqual(d_date_sk,2477842))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2478178),LessThanOrEqual(d_date_sk,2478208)),And(GreaterThanOrEqual(d_date_sk,2478543),LessThanOrEqual(d_date_sk,2478573))),And(GreaterThanOrEqual(d_date_sk,2478908),LessThanOrEqual(d_date_sk,2478938)))))),Or(Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2479273),LessThanOrEqual(d_date_sk,2479303)),And(GreaterThanOrEqual(d_date_sk,2479639),LessThanOrEqual(d_date_sk,2479669))),Or(And(GreaterThanOrEqual(d_date_sk,2480004),LessThanOrEqual(d_date_sk,2480034)),And(GreaterThanOrEqual(d_date_sk,2480369),LessThanOrEqual(d_date_sk,2480399)))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2480734),LessThanOrEqual(d_date_sk,2480764)),And(GreaterThanOrEqual(d_date_sk,2481100),LessThanOrEqual(d_date_sk,2481130))),And(GreaterThanOrEqual(d_date_sk,2481465),LessThanOrEqual(d_date_sk,2481495)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2481830),LessThanOrEqual(d_date_sk,2481860)),And(GreaterThanOrEqual(d_date_sk,2482195),LessThanOrEqual(d_date_sk,2482225))),And(GreaterThanOrEqual(d_date_sk,2482561),LessThanOrEqual(d_date_sk,2482591))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2482926),LessThanOrEqual(d_date_sk,2482956)),And(GreaterThanOrEqual(d_date_sk,2483291),LessThanOrEqual(d_date_sk,2483321))),And(GreaterThanOrEqual(d_date_sk,2483656),LessThanOrEqual(d_date_sk,2483686))))),Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2484022),LessThanOrEqual(d_date_sk,2484052)),And(GreaterThanOrEqual(d_date_sk,2484387),LessThanOrEqual(d_date_sk,2484417))),And(GreaterThanOrEqual(d_date_sk,2484752),LessThanOrEqual(d_date_sk,2484782))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2485117),LessThanOrEqual(d_date_sk,2485147)),And(GreaterThanOrEqual(d_date_sk,2485483),LessThanOrEqual(d_date_sk,2485513))),And(GreaterThanOrEqual(d_date_sk,2485848),LessThanOrEqual(d_date_sk,2485878)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2486213),LessThanOrEqual(d_date_sk,2486243)),And(Greate
rThanOrEqual(d_date_sk,2486578),LessThanOrEqual(d_date_sk,2486608))),And(GreaterThanOrEqual(d_date_sk,2486944),LessThanOrEqual(d_date_sk,2486974))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2487309),LessThanOrEqual(d_date_sk,2487339)),And(GreaterThanOrEqual(d_date_sk,2487674),LessThanOrEqual(d_date_sk,2487704))),And(GreaterThanOrEqual(d_date_sk,2488039),LessThanOrEqual(d_date_sk,2488069))))))))), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(3) Filter [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : (((isnotnull(d_moy#3) AND (d_moy#3 = 12)) AND ((((((((((d_date_sk#1 >= 2415355) AND (d_date_sk#1 <= 2415385)) OR ((d_date_sk#1 >= 2415720) AND (d_date_sk#1 <= 2415750))) OR (((d_date_sk#1 >= 2416085) AND (d_date_sk#1 <= 2416115)) OR ((d_date_sk#1 >= 2416450) AND (d_date_sk#1 <= 2416480)))) OR ((((d_date_sk#1 >= 2416816) AND (d_date_sk#1 <= 2416846)) OR ((d_date_sk#1 >= 2417181) AND (d_date_sk#1 <= 2417211))) OR ((d_date_sk#1 >= 2417546) AND (d_date_sk#1 <= 2417576)))) OR (((((d_date_sk#1 >= 2417911) AND (d_date_sk#1 <= 2417941)) OR ((d_date_sk#1 >= 2418277) AND (d_date_sk#1 <= 2418307))) OR ((d_date_sk#1 >= 2418642) AND (d_date_sk#1 <= 2418672))) OR ((((d_date_sk#1 >= 2419007) AND (d_date_sk#1 <= 2419037)) OR ((d_date_sk#1 >= 2419372) AND (d_date_sk#1 <= 2419402))) OR ((d_date_sk#1 >= 2419738) AND (d_date_sk#1 <= 2419768))))) OR ((((((d_date_sk#1 >= 2420103) AND (d_date_sk#1 <= 2420133)) OR ((d_date_sk#1 >= 2420468) AND (d_date_sk#1 <= 2420498))) OR ((d_date_sk#1 >= 2420833) AND (d_date_sk#1 <= 2420863))) OR ((((d_date_sk#1 >= 2421199) AND (d_date_sk#1 <= 2421229)) OR ((d_date_sk#1 >= 2421564) AND (d_date_sk#1 <= 2421594))) OR ((d_date_sk#1 >= 2421929) AND (d_date_sk#1 <= 2421959)))) OR (((((d_date_sk#1 >= 2422294) AND (d_date_sk#1 <= 2422324)) OR ((d_date_sk#1 >= 2422660) AND (d_date_sk#1 <= 2422690))) OR ((d_date_sk#1 >= 2423025) AND (d_date_sk#1 <= 2423055))) OR 
((((d_date_sk#1 >= 2423390) AND (d_date_sk#1 <= 2423420)) OR ((d_date_sk#1 >= 2423755) AND (d_date_sk#1 <= 2423785))) OR ((d_date_sk#1 >= 2424121) AND (d_date_sk#1 <= 2424151)))))) OR (((((((d_date_sk#1 >= 2424486) AND (d_date_sk#1 <= 2424516)) OR ((d_date_sk#1 >= 2424851) AND (d_date_sk#1 <= 2424881))) OR (((d_date_sk#1 >= 2425216) AND (d_date_sk#1 <= 2425246)) OR ((d_date_sk#1 >= 2425582) AND (d_date_sk#1 <= 2425612)))) OR ((((d_date_sk#1 >= 2425947) AND (d_date_sk#1 <= 2425977)) OR ((d_date_sk#1 >= 2426312) AND (d_date_sk#1 <= 2426342))) OR ((d_date_sk#1 >= 2426677) AND (d_date_sk#1 <= 2426707)))) OR (((((d_date_sk#1 >= 2427043) AND (d_date_sk#1 <= 2427073)) OR ((d_date_sk#1 >= 2427408) AND (d_date_sk#1 <= 2427438))) OR ((d_date_sk#1 >= 2427773) AND (d_date_sk#1 <= 2427803))) OR ((((d_date_sk#1 >= 2428138) AND (d_date_sk#1 <= 2428168)) OR ((d_date_sk#1 >= 2428504) AND (d_date_sk#1 <= 2428534))) OR ((d_date_sk#1 >= 2428869) AND (d_date_sk#1 <= 2428899))))) OR ((((((d_date_sk#1 >= 2429234) AND (d_date_sk#1 <= 2429264)) OR ((d_date_sk#1 >= 2429599) AND (d_date_sk#1 <= 2429629))) OR ((d_date_sk#1 >= 2429965) AND (d_date_sk#1 <= 2429995))) OR ((((d_date_sk#1 >= 2430330) AND (d_date_sk#1 <= 2430360)) OR ((d_date_sk#1 >= 2430695) AND (d_date_sk#1 <= 2430725))) OR ((d_date_sk#1 >= 2431060) AND (d_date_sk#1 <= 2431090)))) OR (((((d_date_sk#1 >= 2431426) AND (d_date_sk#1 <= 2431456)) OR ((d_date_sk#1 >= 2431791) AND (d_date_sk#1 <= 2431821))) OR ((d_date_sk#1 >= 2432156) AND (d_date_sk#1 <= 2432186))) OR ((((d_date_sk#1 >= 2432521) AND (d_date_sk#1 <= 2432551)) OR ((d_date_sk#1 >= 2432887) AND (d_date_sk#1 <= 2432917))) OR ((d_date_sk#1 >= 2433252) AND (d_date_sk#1 <= 2433282))))))) OR ((((((((d_date_sk#1 >= 2433617) AND (d_date_sk#1 <= 2433647)) OR ((d_date_sk#1 >= 2433982) AND (d_date_sk#1 <= 2434012))) OR (((d_date_sk#1 >= 2434348) AND (d_date_sk#1 <= 2434378)) OR ((d_date_sk#1 >= 2434713) AND (d_date_sk#1 <= 2434743)))) OR ((((d_date_sk#1 >= 2435078) AND (d_date_sk#1 
<= 2435108)) OR ((d_date_sk#1 >= 2435443) AND (d_date_sk#1 <= 2435473))) OR ((d_date_sk#1 >= 2435809) AND (d_date_sk#1 <= 2435839)))) OR (((((d_date_sk#1 >= 2436174) AND (d_date_sk#1 <= 2436204)) OR ((d_date_sk#1 >= 2436539) AND (d_date_sk#1 <= 2436569))) OR ((d_date_sk#1 >= 2436904) AND (d_date_sk#1 <= 2436934))) OR ((((d_date_sk#1 >= 2437270) AND (d_date_sk#1 <= 2437300)) OR ((d_date_sk#1 >= 2437635) AND (d_date_sk#1 <= 2437665))) OR ((d_date_sk#1 >= 2438000) AND (d_date_sk#1 <= 2438030))))) OR ((((((d_date_sk#1 >= 2438365) AND (d_date_sk#1 <= 2438395)) OR ((d_date_sk#1 >= 2438731) AND (d_date_sk#1 <= 2438761))) OR ((d_date_sk#1 >= 2439096) AND (d_date_sk#1 <= 2439126))) OR ((((d_date_sk#1 >= 2439461) AND (d_date_sk#1 <= 2439491)) OR ((d_date_sk#1 >= 2439826) AND (d_date_sk#1 <= 2439856))) OR ((d_date_sk#1 >= 2440192) AND (d_date_sk#1 <= 2440222)))) OR (((((d_date_sk#1 >= 2440557) AND (d_date_sk#1 <= 2440587)) OR ((d_date_sk#1 >= 2440922) AND (d_date_sk#1 <= 2440952))) OR ((d_date_sk#1 >= 2441287) AND (d_date_sk#1 <= 2441317))) OR ((((d_date_sk#1 >= 2441653) AND (d_date_sk#1 <= 2441683)) OR ((d_date_sk#1 >= 2442018) AND (d_date_sk#1 <= 2442048))) OR ((d_date_sk#1 >= 2442383) AND (d_date_sk#1 <= 2442413)))))) OR (((((((d_date_sk#1 >= 2442748) AND (d_date_sk#1 <= 2442778)) OR ((d_date_sk#1 >= 2443114) AND (d_date_sk#1 <= 2443144))) OR (((d_date_sk#1 >= 2443479) AND (d_date_sk#1 <= 2443509)) OR ((d_date_sk#1 >= 2443844) AND (d_date_sk#1 <= 2443874)))) OR ((((d_date_sk#1 >= 2444209) AND (d_date_sk#1 <= 2444239)) OR ((d_date_sk#1 >= 2444575) AND (d_date_sk#1 <= 2444605))) OR ((d_date_sk#1 >= 2444940) AND (d_date_sk#1 <= 2444970)))) OR (((((d_date_sk#1 >= 2445305) AND (d_date_sk#1 <= 2445335)) OR ((d_date_sk#1 >= 2445670) AND (d_date_sk#1 <= 2445700))) OR ((d_date_sk#1 >= 2446036) AND (d_date_sk#1 <= 2446066))) OR ((((d_date_sk#1 >= 2446401) AND (d_date_sk#1 <= 2446431)) OR ((d_date_sk#1 >= 2446766) AND (d_date_sk#1 <= 2446796))) OR ((d_date_sk#1 >= 2447131) AND 
(d_date_sk#1 <= 2447161))))) OR ((((((d_date_sk#1 >= 2447497) AND (d_date_sk#1 <= 2447527)) OR ((d_date_sk#1 >= 2447862) AND (d_date_sk#1 <= 2447892))) OR ((d_date_sk#1 >= 2448227) AND (d_date_sk#1 <= 2448257))) OR ((((d_date_sk#1 >= 2448592) AND (d_date_sk#1 <= 2448622)) OR ((d_date_sk#1 >= 2448958) AND (d_date_sk#1 <= 2448988))) OR ((d_date_sk#1 >= 2449323) AND (d_date_sk#1 <= 2449353)))) OR (((((d_date_sk#1 >= 2449688) AND (d_date_sk#1 <= 2449718)) OR ((d_date_sk#1 >= 2450053) AND (d_date_sk#1 <= 2450083))) OR ((d_date_sk#1 >= 2450419) AND (d_date_sk#1 <= 2450449))) OR ((((d_date_sk#1 >= 2450784) AND (d_date_sk#1 <= 2450814)) OR ((d_date_sk#1 >= 2451149) AND (d_date_sk#1 <= 2451179))) OR ((d_date_sk#1 >= 2451514) AND (d_date_sk#1 <= 2451544)))))))) OR (((((((((d_date_sk#1 >= 2451880) AND (d_date_sk#1 <= 2451910)) OR ((d_date_sk#1 >= 2452245) AND (d_date_sk#1 <= 2452275))) OR (((d_date_sk#1 >= 2452610) AND (d_date_sk#1 <= 2452640)) OR ((d_date_sk#1 >= 2452975) AND (d_date_sk#1 <= 2453005)))) OR ((((d_date_sk#1 >= 2453341) AND (d_date_sk#1 <= 2453371)) OR ((d_date_sk#1 >= 2453706) AND (d_date_sk#1 <= 2453736))) OR ((d_date_sk#1 >= 2454071) AND (d_date_sk#1 <= 2454101)))) OR (((((d_date_sk#1 >= 2454436) AND (d_date_sk#1 <= 2454466)) OR ((d_date_sk#1 >= 2454802) AND (d_date_sk#1 <= 2454832))) OR ((d_date_sk#1 >= 2455167) AND (d_date_sk#1 <= 2455197))) OR ((((d_date_sk#1 >= 2455532) AND (d_date_sk#1 <= 2455562)) OR ((d_date_sk#1 >= 2455897) AND (d_date_sk#1 <= 2455927))) OR ((d_date_sk#1 >= 2456263) AND (d_date_sk#1 <= 2456293))))) OR ((((((d_date_sk#1 >= 2456628) AND (d_date_sk#1 <= 2456658)) OR ((d_date_sk#1 >= 2456993) AND (d_date_sk#1 <= 2457023))) OR ((d_date_sk#1 >= 2457358) AND (d_date_sk#1 <= 2457388))) OR ((((d_date_sk#1 >= 2457724) AND (d_date_sk#1 <= 2457754)) OR ((d_date_sk#1 >= 2458089) AND (d_date_sk#1 <= 2458119))) OR ((d_date_sk#1 >= 2458454) AND (d_date_sk#1 <= 2458484)))) OR (((((d_date_sk#1 >= 2458819) AND (d_date_sk#1 <= 2458849)) OR ((d_date_sk#1 
>= 2459185) AND (d_date_sk#1 <= 2459215))) OR ((d_date_sk#1 >= 2459550) AND (d_date_sk#1 <= 2459580))) OR ((((d_date_sk#1 >= 2459915) AND (d_date_sk#1 <= 2459945)) OR ((d_date_sk#1 >= 2460280) AND (d_date_sk#1 <= 2460310))) OR ((d_date_sk#1 >= 2460646) AND (d_date_sk#1 <= 2460676)))))) OR (((((((d_date_sk#1 >= 2461011) AND (d_date_sk#1 <= 2461041)) OR ((d_date_sk#1 >= 2461376) AND (d_date_sk#1 <= 2461406))) OR (((d_date_sk#1 >= 2461741) AND (d_date_sk#1 <= 2461771)) OR ((d_date_sk#1 >= 2462107) AND (d_date_sk#1 <= 2462137)))) OR ((((d_date_sk#1 >= 2462472) AND (d_date_sk#1 <= 2462502)) OR ((d_date_sk#1 >= 2462837) AND (d_date_sk#1 <= 2462867))) OR ((d_date_sk#1 >= 2463202) AND (d_date_sk#1 <= 2463232)))) OR (((((d_date_sk#1 >= 2463568) AND (d_date_sk#1 <= 2463598)) OR ((d_date_sk#1 >= 2463933) AND (d_date_sk#1 <= 2463963))) OR ((d_date_sk#1 >= 2464298) AND (d_date_sk#1 <= 2464328))) OR ((((d_date_sk#1 >= 2464663) AND (d_date_sk#1 <= 2464693)) OR ((d_date_sk#1 >= 2465029) AND (d_date_sk#1 <= 2465059))) OR ((d_date_sk#1 >= 2465394) AND (d_date_sk#1 <= 2465424))))) OR ((((((d_date_sk#1 >= 2465759) AND (d_date_sk#1 <= 2465789)) OR ((d_date_sk#1 >= 2466124) AND (d_date_sk#1 <= 2466154))) OR ((d_date_sk#1 >= 2466490) AND (d_date_sk#1 <= 2466520))) OR ((((d_date_sk#1 >= 2466855) AND (d_date_sk#1 <= 2466885)) OR ((d_date_sk#1 >= 2467220) AND (d_date_sk#1 <= 2467250))) OR ((d_date_sk#1 >= 2467585) AND (d_date_sk#1 <= 2467615)))) OR (((((d_date_sk#1 >= 2467951) AND (d_date_sk#1 <= 2467981)) OR ((d_date_sk#1 >= 2468316) AND (d_date_sk#1 <= 2468346))) OR ((d_date_sk#1 >= 2468681) AND (d_date_sk#1 <= 2468711))) OR ((((d_date_sk#1 >= 2469046) AND (d_date_sk#1 <= 2469076)) OR ((d_date_sk#1 >= 2469412) AND (d_date_sk#1 <= 2469442))) OR ((d_date_sk#1 >= 2469777) AND (d_date_sk#1 <= 2469807))))))) OR ((((((((d_date_sk#1 >= 2470142) AND (d_date_sk#1 <= 2470172)) OR ((d_date_sk#1 >= 2470507) AND (d_date_sk#1 <= 2470537))) OR (((d_date_sk#1 >= 2470873) AND (d_date_sk#1 <= 2470903)) OR 
((d_date_sk#1 >= 2471238) AND (d_date_sk#1 <= 2471268)))) OR ((((d_date_sk#1 >= 2471603) AND (d_date_sk#1 <= 2471633)) OR ((d_date_sk#1 >= 2471968) AND (d_date_sk#1 <= 2471998))) OR ((d_date_sk#1 >= 2472334) AND (d_date_sk#1 <= 2472364)))) OR (((((d_date_sk#1 >= 2472699) AND (d_date_sk#1 <= 2472729)) OR ((d_date_sk#1 >= 2473064) AND (d_date_sk#1 <= 2473094))) OR ((d_date_sk#1 >= 2473429) AND (d_date_sk#1 <= 2473459))) OR ((((d_date_sk#1 >= 2473795) AND (d_date_sk#1 <= 2473825)) OR ((d_date_sk#1 >= 2474160) AND (d_date_sk#1 <= 2474190))) OR ((d_date_sk#1 >= 2474525) AND (d_date_sk#1 <= 2474555))))) OR ((((((d_date_sk#1 >= 2474890) AND (d_date_sk#1 <= 2474920)) OR ((d_date_sk#1 >= 2475256) AND (d_date_sk#1 <= 2475286))) OR ((d_date_sk#1 >= 2475621) AND (d_date_sk#1 <= 2475651))) OR ((((d_date_sk#1 >= 2475986) AND (d_date_sk#1 <= 2476016)) OR ((d_date_sk#1 >= 2476351) AND (d_date_sk#1 <= 2476381))) OR ((d_date_sk#1 >= 2476717) AND (d_date_sk#1 <= 2476747)))) OR (((((d_date_sk#1 >= 2477082) AND (d_date_sk#1 <= 2477112)) OR ((d_date_sk#1 >= 2477447) AND (d_date_sk#1 <= 2477477))) OR ((d_date_sk#1 >= 2477812) AND (d_date_sk#1 <= 2477842))) OR ((((d_date_sk#1 >= 2478178) AND (d_date_sk#1 <= 2478208)) OR ((d_date_sk#1 >= 2478543) AND (d_date_sk#1 <= 2478573))) OR ((d_date_sk#1 >= 2478908) AND (d_date_sk#1 <= 2478938)))))) OR (((((((d_date_sk#1 >= 2479273) AND (d_date_sk#1 <= 2479303)) OR ((d_date_sk#1 >= 2479639) AND (d_date_sk#1 <= 2479669))) OR (((d_date_sk#1 >= 2480004) AND (d_date_sk#1 <= 2480034)) OR ((d_date_sk#1 >= 2480369) AND (d_date_sk#1 <= 2480399)))) OR ((((d_date_sk#1 >= 2480734) AND (d_date_sk#1 <= 2480764)) OR ((d_date_sk#1 >= 2481100) AND (d_date_sk#1 <= 2481130))) OR ((d_date_sk#1 >= 2481465) AND (d_date_sk#1 <= 2481495)))) OR (((((d_date_sk#1 >= 2481830) AND (d_date_sk#1 <= 2481860)) OR ((d_date_sk#1 >= 2482195) AND (d_date_sk#1 <= 2482225))) OR ((d_date_sk#1 >= 2482561) AND (d_date_sk#1 <= 2482591))) OR ((((d_date_sk#1 >= 2482926) AND (d_date_sk#1 <= 
2482956)) OR ((d_date_sk#1 >= 2483291) AND (d_date_sk#1 <= 2483321))) OR ((d_date_sk#1 >= 2483656) AND (d_date_sk#1 <= 2483686))))) OR ((((((d_date_sk#1 >= 2484022) AND (d_date_sk#1 <= 2484052)) OR ((d_date_sk#1 >= 2484387) AND (d_date_sk#1 <= 2484417))) OR ((d_date_sk#1 >= 2484752) AND (d_date_sk#1 <= 2484782))) OR ((((d_date_sk#1 >= 2485117) AND (d_date_sk#1 <= 2485147)) OR ((d_date_sk#1 >= 2485483) AND (d_date_sk#1 <= 2485513))) OR ((d_date_sk#1 >= 2485848) AND (d_date_sk#1 <= 2485878)))) OR (((((d_date_sk#1 >= 2486213) AND (d_date_sk#1 <= 2486243)) OR ((d_date_sk#1 >= 2486578) AND (d_date_sk#1 <= 2486608))) OR ((d_date_sk#1 >= 2486944) AND (d_date_sk#1 <= 2486974))) OR ((((d_date_sk#1 >= 2487309) AND (d_date_sk#1 <= 2487339)) OR ((d_date_sk#1 >= 2487674) AND (d_date_sk#1 <= 2487704))) OR ((d_date_sk#1 >= 2488039) AND (d_date_sk#1 <= 2488069)))))))))) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 3] +Output [2]: [d_date_sk#1, d_year#2] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(5) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_net_profit#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store_sales] +PushedFilters: 
[Or(Or(Or(Or(Or(Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2415355),LessThanOrEqual(ss_sold_date_sk,2415385)),And(GreaterThanOrEqual(ss_sold_date_sk,2415720),LessThanOrEqual(ss_sold_date_sk,2415750))),Or(And(GreaterThanOrEqual(ss_sold_date_sk,2416085),LessThanOrEqual(ss_sold_date_sk,2416115)),And(GreaterThanOrEqual(ss_sold_date_sk,2416450),LessThanOrEqual(ss_sold_date_sk,2416480)))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2416816),LessThanOrEqual(ss_sold_date_sk,2416846)),And(GreaterThanOrEqual(ss_sold_date_sk,2417181),LessThanOrEqual(ss_sold_date_sk,2417211))),And(GreaterThanOrEqual(ss_sold_date_sk,2417546),LessThanOrEqual(ss_sold_date_sk,2417576)))),Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2417911),LessThanOrEqual(ss_sold_date_sk,2417941)),And(GreaterThanOrEqual(ss_sold_date_sk,2418277),LessThanOrEqual(ss_sold_date_sk,2418307))),And(GreaterThanOrEqual(ss_sold_date_sk,2418642),LessThanOrEqual(ss_sold_date_sk,2418672))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2419007),LessThanOrEqual(ss_sold_date_sk,2419037)),And(GreaterThanOrEqual(ss_sold_date_sk,2419372),LessThanOrEqual(ss_sold_date_sk,2419402))),And(GreaterThanOrEqual(ss_sold_date_sk,2419738),LessThanOrEqual(ss_sold_date_sk,2419768))))),Or(Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2420103),LessThanOrEqual(ss_sold_date_sk,2420133)),And(GreaterThanOrEqual(ss_sold_date_sk,2420468),LessThanOrEqual(ss_sold_date_sk,2420498))),And(GreaterThanOrEqual(ss_sold_date_sk,2420833),LessThanOrEqual(ss_sold_date_sk,2420863))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2421199),LessThanOrEqual(ss_sold_date_sk,2421229)),And(GreaterThanOrEqual(ss_sold_date_sk,2421564),LessThanOrEqual(ss_sold_date_sk,2421594))),And(GreaterThanOrEqual(ss_sold_date_sk,2421929),LessThanOrEqual(ss_sold_date_sk,2421959)))),Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2422294),LessThanOrEqual(ss_sold_date_sk,2422324)),And(GreaterThanOrEqual(ss_sold_date_sk,2422660),LessThanOrEqual(ss_sold_date_sk,2422690))),And(GreaterThanO
rEqual(ss_sold_date_sk,2423025),LessThanOrEqual(ss_sold_date_sk,2423055))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2423390),LessThanOrEqual(ss_sold_date_sk,2423420)),And(GreaterThanOrEqual(ss_sold_date_sk,2423755),LessThanOrEqual(ss_sold_date_sk,2423785))),And(GreaterThanOrEqual(ss_sold_date_sk,2424121),LessThanOrEqual(ss_sold_date_sk,2424151)))))),Or(Or(Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2424486),LessThanOrEqual(ss_sold_date_sk,2424516)),And(GreaterThanOrEqual(ss_sold_date_sk,2424851),LessThanOrEqual(ss_sold_date_sk,2424881))),Or(And(GreaterThanOrEqual(ss_sold_date_sk,2425216),LessThanOrEqual(ss_sold_date_sk,2425246)),And(GreaterThanOrEqual(ss_sold_date_sk,2425582),LessThanOrEqual(ss_sold_date_sk,2425612)))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2425947),LessThanOrEqual(ss_sold_date_sk,2425977)),And(GreaterThanOrEqual(ss_sold_date_sk,2426312),LessThanOrEqual(ss_sold_date_sk,2426342))),And(GreaterThanOrEqual(ss_sold_date_sk,2426677),LessThanOrEqual(ss_sold_date_sk,2426707)))),Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2427043),LessThanOrEqual(ss_sold_date_sk,2427073)),And(GreaterThanOrEqual(ss_sold_date_sk,2427408),LessThanOrEqual(ss_sold_date_sk,2427438))),And(GreaterThanOrEqual(ss_sold_date_sk,2427773),LessThanOrEqual(ss_sold_date_sk,2427803))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2428138),LessThanOrEqual(ss_sold_date_sk,2428168)),And(GreaterThanOrEqual(ss_sold_date_sk,2428504),LessThanOrEqual(ss_sold_date_sk,2428534))),And(GreaterThanOrEqual(ss_sold_date_sk,2428869),LessThanOrEqual(ss_sold_date_sk,2428899))))),Or(Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2429234),LessThanOrEqual(ss_sold_date_sk,2429264)),And(GreaterThanOrEqual(ss_sold_date_sk,2429599),LessThanOrEqual(ss_sold_date_sk,2429629))),And(GreaterThanOrEqual(ss_sold_date_sk,2429965),LessThanOrEqual(ss_sold_date_sk,2429995))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2430330),LessThanOrEqual(ss_sold_date_sk,2430360)),And(GreaterThanOrEqual(ss_sold_date_sk,2430
695),LessThanOrEqual(ss_sold_date_sk,2430725))),And(GreaterThanOrEqual(ss_sold_date_sk,2431060),LessThanOrEqual(ss_sold_date_sk,2431090)))),Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2431426),LessThanOrEqual(ss_sold_date_sk,2431456)),And(GreaterThanOrEqual(ss_sold_date_sk,2431791),LessThanOrEqual(ss_sold_date_sk,2431821))),And(GreaterThanOrEqual(ss_sold_date_sk,2432156),LessThanOrEqual(ss_sold_date_sk,2432186))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2432521),LessThanOrEqual(ss_sold_date_sk,2432551)),And(GreaterThanOrEqual(ss_sold_date_sk,2432887),LessThanOrEqual(ss_sold_date_sk,2432917))),And(GreaterThanOrEqual(ss_sold_date_sk,2433252),LessThanOrEqual(ss_sold_date_sk,2433282))))))),Or(Or(Or(Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2433617),LessThanOrEqual(ss_sold_date_sk,2433647)),And(GreaterThanOrEqual(ss_sold_date_sk,2433982),LessThanOrEqual(ss_sold_date_sk,2434012))),Or(And(GreaterThanOrEqual(ss_sold_date_sk,2434348),LessThanOrEqual(ss_sold_date_sk,2434378)),And(GreaterThanOrEqual(ss_sold_date_sk,2434713),LessThanOrEqual(ss_sold_date_sk,2434743)))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2435078),LessThanOrEqual(ss_sold_date_sk,2435108)),And(GreaterThanOrEqual(ss_sold_date_sk,2435443),LessThanOrEqual(ss_sold_date_sk,2435473))),And(GreaterThanOrEqual(ss_sold_date_sk,2435809),LessThanOrEqual(ss_sold_date_sk,2435839)))),Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2436174),LessThanOrEqual(ss_sold_date_sk,2436204)),And(GreaterThanOrEqual(ss_sold_date_sk,2436539),LessThanOrEqual(ss_sold_date_sk,2436569))),And(GreaterThanOrEqual(ss_sold_date_sk,2436904),LessThanOrEqual(ss_sold_date_sk,2436934))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2437270),LessThanOrEqual(ss_sold_date_sk,2437300)),And(GreaterThanOrEqual(ss_sold_date_sk,2437635),LessThanOrEqual(ss_sold_date_sk,2437665))),And(GreaterThanOrEqual(ss_sold_date_sk,2438000),LessThanOrEqual(ss_sold_date_sk,2438030))))),Or(Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2438365),LessThanOrEqu
al(ss_sold_date_sk,2438395)),And(GreaterThanOrEqual(ss_sold_date_sk,2438731),LessThanOrEqual(ss_sold_date_sk,2438761))),And(GreaterThanOrEqual(ss_sold_date_sk,2439096),LessThanOrEqual(ss_sold_date_sk,2439126))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2439461),LessThanOrEqual(ss_sold_date_sk,2439491)),And(GreaterThanOrEqual(ss_sold_date_sk,2439826),LessThanOrEqual(ss_sold_date_sk,2439856))),And(GreaterThanOrEqual(ss_sold_date_sk,2440192),LessThanOrEqual(ss_sold_date_sk,2440222)))),Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2440557),LessThanOrEqual(ss_sold_date_sk,2440587)),And(GreaterThanOrEqual(ss_sold_date_sk,2440922),LessThanOrEqual(ss_sold_date_sk,2440952))),And(GreaterThanOrEqual(ss_sold_date_sk,2441287),LessThanOrEqual(ss_sold_date_sk,2441317))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2441653),LessThanOrEqual(ss_sold_date_sk,2441683)),And(GreaterThanOrEqual(ss_sold_date_sk,2442018),LessThanOrEqual(ss_sold_date_sk,2442048))),And(GreaterThanOrEqual(ss_sold_date_sk,2442383),LessThanOrEqual(ss_sold_date_sk,2442413)))))),Or(Or(Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2442748),LessThanOrEqual(ss_sold_date_sk,2442778)),And(GreaterThanOrEqual(ss_sold_date_sk,2443114),LessThanOrEqual(ss_sold_date_sk,2443144))),Or(And(GreaterThanOrEqual(ss_sold_date_sk,2443479),LessThanOrEqual(ss_sold_date_sk,2443509)),And(GreaterThanOrEqual(ss_sold_date_sk,2443844),LessThanOrEqual(ss_sold_date_sk,2443874)))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2444209),LessThanOrEqual(ss_sold_date_sk,2444239)),And(GreaterThanOrEqual(ss_sold_date_sk,2444575),LessThanOrEqual(ss_sold_date_sk,2444605))),And(GreaterThanOrEqual(ss_sold_date_sk,2444940),LessThanOrEqual(ss_sold_date_sk,2444970)))),Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2445305),LessThanOrEqual(ss_sold_date_sk,2445335)),And(GreaterThanOrEqual(ss_sold_date_sk,2445670),LessThanOrEqual(ss_sold_date_sk,2445700))),And(GreaterThanOrEqual(ss_sold_date_sk,2446036),LessThanOrEqual(ss_sold_date_sk,2446066))),Or(Or(A
nd(GreaterThanOrEqual(ss_sold_date_sk,2446401),LessThanOrEqual(ss_sold_date_sk,2446431)),And(GreaterThanOrEqual(ss_sold_date_sk,2446766),LessThanOrEqual(ss_sold_date_sk,2446796))),And(GreaterThanOrEqual(ss_sold_date_sk,2447131),LessThanOrEqual(ss_sold_date_sk,2447161))))),Or(Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2447497),LessThanOrEqual(ss_sold_date_sk,2447527)),And(GreaterThanOrEqual(ss_sold_date_sk,2447862),LessThanOrEqual(ss_sold_date_sk,2447892))),And(GreaterThanOrEqual(ss_sold_date_sk,2448227),LessThanOrEqual(ss_sold_date_sk,2448257))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2448592),LessThanOrEqual(ss_sold_date_sk,2448622)),And(GreaterThanOrEqual(ss_sold_date_sk,2448958),LessThanOrEqual(ss_sold_date_sk,2448988))),And(GreaterThanOrEqual(ss_sold_date_sk,2449323),LessThanOrEqual(ss_sold_date_sk,2449353)))),Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2449688),LessThanOrEqual(ss_sold_date_sk,2449718)),And(GreaterThanOrEqual(ss_sold_date_sk,2450053),LessThanOrEqual(ss_sold_date_sk,2450083))),And(GreaterThanOrEqual(ss_sold_date_sk,2450419),LessThanOrEqual(ss_sold_date_sk,2450449))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2450784),LessThanOrEqual(ss_sold_date_sk,2450814)),And(GreaterThanOrEqual(ss_sold_date_sk,2451149),LessThanOrEqual(ss_sold_date_sk,2451179))),And(GreaterThanOrEqual(ss_sold_date_sk,2451514),LessThanOrEqual(ss_sold_date_sk,2451544)))))))),Or(Or(Or(Or(Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2451880),LessThanOrEqual(ss_sold_date_sk,2451910)),And(GreaterThanOrEqual(ss_sold_date_sk,2452245),LessThanOrEqual(ss_sold_date_sk,2452275))),Or(And(GreaterThanOrEqual(ss_sold_date_sk,2452610),LessThanOrEqual(ss_sold_date_sk,2452640)),And(GreaterThanOrEqual(ss_sold_date_sk,2452975),LessThanOrEqual(ss_sold_date_sk,2453005)))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2453341),LessThanOrEqual(ss_sold_date_sk,2453371)),And(GreaterThanOrEqual(ss_sold_date_sk,2453706),LessThanOrEqual(ss_sold_date_sk,2453736))),And(GreaterThanOrEqual(ss_
sold_date_sk,2454071),LessThanOrEqual(ss_sold_date_sk,2454101)))),Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2454436),LessThanOrEqual(ss_sold_date_sk,2454466)),And(GreaterThanOrEqual(ss_sold_date_sk,2454802),LessThanOrEqual(ss_sold_date_sk,2454832))),And(GreaterThanOrEqual(ss_sold_date_sk,2455167),LessThanOrEqual(ss_sold_date_sk,2455197))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2455532),LessThanOrEqual(ss_sold_date_sk,2455562)),And(GreaterThanOrEqual(ss_sold_date_sk,2455897),LessThanOrEqual(ss_sold_date_sk,2455927))),And(GreaterThanOrEqual(ss_sold_date_sk,2456263),LessThanOrEqual(ss_sold_date_sk,2456293))))),Or(Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2456628),LessThanOrEqual(ss_sold_date_sk,2456658)),And(GreaterThanOrEqual(ss_sold_date_sk,2456993),LessThanOrEqual(ss_sold_date_sk,2457023))),And(GreaterThanOrEqual(ss_sold_date_sk,2457358),LessThanOrEqual(ss_sold_date_sk,2457388))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2457724),LessThanOrEqual(ss_sold_date_sk,2457754)),And(GreaterThanOrEqual(ss_sold_date_sk,2458089),LessThanOrEqual(ss_sold_date_sk,2458119))),And(GreaterThanOrEqual(ss_sold_date_sk,2458454),LessThanOrEqual(ss_sold_date_sk,2458484)))),Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2458819),LessThanOrEqual(ss_sold_date_sk,2458849)),And(GreaterThanOrEqual(ss_sold_date_sk,2459185),LessThanOrEqual(ss_sold_date_sk,2459215))),And(GreaterThanOrEqual(ss_sold_date_sk,2459550),LessThanOrEqual(ss_sold_date_sk,2459580))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2459915),LessThanOrEqual(ss_sold_date_sk,2459945)),And(GreaterThanOrEqual(ss_sold_date_sk,2460280),LessThanOrEqual(ss_sold_date_sk,2460310))),And(GreaterThanOrEqual(ss_sold_date_sk,2460646),LessThanOrEqual(ss_sold_date_sk,2460676)))))),Or(Or(Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2461011),LessThanOrEqual(ss_sold_date_sk,2461041)),And(GreaterThanOrEqual(ss_sold_date_sk,2461376),LessThanOrEqual(ss_sold_date_sk,2461406))),Or(And(GreaterThanOrEqual(ss_sold_date_sk,2461741),L
essThanOrEqual(ss_sold_date_sk,2461771)),And(GreaterThanOrEqual(ss_sold_date_sk,2462107),LessThanOrEqual(ss_sold_date_sk,2462137)))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2462472),LessThanOrEqual(ss_sold_date_sk,2462502)),And(GreaterThanOrEqual(ss_sold_date_sk,2462837),LessThanOrEqual(ss_sold_date_sk,2462867))),And(GreaterThanOrEqual(ss_sold_date_sk,2463202),LessThanOrEqual(ss_sold_date_sk,2463232)))),Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2463568),LessThanOrEqual(ss_sold_date_sk,2463598)),And(GreaterThanOrEqual(ss_sold_date_sk,2463933),LessThanOrEqual(ss_sold_date_sk,2463963))),And(GreaterThanOrEqual(ss_sold_date_sk,2464298),LessThanOrEqual(ss_sold_date_sk,2464328))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2464663),LessThanOrEqual(ss_sold_date_sk,2464693)),And(GreaterThanOrEqual(ss_sold_date_sk,2465029),LessThanOrEqual(ss_sold_date_sk,2465059))),And(GreaterThanOrEqual(ss_sold_date_sk,2465394),LessThanOrEqual(ss_sold_date_sk,2465424))))),Or(Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2465759),LessThanOrEqual(ss_sold_date_sk,2465789)),And(GreaterThanOrEqual(ss_sold_date_sk,2466124),LessThanOrEqual(ss_sold_date_sk,2466154))),And(GreaterThanOrEqual(ss_sold_date_sk,2466490),LessThanOrEqual(ss_sold_date_sk,2466520))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2466855),LessThanOrEqual(ss_sold_date_sk,2466885)),And(GreaterThanOrEqual(ss_sold_date_sk,2467220),LessThanOrEqual(ss_sold_date_sk,2467250))),And(GreaterThanOrEqual(ss_sold_date_sk,2467585),LessThanOrEqual(ss_sold_date_sk,2467615)))),Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2467951),LessThanOrEqual(ss_sold_date_sk,2467981)),And(GreaterThanOrEqual(ss_sold_date_sk,2468316),LessThanOrEqual(ss_sold_date_sk,2468346))),And(GreaterThanOrEqual(ss_sold_date_sk,2468681),LessThanOrEqual(ss_sold_date_sk,2468711))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2469046),LessThanOrEqual(ss_sold_date_sk,2469076)),And(GreaterThanOrEqual(ss_sold_date_sk,2469412),LessThanOrEqual(ss_sold_date_sk,2469442
))),And(GreaterThanOrEqual(ss_sold_date_sk,2469777),LessThanOrEqual(ss_sold_date_sk,2469807))))))),Or(Or(Or(Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2470142),LessThanOrEqual(ss_sold_date_sk,2470172)),And(GreaterThanOrEqual(ss_sold_date_sk,2470507),LessThanOrEqual(ss_sold_date_sk,2470537))),Or(And(GreaterThanOrEqual(ss_sold_date_sk,2470873),LessThanOrEqual(ss_sold_date_sk,2470903)),And(GreaterThanOrEqual(ss_sold_date_sk,2471238),LessThanOrEqual(ss_sold_date_sk,2471268)))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2471603),LessThanOrEqual(ss_sold_date_sk,2471633)),And(GreaterThanOrEqual(ss_sold_date_sk,2471968),LessThanOrEqual(ss_sold_date_sk,2471998))),And(GreaterThanOrEqual(ss_sold_date_sk,2472334),LessThanOrEqual(ss_sold_date_sk,2472364)))),Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2472699),LessThanOrEqual(ss_sold_date_sk,2472729)),And(GreaterThanOrEqual(ss_sold_date_sk,2473064),LessThanOrEqual(ss_sold_date_sk,2473094))),And(GreaterThanOrEqual(ss_sold_date_sk,2473429),LessThanOrEqual(ss_sold_date_sk,2473459))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2473795),LessThanOrEqual(ss_sold_date_sk,2473825)),And(GreaterThanOrEqual(ss_sold_date_sk,2474160),LessThanOrEqual(ss_sold_date_sk,2474190))),And(GreaterThanOrEqual(ss_sold_date_sk,2474525),LessThanOrEqual(ss_sold_date_sk,2474555))))),Or(Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2474890),LessThanOrEqual(ss_sold_date_sk,2474920)),And(GreaterThanOrEqual(ss_sold_date_sk,2475256),LessThanOrEqual(ss_sold_date_sk,2475286))),And(GreaterThanOrEqual(ss_sold_date_sk,2475621),LessThanOrEqual(ss_sold_date_sk,2475651))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2475986),LessThanOrEqual(ss_sold_date_sk,2476016)),And(GreaterThanOrEqual(ss_sold_date_sk,2476351),LessThanOrEqual(ss_sold_date_sk,2476381))),And(GreaterThanOrEqual(ss_sold_date_sk,2476717),LessThanOrEqual(ss_sold_date_sk,2476747)))),Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2477082),LessThanOrEqual(ss_sold_date_sk,2477112)),And(GreaterThan
OrEqual(ss_sold_date_sk,2477447),LessThanOrEqual(ss_sold_date_sk,2477477))),And(GreaterThanOrEqual(ss_sold_date_sk,2477812),LessThanOrEqual(ss_sold_date_sk,2477842))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2478178),LessThanOrEqual(ss_sold_date_sk,2478208)),And(GreaterThanOrEqual(ss_sold_date_sk,2478543),LessThanOrEqual(ss_sold_date_sk,2478573))),And(GreaterThanOrEqual(ss_sold_date_sk,2478908),LessThanOrEqual(ss_sold_date_sk,2478938)))))),Or(Or(Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2479273),LessThanOrEqual(ss_sold_date_sk,2479303)),And(GreaterThanOrEqual(ss_sold_date_sk,2479639),LessThanOrEqual(ss_sold_date_sk,2479669))),Or(And(GreaterThanOrEqual(ss_sold_date_sk,2480004),LessThanOrEqual(ss_sold_date_sk,2480034)),And(GreaterThanOrEqual(ss_sold_date_sk,2480369),LessThanOrEqual(ss_sold_date_sk,2480399)))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2480734),LessThanOrEqual(ss_sold_date_sk,2480764)),And(GreaterThanOrEqual(ss_sold_date_sk,2481100),LessThanOrEqual(ss_sold_date_sk,2481130))),And(GreaterThanOrEqual(ss_sold_date_sk,2481465),LessThanOrEqual(ss_sold_date_sk,2481495)))),Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2481830),LessThanOrEqual(ss_sold_date_sk,2481860)),And(GreaterThanOrEqual(ss_sold_date_sk,2482195),LessThanOrEqual(ss_sold_date_sk,2482225))),And(GreaterThanOrEqual(ss_sold_date_sk,2482561),LessThanOrEqual(ss_sold_date_sk,2482591))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2482926),LessThanOrEqual(ss_sold_date_sk,2482956)),And(GreaterThanOrEqual(ss_sold_date_sk,2483291),LessThanOrEqual(ss_sold_date_sk,2483321))),And(GreaterThanOrEqual(ss_sold_date_sk,2483656),LessThanOrEqual(ss_sold_date_sk,2483686))))),Or(Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2484022),LessThanOrEqual(ss_sold_date_sk,2484052)),And(GreaterThanOrEqual(ss_sold_date_sk,2484387),LessThanOrEqual(ss_sold_date_sk,2484417))),And(GreaterThanOrEqual(ss_sold_date_sk,2484752),LessThanOrEqual(ss_sold_date_sk,2484782))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,24
85117),LessThanOrEqual(ss_sold_date_sk,2485147)),And(GreaterThanOrEqual(ss_sold_date_sk,2485483),LessThanOrEqual(ss_sold_date_sk,2485513))),And(GreaterThanOrEqual(ss_sold_date_sk,2485848),LessThanOrEqual(ss_sold_date_sk,2485878)))),Or(Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2486213),LessThanOrEqual(ss_sold_date_sk,2486243)),And(GreaterThanOrEqual(ss_sold_date_sk,2486578),LessThanOrEqual(ss_sold_date_sk,2486608))),And(GreaterThanOrEqual(ss_sold_date_sk,2486944),LessThanOrEqual(ss_sold_date_sk,2486974))),Or(Or(And(GreaterThanOrEqual(ss_sold_date_sk,2487309),LessThanOrEqual(ss_sold_date_sk,2487339)),And(GreaterThanOrEqual(ss_sold_date_sk,2487674),LessThanOrEqual(ss_sold_date_sk,2487704))),And(GreaterThanOrEqual(ss_sold_date_sk,2488039),LessThanOrEqual(ss_sold_date_sk,2488069))))))))), IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_net_profit#6] + +(7) Filter [codegen id : 1] +Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_net_profit#6] +Condition : ((((((((((((ss_sold_date_sk#4 >= 2415355) AND (ss_sold_date_sk#4 <= 2415385)) OR ((ss_sold_date_sk#4 >= 2415720) AND (ss_sold_date_sk#4 <= 2415750))) OR (((ss_sold_date_sk#4 >= 2416085) AND (ss_sold_date_sk#4 <= 2416115)) OR ((ss_sold_date_sk#4 >= 2416450) AND (ss_sold_date_sk#4 <= 2416480)))) OR ((((ss_sold_date_sk#4 >= 2416816) AND (ss_sold_date_sk#4 <= 2416846)) OR ((ss_sold_date_sk#4 >= 2417181) AND (ss_sold_date_sk#4 <= 2417211))) OR ((ss_sold_date_sk#4 >= 2417546) AND (ss_sold_date_sk#4 <= 2417576)))) OR (((((ss_sold_date_sk#4 >= 2417911) AND (ss_sold_date_sk#4 <= 2417941)) OR ((ss_sold_date_sk#4 >= 2418277) AND (ss_sold_date_sk#4 <= 2418307))) OR ((ss_sold_date_sk#4 >= 2418642) AND (ss_sold_date_sk#4 <= 2418672))) OR ((((ss_sold_date_sk#4 >= 2419007) AND (ss_sold_date_sk#4 <= 2419037)) OR ((ss_sold_date_sk#4 >= 2419372) AND (ss_sold_date_sk#4 <= 2419402))) OR ((ss_sold_date_sk#4 >= 2419738) AND 
(ss_sold_date_sk#4 <= 2419768))))) OR ((((((ss_sold_date_sk#4 >= 2420103) AND (ss_sold_date_sk#4 <= 2420133)) OR ((ss_sold_date_sk#4 >= 2420468) AND (ss_sold_date_sk#4 <= 2420498))) OR ((ss_sold_date_sk#4 >= 2420833) AND (ss_sold_date_sk#4 <= 2420863))) OR ((((ss_sold_date_sk#4 >= 2421199) AND (ss_sold_date_sk#4 <= 2421229)) OR ((ss_sold_date_sk#4 >= 2421564) AND (ss_sold_date_sk#4 <= 2421594))) OR ((ss_sold_date_sk#4 >= 2421929) AND (ss_sold_date_sk#4 <= 2421959)))) OR (((((ss_sold_date_sk#4 >= 2422294) AND (ss_sold_date_sk#4 <= 2422324)) OR ((ss_sold_date_sk#4 >= 2422660) AND (ss_sold_date_sk#4 <= 2422690))) OR ((ss_sold_date_sk#4 >= 2423025) AND (ss_sold_date_sk#4 <= 2423055))) OR ((((ss_sold_date_sk#4 >= 2423390) AND (ss_sold_date_sk#4 <= 2423420)) OR ((ss_sold_date_sk#4 >= 2423755) AND (ss_sold_date_sk#4 <= 2423785))) OR ((ss_sold_date_sk#4 >= 2424121) AND (ss_sold_date_sk#4 <= 2424151)))))) OR (((((((ss_sold_date_sk#4 >= 2424486) AND (ss_sold_date_sk#4 <= 2424516)) OR ((ss_sold_date_sk#4 >= 2424851) AND (ss_sold_date_sk#4 <= 2424881))) OR (((ss_sold_date_sk#4 >= 2425216) AND (ss_sold_date_sk#4 <= 2425246)) OR ((ss_sold_date_sk#4 >= 2425582) AND (ss_sold_date_sk#4 <= 2425612)))) OR ((((ss_sold_date_sk#4 >= 2425947) AND (ss_sold_date_sk#4 <= 2425977)) OR ((ss_sold_date_sk#4 >= 2426312) AND (ss_sold_date_sk#4 <= 2426342))) OR ((ss_sold_date_sk#4 >= 2426677) AND (ss_sold_date_sk#4 <= 2426707)))) OR (((((ss_sold_date_sk#4 >= 2427043) AND (ss_sold_date_sk#4 <= 2427073)) OR ((ss_sold_date_sk#4 >= 2427408) AND (ss_sold_date_sk#4 <= 2427438))) OR ((ss_sold_date_sk#4 >= 2427773) AND (ss_sold_date_sk#4 <= 2427803))) OR ((((ss_sold_date_sk#4 >= 2428138) AND (ss_sold_date_sk#4 <= 2428168)) OR ((ss_sold_date_sk#4 >= 2428504) AND (ss_sold_date_sk#4 <= 2428534))) OR ((ss_sold_date_sk#4 >= 2428869) AND (ss_sold_date_sk#4 <= 2428899))))) OR ((((((ss_sold_date_sk#4 >= 2429234) AND (ss_sold_date_sk#4 <= 2429264)) OR ((ss_sold_date_sk#4 >= 2429599) AND (ss_sold_date_sk#4 <= 
2429629))) OR ((ss_sold_date_sk#4 >= 2429965) AND (ss_sold_date_sk#4 <= 2429995))) OR ((((ss_sold_date_sk#4 >= 2430330) AND (ss_sold_date_sk#4 <= 2430360)) OR ((ss_sold_date_sk#4 >= 2430695) AND (ss_sold_date_sk#4 <= 2430725))) OR ((ss_sold_date_sk#4 >= 2431060) AND (ss_sold_date_sk#4 <= 2431090)))) OR (((((ss_sold_date_sk#4 >= 2431426) AND (ss_sold_date_sk#4 <= 2431456)) OR ((ss_sold_date_sk#4 >= 2431791) AND (ss_sold_date_sk#4 <= 2431821))) OR ((ss_sold_date_sk#4 >= 2432156) AND (ss_sold_date_sk#4 <= 2432186))) OR ((((ss_sold_date_sk#4 >= 2432521) AND (ss_sold_date_sk#4 <= 2432551)) OR ((ss_sold_date_sk#4 >= 2432887) AND (ss_sold_date_sk#4 <= 2432917))) OR ((ss_sold_date_sk#4 >= 2433252) AND (ss_sold_date_sk#4 <= 2433282))))))) OR ((((((((ss_sold_date_sk#4 >= 2433617) AND (ss_sold_date_sk#4 <= 2433647)) OR ((ss_sold_date_sk#4 >= 2433982) AND (ss_sold_date_sk#4 <= 2434012))) OR (((ss_sold_date_sk#4 >= 2434348) AND (ss_sold_date_sk#4 <= 2434378)) OR ((ss_sold_date_sk#4 >= 2434713) AND (ss_sold_date_sk#4 <= 2434743)))) OR ((((ss_sold_date_sk#4 >= 2435078) AND (ss_sold_date_sk#4 <= 2435108)) OR ((ss_sold_date_sk#4 >= 2435443) AND (ss_sold_date_sk#4 <= 2435473))) OR ((ss_sold_date_sk#4 >= 2435809) AND (ss_sold_date_sk#4 <= 2435839)))) OR (((((ss_sold_date_sk#4 >= 2436174) AND (ss_sold_date_sk#4 <= 2436204)) OR ((ss_sold_date_sk#4 >= 2436539) AND (ss_sold_date_sk#4 <= 2436569))) OR ((ss_sold_date_sk#4 >= 2436904) AND (ss_sold_date_sk#4 <= 2436934))) OR ((((ss_sold_date_sk#4 >= 2437270) AND (ss_sold_date_sk#4 <= 2437300)) OR ((ss_sold_date_sk#4 >= 2437635) AND (ss_sold_date_sk#4 <= 2437665))) OR ((ss_sold_date_sk#4 >= 2438000) AND (ss_sold_date_sk#4 <= 2438030))))) OR ((((((ss_sold_date_sk#4 >= 2438365) AND (ss_sold_date_sk#4 <= 2438395)) OR ((ss_sold_date_sk#4 >= 2438731) AND (ss_sold_date_sk#4 <= 2438761))) OR ((ss_sold_date_sk#4 >= 2439096) AND (ss_sold_date_sk#4 <= 2439126))) OR ((((ss_sold_date_sk#4 >= 2439461) AND (ss_sold_date_sk#4 <= 2439491)) OR 
((ss_sold_date_sk#4 >= 2439826) AND (ss_sold_date_sk#4 <= 2439856))) OR ((ss_sold_date_sk#4 >= 2440192) AND (ss_sold_date_sk#4 <= 2440222)))) OR (((((ss_sold_date_sk#4 >= 2440557) AND (ss_sold_date_sk#4 <= 2440587)) OR ((ss_sold_date_sk#4 >= 2440922) AND (ss_sold_date_sk#4 <= 2440952))) OR ((ss_sold_date_sk#4 >= 2441287) AND (ss_sold_date_sk#4 <= 2441317))) OR ((((ss_sold_date_sk#4 >= 2441653) AND (ss_sold_date_sk#4 <= 2441683)) OR ((ss_sold_date_sk#4 >= 2442018) AND (ss_sold_date_sk#4 <= 2442048))) OR ((ss_sold_date_sk#4 >= 2442383) AND (ss_sold_date_sk#4 <= 2442413)))))) OR (((((((ss_sold_date_sk#4 >= 2442748) AND (ss_sold_date_sk#4 <= 2442778)) OR ((ss_sold_date_sk#4 >= 2443114) AND (ss_sold_date_sk#4 <= 2443144))) OR (((ss_sold_date_sk#4 >= 2443479) AND (ss_sold_date_sk#4 <= 2443509)) OR ((ss_sold_date_sk#4 >= 2443844) AND (ss_sold_date_sk#4 <= 2443874)))) OR ((((ss_sold_date_sk#4 >= 2444209) AND (ss_sold_date_sk#4 <= 2444239)) OR ((ss_sold_date_sk#4 >= 2444575) AND (ss_sold_date_sk#4 <= 2444605))) OR ((ss_sold_date_sk#4 >= 2444940) AND (ss_sold_date_sk#4 <= 2444970)))) OR (((((ss_sold_date_sk#4 >= 2445305) AND (ss_sold_date_sk#4 <= 2445335)) OR ((ss_sold_date_sk#4 >= 2445670) AND (ss_sold_date_sk#4 <= 2445700))) OR ((ss_sold_date_sk#4 >= 2446036) AND (ss_sold_date_sk#4 <= 2446066))) OR ((((ss_sold_date_sk#4 >= 2446401) AND (ss_sold_date_sk#4 <= 2446431)) OR ((ss_sold_date_sk#4 >= 2446766) AND (ss_sold_date_sk#4 <= 2446796))) OR ((ss_sold_date_sk#4 >= 2447131) AND (ss_sold_date_sk#4 <= 2447161))))) OR ((((((ss_sold_date_sk#4 >= 2447497) AND (ss_sold_date_sk#4 <= 2447527)) OR ((ss_sold_date_sk#4 >= 2447862) AND (ss_sold_date_sk#4 <= 2447892))) OR ((ss_sold_date_sk#4 >= 2448227) AND (ss_sold_date_sk#4 <= 2448257))) OR ((((ss_sold_date_sk#4 >= 2448592) AND (ss_sold_date_sk#4 <= 2448622)) OR ((ss_sold_date_sk#4 >= 2448958) AND (ss_sold_date_sk#4 <= 2448988))) OR ((ss_sold_date_sk#4 >= 2449323) AND (ss_sold_date_sk#4 <= 2449353)))) OR (((((ss_sold_date_sk#4 >= 
2449688) AND (ss_sold_date_sk#4 <= 2449718)) OR ((ss_sold_date_sk#4 >= 2450053) AND (ss_sold_date_sk#4 <= 2450083))) OR ((ss_sold_date_sk#4 >= 2450419) AND (ss_sold_date_sk#4 <= 2450449))) OR ((((ss_sold_date_sk#4 >= 2450784) AND (ss_sold_date_sk#4 <= 2450814)) OR ((ss_sold_date_sk#4 >= 2451149) AND (ss_sold_date_sk#4 <= 2451179))) OR ((ss_sold_date_sk#4 >= 2451514) AND (ss_sold_date_sk#4 <= 2451544)))))))) OR (((((((((ss_sold_date_sk#4 >= 2451880) AND (ss_sold_date_sk#4 <= 2451910)) OR ((ss_sold_date_sk#4 >= 2452245) AND (ss_sold_date_sk#4 <= 2452275))) OR (((ss_sold_date_sk#4 >= 2452610) AND (ss_sold_date_sk#4 <= 2452640)) OR ((ss_sold_date_sk#4 >= 2452975) AND (ss_sold_date_sk#4 <= 2453005)))) OR ((((ss_sold_date_sk#4 >= 2453341) AND (ss_sold_date_sk#4 <= 2453371)) OR ((ss_sold_date_sk#4 >= 2453706) AND (ss_sold_date_sk#4 <= 2453736))) OR ((ss_sold_date_sk#4 >= 2454071) AND (ss_sold_date_sk#4 <= 2454101)))) OR (((((ss_sold_date_sk#4 >= 2454436) AND (ss_sold_date_sk#4 <= 2454466)) OR ((ss_sold_date_sk#4 >= 2454802) AND (ss_sold_date_sk#4 <= 2454832))) OR ((ss_sold_date_sk#4 >= 2455167) AND (ss_sold_date_sk#4 <= 2455197))) OR ((((ss_sold_date_sk#4 >= 2455532) AND (ss_sold_date_sk#4 <= 2455562)) OR ((ss_sold_date_sk#4 >= 2455897) AND (ss_sold_date_sk#4 <= 2455927))) OR ((ss_sold_date_sk#4 >= 2456263) AND (ss_sold_date_sk#4 <= 2456293))))) OR ((((((ss_sold_date_sk#4 >= 2456628) AND (ss_sold_date_sk#4 <= 2456658)) OR ((ss_sold_date_sk#4 >= 2456993) AND (ss_sold_date_sk#4 <= 2457023))) OR ((ss_sold_date_sk#4 >= 2457358) AND (ss_sold_date_sk#4 <= 2457388))) OR ((((ss_sold_date_sk#4 >= 2457724) AND (ss_sold_date_sk#4 <= 2457754)) OR ((ss_sold_date_sk#4 >= 2458089) AND (ss_sold_date_sk#4 <= 2458119))) OR ((ss_sold_date_sk#4 >= 2458454) AND (ss_sold_date_sk#4 <= 2458484)))) OR (((((ss_sold_date_sk#4 >= 2458819) AND (ss_sold_date_sk#4 <= 2458849)) OR ((ss_sold_date_sk#4 >= 2459185) AND (ss_sold_date_sk#4 <= 2459215))) OR ((ss_sold_date_sk#4 >= 2459550) AND 
(ss_sold_date_sk#4 <= 2459580))) OR ((((ss_sold_date_sk#4 >= 2459915) AND (ss_sold_date_sk#4 <= 2459945)) OR ((ss_sold_date_sk#4 >= 2460280) AND (ss_sold_date_sk#4 <= 2460310))) OR ((ss_sold_date_sk#4 >= 2460646) AND (ss_sold_date_sk#4 <= 2460676)))))) OR (((((((ss_sold_date_sk#4 >= 2461011) AND (ss_sold_date_sk#4 <= 2461041)) OR ((ss_sold_date_sk#4 >= 2461376) AND (ss_sold_date_sk#4 <= 2461406))) OR (((ss_sold_date_sk#4 >= 2461741) AND (ss_sold_date_sk#4 <= 2461771)) OR ((ss_sold_date_sk#4 >= 2462107) AND (ss_sold_date_sk#4 <= 2462137)))) OR ((((ss_sold_date_sk#4 >= 2462472) AND (ss_sold_date_sk#4 <= 2462502)) OR ((ss_sold_date_sk#4 >= 2462837) AND (ss_sold_date_sk#4 <= 2462867))) OR ((ss_sold_date_sk#4 >= 2463202) AND (ss_sold_date_sk#4 <= 2463232)))) OR (((((ss_sold_date_sk#4 >= 2463568) AND (ss_sold_date_sk#4 <= 2463598)) OR ((ss_sold_date_sk#4 >= 2463933) AND (ss_sold_date_sk#4 <= 2463963))) OR ((ss_sold_date_sk#4 >= 2464298) AND (ss_sold_date_sk#4 <= 2464328))) OR ((((ss_sold_date_sk#4 >= 2464663) AND (ss_sold_date_sk#4 <= 2464693)) OR ((ss_sold_date_sk#4 >= 2465029) AND (ss_sold_date_sk#4 <= 2465059))) OR ((ss_sold_date_sk#4 >= 2465394) AND (ss_sold_date_sk#4 <= 2465424))))) OR ((((((ss_sold_date_sk#4 >= 2465759) AND (ss_sold_date_sk#4 <= 2465789)) OR ((ss_sold_date_sk#4 >= 2466124) AND (ss_sold_date_sk#4 <= 2466154))) OR ((ss_sold_date_sk#4 >= 2466490) AND (ss_sold_date_sk#4 <= 2466520))) OR ((((ss_sold_date_sk#4 >= 2466855) AND (ss_sold_date_sk#4 <= 2466885)) OR ((ss_sold_date_sk#4 >= 2467220) AND (ss_sold_date_sk#4 <= 2467250))) OR ((ss_sold_date_sk#4 >= 2467585) AND (ss_sold_date_sk#4 <= 2467615)))) OR (((((ss_sold_date_sk#4 >= 2467951) AND (ss_sold_date_sk#4 <= 2467981)) OR ((ss_sold_date_sk#4 >= 2468316) AND (ss_sold_date_sk#4 <= 2468346))) OR ((ss_sold_date_sk#4 >= 2468681) AND (ss_sold_date_sk#4 <= 2468711))) OR ((((ss_sold_date_sk#4 >= 2469046) AND (ss_sold_date_sk#4 <= 2469076)) OR ((ss_sold_date_sk#4 >= 2469412) AND (ss_sold_date_sk#4 <= 
2469442))) OR ((ss_sold_date_sk#4 >= 2469777) AND (ss_sold_date_sk#4 <= 2469807))))))) OR ((((((((ss_sold_date_sk#4 >= 2470142) AND (ss_sold_date_sk#4 <= 2470172)) OR ((ss_sold_date_sk#4 >= 2470507) AND (ss_sold_date_sk#4 <= 2470537))) OR (((ss_sold_date_sk#4 >= 2470873) AND (ss_sold_date_sk#4 <= 2470903)) OR ((ss_sold_date_sk#4 >= 2471238) AND (ss_sold_date_sk#4 <= 2471268)))) OR ((((ss_sold_date_sk#4 >= 2471603) AND (ss_sold_date_sk#4 <= 2471633)) OR ((ss_sold_date_sk#4 >= 2471968) AND (ss_sold_date_sk#4 <= 2471998))) OR ((ss_sold_date_sk#4 >= 2472334) AND (ss_sold_date_sk#4 <= 2472364)))) OR (((((ss_sold_date_sk#4 >= 2472699) AND (ss_sold_date_sk#4 <= 2472729)) OR ((ss_sold_date_sk#4 >= 2473064) AND (ss_sold_date_sk#4 <= 2473094))) OR ((ss_sold_date_sk#4 >= 2473429) AND (ss_sold_date_sk#4 <= 2473459))) OR ((((ss_sold_date_sk#4 >= 2473795) AND (ss_sold_date_sk#4 <= 2473825)) OR ((ss_sold_date_sk#4 >= 2474160) AND (ss_sold_date_sk#4 <= 2474190))) OR ((ss_sold_date_sk#4 >= 2474525) AND (ss_sold_date_sk#4 <= 2474555))))) OR ((((((ss_sold_date_sk#4 >= 2474890) AND (ss_sold_date_sk#4 <= 2474920)) OR ((ss_sold_date_sk#4 >= 2475256) AND (ss_sold_date_sk#4 <= 2475286))) OR ((ss_sold_date_sk#4 >= 2475621) AND (ss_sold_date_sk#4 <= 2475651))) OR ((((ss_sold_date_sk#4 >= 2475986) AND (ss_sold_date_sk#4 <= 2476016)) OR ((ss_sold_date_sk#4 >= 2476351) AND (ss_sold_date_sk#4 <= 2476381))) OR ((ss_sold_date_sk#4 >= 2476717) AND (ss_sold_date_sk#4 <= 2476747)))) OR (((((ss_sold_date_sk#4 >= 2477082) AND (ss_sold_date_sk#4 <= 2477112)) OR ((ss_sold_date_sk#4 >= 2477447) AND (ss_sold_date_sk#4 <= 2477477))) OR ((ss_sold_date_sk#4 >= 2477812) AND (ss_sold_date_sk#4 <= 2477842))) OR ((((ss_sold_date_sk#4 >= 2478178) AND (ss_sold_date_sk#4 <= 2478208)) OR ((ss_sold_date_sk#4 >= 2478543) AND (ss_sold_date_sk#4 <= 2478573))) OR ((ss_sold_date_sk#4 >= 2478908) AND (ss_sold_date_sk#4 <= 2478938)))))) OR (((((((ss_sold_date_sk#4 >= 2479273) AND (ss_sold_date_sk#4 <= 2479303)) OR 
((ss_sold_date_sk#4 >= 2479639) AND (ss_sold_date_sk#4 <= 2479669))) OR (((ss_sold_date_sk#4 >= 2480004) AND (ss_sold_date_sk#4 <= 2480034)) OR ((ss_sold_date_sk#4 >= 2480369) AND (ss_sold_date_sk#4 <= 2480399)))) OR ((((ss_sold_date_sk#4 >= 2480734) AND (ss_sold_date_sk#4 <= 2480764)) OR ((ss_sold_date_sk#4 >= 2481100) AND (ss_sold_date_sk#4 <= 2481130))) OR ((ss_sold_date_sk#4 >= 2481465) AND (ss_sold_date_sk#4 <= 2481495)))) OR (((((ss_sold_date_sk#4 >= 2481830) AND (ss_sold_date_sk#4 <= 2481860)) OR ((ss_sold_date_sk#4 >= 2482195) AND (ss_sold_date_sk#4 <= 2482225))) OR ((ss_sold_date_sk#4 >= 2482561) AND (ss_sold_date_sk#4 <= 2482591))) OR ((((ss_sold_date_sk#4 >= 2482926) AND (ss_sold_date_sk#4 <= 2482956)) OR ((ss_sold_date_sk#4 >= 2483291) AND (ss_sold_date_sk#4 <= 2483321))) OR ((ss_sold_date_sk#4 >= 2483656) AND (ss_sold_date_sk#4 <= 2483686))))) OR ((((((ss_sold_date_sk#4 >= 2484022) AND (ss_sold_date_sk#4 <= 2484052)) OR ((ss_sold_date_sk#4 >= 2484387) AND (ss_sold_date_sk#4 <= 2484417))) OR ((ss_sold_date_sk#4 >= 2484752) AND (ss_sold_date_sk#4 <= 2484782))) OR ((((ss_sold_date_sk#4 >= 2485117) AND (ss_sold_date_sk#4 <= 2485147)) OR ((ss_sold_date_sk#4 >= 2485483) AND (ss_sold_date_sk#4 <= 2485513))) OR ((ss_sold_date_sk#4 >= 2485848) AND (ss_sold_date_sk#4 <= 2485878)))) OR (((((ss_sold_date_sk#4 >= 2486213) AND (ss_sold_date_sk#4 <= 2486243)) OR ((ss_sold_date_sk#4 >= 2486578) AND (ss_sold_date_sk#4 <= 2486608))) OR ((ss_sold_date_sk#4 >= 2486944) AND (ss_sold_date_sk#4 <= 2486974))) OR ((((ss_sold_date_sk#4 >= 2487309) AND (ss_sold_date_sk#4 <= 2487339)) OR ((ss_sold_date_sk#4 >= 2487674) AND (ss_sold_date_sk#4 <= 2487704))) OR ((ss_sold_date_sk#4 >= 2488039) AND (ss_sold_date_sk#4 <= 2488069))))))))) AND isnotnull(ss_sold_date_sk#4)) AND isnotnull(ss_item_sk#5)) + +(8) BroadcastExchange +Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_net_profit#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] + 
+(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#4] +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [d_year#2, ss_item_sk#5, ss_net_profit#6] +Input [5]: [d_date_sk#1, d_year#2, ss_sold_date_sk#4, ss_item_sk#5, ss_net_profit#6] + +(11) Scan parquet default.item +Output [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manufact_id#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,436), IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manufact_id#11] + +(13) Filter [codegen id : 2] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manufact_id#11] +Condition : ((isnotnull(i_manufact_id#11) AND (i_manufact_id#11 = 436)) AND isnotnull(i_item_sk#8)) + +(14) Project [codegen id : 2] +Output [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manufact_id#11] + +(15) BroadcastExchange +Input [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#5] +Right keys [1]: [i_item_sk#8] +Join condition: None + +(17) Project [codegen id : 3] +Output [4]: [d_year#2, ss_net_profit#6, i_brand_id#9, i_brand#10] +Input [6]: [d_year#2, ss_item_sk#5, ss_net_profit#6, i_item_sk#8, i_brand_id#9, i_brand#10] + +(18) HashAggregate [codegen id : 3] +Input [4]: [d_year#2, ss_net_profit#6, i_brand_id#9, i_brand#10] +Keys [3]: [d_year#2, i_brand#10, i_brand_id#9] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#6))] +Aggregate Attributes [1]: [sum#13] +Results [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] + +(19) Exchange +Input [4]: 
[d_year#2, i_brand#10, i_brand_id#9, sum#14] +Arguments: hashpartitioning(d_year#2, i_brand#10, i_brand_id#9, 5), true, [id=#15] + +(20) HashAggregate [codegen id : 4] +Input [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] +Keys [3]: [d_year#2, i_brand#10, i_brand_id#9] +Functions [1]: [sum(UnscaledValue(ss_net_profit#6))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#6))#16] +Results [4]: [d_year#2, i_brand_id#9 AS brand_id#17, i_brand#10 AS brand#18, MakeDecimal(sum(UnscaledValue(ss_net_profit#6))#16,17,2) AS sum_agg#19] + +(21) TakeOrderedAndProject +Input [4]: [d_year#2, brand_id#17, brand#18, sum_agg#19] +Arguments: 100, [d_year#2 ASC NULLS FIRST, sum_agg#19 DESC NULLS LAST, brand_id#17 ASC NULLS FIRST], [d_year#2, brand_id#17, brand#18, sum_agg#19] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q3/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q3/simplified.txt new file mode 100644 index 0000000000000..0fc4dc7b931f4 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q3/simplified.txt @@ -0,0 +1,31 @@ +TakeOrderedAndProject [brand,brand_id,d_year,sum_agg] + WholeStageCodegen (4) + HashAggregate [d_year,i_brand,i_brand_id,sum] [brand,brand_id,sum,sum(UnscaledValue(ss_net_profit)),sum_agg] + InputAdapter + Exchange [d_year,i_brand,i_brand_id] #1 + WholeStageCodegen (3) + HashAggregate [d_year,i_brand,i_brand_id,ss_net_profit] [sum,sum] + Project [d_year,i_brand,i_brand_id,ss_net_profit] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [d_year,ss_item_sk,ss_net_profit] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [d_date_sk,d_year] + Filter [d_date_sk,d_moy] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales 
[ss_item_sk,ss_net_profit,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [i_brand,i_brand_id,i_item_sk] + Filter [i_item_sk,i_manufact_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_brand_id,i_item_sk,i_manufact_id] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/explain.txt new file mode 100644 index 0000000000000..71928db20bcea --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/explain.txt @@ -0,0 +1,218 @@ +== Physical Plan == +* Sort (39) ++- Exchange (38) + +- * Project (37) + +- * SortMergeJoin Inner (36) + :- * Sort (30) + : +- Exchange (29) + : +- * Filter (28) + : +- * HashAggregate (27) + : +- Exchange (26) + : +- * HashAggregate (25) + : +- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.store (11) + : +- BroadcastExchange (22) + : +- * Project (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.household_demographics (18) + +- * Sort (35) + +- Exchange (34) + +- * Filter (33) + +- * ColumnarToRow (32) + +- Scan parquet default.customer (31) + + +(1) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] +Batched: true 
+Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2450816), LessThanOrEqual(ss_sold_date_sk,2451910), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] + +(3) Filter [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] +Condition : (((((isnotnull(ss_sold_date_sk#1) AND (ss_sold_date_sk#1 >= 2450816)) AND (ss_sold_date_sk#1 <= 2451910)) AND isnotnull(ss_store_sk#4)) AND isnotnull(ss_hdemo_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#6, d_year#7, d_dom#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/date_dim] +PushedFilters: [Or(And(GreaterThanOrEqual(d_dom,1),LessThanOrEqual(d_dom,3)),And(GreaterThanOrEqual(d_dom,25),LessThanOrEqual(d_dom,28))), In(d_year, [1998,1999,2000]), GreaterThanOrEqual(d_date_sk,2450816), LessThanOrEqual(d_date_sk,2451910), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] +Condition : (((((((d_dom#8 >= 1) AND (d_dom#8 <= 3)) OR ((d_dom#8 >= 25) AND (d_dom#8 <= 28))) AND d_year#7 IN (1998,1999,2000)) AND (d_date_sk#6 >= 2450816)) AND (d_date_sk#6 <= 2451910)) AND isnotnull(d_date_sk#6)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] + +(8) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: 
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] +Input [6]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, d_date_sk#6] + +(11) Scan parquet default.store +Output [2]: [s_store_sk#10, s_county#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store] +PushedFilters: [In(s_county, [Saginaw County,Sumner County,Appanoose County,Daviess County,Fairfield County,Raleigh County,Ziebach County,Williamson County]), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#10, s_county#11] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#10, s_county#11] +Condition : (s_county#11 IN (Saginaw County,Sumner County,Appanoose County,Daviess County,Fairfield County,Raleigh County,Ziebach County,Williamson County) AND isnotnull(s_store_sk#10)) + +(14) Project [codegen id : 2] +Output [1]: [s_store_sk#10] +Input [2]: [s_store_sk#10, s_county#11] + +(15) BroadcastExchange +Input [1]: [s_store_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#10] +Join condition: None + +(17) Project [codegen id : 4] +Output [3]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_ticket_number#5] +Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, s_store_sk#10] + +(18) Scan parquet default.household_demographics +Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] +Batched: true +Location: 
InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/household_demographics] +PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,Unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] + +(20) Filter [codegen id : 3] +Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] +Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = Unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.2)) AND isnotnull(hd_demo_sk#13)) + +(21) Project [codegen id : 3] +Output [1]: [hd_demo_sk#13] +Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] + +(22) BroadcastExchange +Input [1]: [hd_demo_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_hdemo_sk#3] +Right keys [1]: [hd_demo_sk#13] +Join condition: None + +(24) Project [codegen id : 4] +Output [2]: [ss_customer_sk#2, ss_ticket_number#5] +Input [4]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_ticket_number#5, hd_demo_sk#13] + +(25) HashAggregate [codegen id : 4] +Input [2]: [ss_customer_sk#2, ss_ticket_number#5] +Keys [2]: [ss_ticket_number#5, ss_customer_sk#2] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#18] +Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] + +(26) Exchange +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] + +(27) HashAggregate [codegen id 
: 5] +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] +Keys [2]: [ss_ticket_number#5, ss_customer_sk#2] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#21] +Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count(1)#21 AS cnt#22] + +(28) Filter [codegen id : 5] +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] +Condition : ((cnt#22 >= 15) AND (cnt#22 <= 20)) + +(29) Exchange +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] +Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#23] + +(30) Sort [codegen id : 6] +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] +Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 + +(31) Scan parquet default.customer +Output [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 7] +Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] + +(33) Filter [codegen id : 7] +Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] +Condition : isnotnull(c_customer_sk#24) + +(34) Exchange +Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] +Arguments: hashpartitioning(c_customer_sk#24, 5), true, [id=#29] + +(35) Sort [codegen id : 8] +Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] +Arguments: [c_customer_sk#24 ASC NULLS FIRST], false, 0 + +(36) SortMergeJoin [codegen id : 9] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#24] +Join condition: None + +(37) Project [codegen id : 9] +Output [6]: [c_last_name#27, c_first_name#26, 
c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] +Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] + +(38) Exchange +Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] +Arguments: rangepartitioning(c_last_name#27 ASC NULLS FIRST, c_first_name#26 ASC NULLS FIRST, c_salutation#25 ASC NULLS FIRST, c_preferred_cust_flag#28 DESC NULLS LAST, 5), true, [id=#30] + +(39) Sort [codegen id : 10] +Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] +Arguments: [c_last_name#27 ASC NULLS FIRST, c_first_name#26 ASC NULLS FIRST, c_salutation#25 ASC NULLS FIRST, c_preferred_cust_flag#28 DESC NULLS LAST], true, 0 + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/simplified.txt new file mode 100644 index 0000000000000..94ec6ce18503e --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/simplified.txt @@ -0,0 +1,63 @@ +WholeStageCodegen (10) + Sort [c_first_name,c_last_name,c_preferred_cust_flag,c_salutation] + InputAdapter + Exchange [c_first_name,c_last_name,c_preferred_cust_flag,c_salutation] #1 + WholeStageCodegen (9) + Project [c_first_name,c_last_name,c_preferred_cust_flag,c_salutation,cnt,ss_ticket_number] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (6) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #2 + WholeStageCodegen (5) + Filter [cnt] + HashAggregate [count,ss_customer_sk,ss_ticket_number] [cnt,count,count(1)] + InputAdapter + Exchange [ss_customer_sk,ss_ticket_number] #3 + WholeStageCodegen (4) + HashAggregate [ss_customer_sk,ss_ticket_number] [count,count] + Project 
[ss_customer_sk,ss_ticket_number] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_ticket_number] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_dom,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_dom,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Project [s_store_sk] + Filter [s_county,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_county,s_store_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Project [hd_demo_sk] + Filter [hd_buy_potential,hd_demo_sk,hd_dep_count,hd_vehicle_count] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_buy_potential,hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + WholeStageCodegen (8) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #7 + WholeStageCodegen (7) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_preferred_cust_flag,c_salutation] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/explain.txt new file mode 100644 index 0000000000000..4da22519a1466 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/explain.txt @@ -0,0 +1,203 @@ +== Physical Plan == +* Sort (36) ++- Exchange (35) + +- * Project (34) + +- * BroadcastHashJoin Inner BuildRight (33) + :- * 
Filter (28) + : +- * HashAggregate (27) + : +- Exchange (26) + : +- * HashAggregate (25) + : +- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.store (11) + : +- BroadcastExchange (22) + : +- * Project (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.household_demographics (18) + +- BroadcastExchange (32) + +- * Filter (31) + +- * ColumnarToRow (30) + +- Scan parquet default.customer (29) + + +(1) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2450816), LessThanOrEqual(ss_sold_date_sk,2451910), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] + +(3) Filter [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] +Condition : (((((isnotnull(ss_sold_date_sk#1) AND (ss_sold_date_sk#1 >= 2450816)) AND (ss_sold_date_sk#1 <= 2451910)) AND isnotnull(ss_store_sk#4)) AND isnotnull(ss_hdemo_sk#3)) AND 
isnotnull(ss_customer_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#6, d_year#7, d_dom#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/date_dim] +PushedFilters: [Or(And(GreaterThanOrEqual(d_dom,1),LessThanOrEqual(d_dom,3)),And(GreaterThanOrEqual(d_dom,25),LessThanOrEqual(d_dom,28))), In(d_year, [1998,1999,2000]), GreaterThanOrEqual(d_date_sk,2450816), LessThanOrEqual(d_date_sk,2451910), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] +Condition : (((((((d_dom#8 >= 1) AND (d_dom#8 <= 3)) OR ((d_dom#8 >= 25) AND (d_dom#8 <= 28))) AND d_year#7 IN (1998,1999,2000)) AND (d_date_sk#6 >= 2450816)) AND (d_date_sk#6 <= 2451910)) AND isnotnull(d_date_sk#6)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] + +(8) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] +Input [6]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, d_date_sk#6] + +(11) Scan parquet default.store +Output [2]: [s_store_sk#10, s_county#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store] +PushedFilters: [In(s_county, [Saginaw County,Sumner County,Appanoose County,Daviess County,Fairfield County,Raleigh County,Ziebach County,Williamson County]), IsNotNull(s_store_sk)] +ReadSchema: 
struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#10, s_county#11] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#10, s_county#11] +Condition : (s_county#11 IN (Saginaw County,Sumner County,Appanoose County,Daviess County,Fairfield County,Raleigh County,Ziebach County,Williamson County) AND isnotnull(s_store_sk#10)) + +(14) Project [codegen id : 2] +Output [1]: [s_store_sk#10] +Input [2]: [s_store_sk#10, s_county#11] + +(15) BroadcastExchange +Input [1]: [s_store_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#10] +Join condition: None + +(17) Project [codegen id : 4] +Output [3]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_ticket_number#5] +Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, s_store_sk#10] + +(18) Scan parquet default.household_demographics +Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/household_demographics] +PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,Unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] + +(20) Filter [codegen id : 3] +Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] +Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = Unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.2)) AND 
isnotnull(hd_demo_sk#13)) + +(21) Project [codegen id : 3] +Output [1]: [hd_demo_sk#13] +Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] + +(22) BroadcastExchange +Input [1]: [hd_demo_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_hdemo_sk#3] +Right keys [1]: [hd_demo_sk#13] +Join condition: None + +(24) Project [codegen id : 4] +Output [2]: [ss_customer_sk#2, ss_ticket_number#5] +Input [4]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_ticket_number#5, hd_demo_sk#13] + +(25) HashAggregate [codegen id : 4] +Input [2]: [ss_customer_sk#2, ss_ticket_number#5] +Keys [2]: [ss_ticket_number#5, ss_customer_sk#2] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#18] +Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] + +(26) Exchange +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] + +(27) HashAggregate [codegen id : 6] +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] +Keys [2]: [ss_ticket_number#5, ss_customer_sk#2] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#21] +Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count(1)#21 AS cnt#22] + +(28) Filter [codegen id : 6] +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] +Condition : ((cnt#22 >= 15) AND (cnt#22 <= 20)) + +(29) Scan parquet default.customer +Output [5]: [c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 5] +Input [5]: [c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, 
c_preferred_cust_flag#27] + +(31) Filter [codegen id : 5] +Input [5]: [c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] +Condition : isnotnull(c_customer_sk#23) + +(32) BroadcastExchange +Input [5]: [c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#28] + +(33) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#23] +Join condition: None + +(34) Project [codegen id : 6] +Output [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] +Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] + +(35) Exchange +Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] +Arguments: rangepartitioning(c_last_name#26 ASC NULLS FIRST, c_first_name#25 ASC NULLS FIRST, c_salutation#24 ASC NULLS FIRST, c_preferred_cust_flag#27 DESC NULLS LAST, 5), true, [id=#29] + +(36) Sort [codegen id : 7] +Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] +Arguments: [c_last_name#26 ASC NULLS FIRST, c_first_name#25 ASC NULLS FIRST, c_salutation#24 ASC NULLS FIRST, c_preferred_cust_flag#27 DESC NULLS LAST], true, 0 + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/simplified.txt new file mode 100644 index 0000000000000..12cd87e119622 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/simplified.txt @@ -0,0 +1,54 @@ +WholeStageCodegen (7) + Sort [c_first_name,c_last_name,c_preferred_cust_flag,c_salutation] + 
InputAdapter + Exchange [c_first_name,c_last_name,c_preferred_cust_flag,c_salutation] #1 + WholeStageCodegen (6) + Project [c_first_name,c_last_name,c_preferred_cust_flag,c_salutation,cnt,ss_ticket_number] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [cnt] + HashAggregate [count,ss_customer_sk,ss_ticket_number] [cnt,count,count(1)] + InputAdapter + Exchange [ss_customer_sk,ss_ticket_number] #2 + WholeStageCodegen (4) + HashAggregate [ss_customer_sk,ss_ticket_number] [count,count] + Project [ss_customer_sk,ss_ticket_number] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_ticket_number] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_dom,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_dom,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [s_store_sk] + Filter [s_county,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_county,s_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [hd_demo_sk] + Filter [hd_buy_potential,hd_demo_sk,hd_dep_count,hd_vehicle_count] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_buy_potential,hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_preferred_cust_flag,c_salutation] diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q42.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q42.sf100/explain.txt new file mode 100644 index 0000000000000..5f93b5077a921 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q42.sf100/explain.txt @@ -0,0 +1,122 @@ +== Physical Plan == +TakeOrderedAndProject (21) ++- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Project (17) + +- * BroadcastHashJoin Inner BuildRight (16) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildLeft (9) + : :- BroadcastExchange (5) + : : +- * Project (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.date_dim (1) + : +- * Filter (8) + : +- * ColumnarToRow (7) + : +- Scan parquet default.store_sales (6) + +- BroadcastExchange (15) + +- * Project (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet default.item (11) + + +(1) Scan parquet default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,12), EqualTo(d_year,1998), GreaterThanOrEqual(d_date_sk,2451149), IsNotNull(d_date_sk), LessThanOrEqual(d_date_sk,2451179)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(3) Filter [codegen id : 1] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 12)) AND (d_year#2 = 1998)) AND (d_date_sk#1 >= 2451149)) AND isnotnull(d_date_sk#1)) AND (d_date_sk#1 <= 2451179)) + +(4) Project [codegen id : 1] +Output [2]: [d_date_sk#1, d_year#2] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(5) BroadcastExchange +Input [2]: [d_date_sk#1, 
d_year#2] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#4] + +(6) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#5, ss_item_sk#6, ss_ext_sales_price#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2451149), LessThanOrEqual(ss_sold_date_sk,2451179), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(7) ColumnarToRow +Input [3]: [ss_sold_date_sk#5, ss_item_sk#6, ss_ext_sales_price#7] + +(8) Filter +Input [3]: [ss_sold_date_sk#5, ss_item_sk#6, ss_ext_sales_price#7] +Condition : (((isnotnull(ss_sold_date_sk#5) AND (ss_sold_date_sk#5 >= 2451149)) AND (ss_sold_date_sk#5 <= 2451179)) AND isnotnull(ss_item_sk#6)) + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#5] +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [d_year#2, ss_item_sk#6, ss_ext_sales_price#7] +Input [5]: [d_date_sk#1, d_year#2, ss_sold_date_sk#5, ss_item_sk#6, ss_ext_sales_price#7] + +(11) Scan parquet default.item +Output [4]: [i_item_sk#8, i_category_id#9, i_category#10, i_manager_id#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [4]: [i_item_sk#8, i_category_id#9, i_category#10, i_manager_id#11] + +(13) Filter [codegen id : 2] +Input [4]: [i_item_sk#8, i_category_id#9, i_category#10, i_manager_id#11] +Condition : ((isnotnull(i_manager_id#11) AND (i_manager_id#11 = 1)) AND isnotnull(i_item_sk#8)) + +(14) Project [codegen id : 2] +Output [3]: [i_item_sk#8, i_category_id#9, 
i_category#10] +Input [4]: [i_item_sk#8, i_category_id#9, i_category#10, i_manager_id#11] + +(15) BroadcastExchange +Input [3]: [i_item_sk#8, i_category_id#9, i_category#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#6] +Right keys [1]: [i_item_sk#8] +Join condition: None + +(17) Project [codegen id : 3] +Output [4]: [d_year#2, ss_ext_sales_price#7, i_category_id#9, i_category#10] +Input [6]: [d_year#2, ss_item_sk#6, ss_ext_sales_price#7, i_item_sk#8, i_category_id#9, i_category#10] + +(18) HashAggregate [codegen id : 3] +Input [4]: [d_year#2, ss_ext_sales_price#7, i_category_id#9, i_category#10] +Keys [3]: [d_year#2, i_category_id#9, i_category#10] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#7))] +Aggregate Attributes [1]: [sum#13] +Results [4]: [d_year#2, i_category_id#9, i_category#10, sum#14] + +(19) Exchange +Input [4]: [d_year#2, i_category_id#9, i_category#10, sum#14] +Arguments: hashpartitioning(d_year#2, i_category_id#9, i_category#10, 5), true, [id=#15] + +(20) HashAggregate [codegen id : 4] +Input [4]: [d_year#2, i_category_id#9, i_category#10, sum#14] +Keys [3]: [d_year#2, i_category_id#9, i_category#10] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#7))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#7))#16] +Results [4]: [d_year#2, i_category_id#9, i_category#10, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#7))#16,17,2) AS sum(ss_ext_sales_price)#17] + +(21) TakeOrderedAndProject +Input [4]: [d_year#2, i_category_id#9, i_category#10, sum(ss_ext_sales_price)#17] +Arguments: 100, [sum(ss_ext_sales_price)#17 DESC NULLS LAST, d_year#2 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST, i_category#10 ASC NULLS FIRST], [d_year#2, i_category_id#9, i_category#10, sum(ss_ext_sales_price)#17] + diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q42.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q42.sf100/simplified.txt new file mode 100644 index 0000000000000..4be03d5b3ed7d --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q42.sf100/simplified.txt @@ -0,0 +1,31 @@ +TakeOrderedAndProject [d_year,i_category,i_category_id,sum(ss_ext_sales_price)] + WholeStageCodegen (4) + HashAggregate [d_year,i_category,i_category_id,sum] [sum,sum(UnscaledValue(ss_ext_sales_price)),sum(ss_ext_sales_price)] + InputAdapter + Exchange [d_year,i_category,i_category_id] #1 + WholeStageCodegen (3) + HashAggregate [d_year,i_category,i_category_id,ss_ext_sales_price] [sum,sum] + Project [d_year,i_category,i_category_id,ss_ext_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [d_year,ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk,d_year] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [i_category,i_category_id,i_item_sk] + Filter [i_item_sk,i_manager_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_category_id,i_item_sk,i_manager_id] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q42/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q42/explain.txt new file mode 100644 index 0000000000000..092b388c03b5c --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q42/explain.txt @@ -0,0 +1,122 @@ +== Physical Plan 
== +TakeOrderedAndProject (21) ++- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Project (17) + +- * BroadcastHashJoin Inner BuildRight (16) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Project (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.date_dim (1) + : +- BroadcastExchange (8) + : +- * Filter (7) + : +- * ColumnarToRow (6) + : +- Scan parquet default.store_sales (5) + +- BroadcastExchange (15) + +- * Project (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet default.item (11) + + +(1) Scan parquet default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,12), EqualTo(d_year,1998), LessThanOrEqual(d_date_sk,2451179), GreaterThanOrEqual(d_date_sk,2451149), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(3) Filter [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 12)) AND (d_year#2 = 1998)) AND (d_date_sk#1 <= 2451179)) AND (d_date_sk#1 >= 2451149)) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 3] +Output [2]: [d_date_sk#1, d_year#2] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(5) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2451149), LessThanOrEqual(ss_sold_date_sk,2451179), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(6) 
ColumnarToRow [codegen id : 1] +Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] + +(7) Filter [codegen id : 1] +Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] +Condition : (((isnotnull(ss_sold_date_sk#4) AND (ss_sold_date_sk#4 >= 2451149)) AND (ss_sold_date_sk#4 <= 2451179)) AND isnotnull(ss_item_sk#5)) + +(8) BroadcastExchange +Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#4] +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [d_year#2, ss_item_sk#5, ss_ext_sales_price#6] +Input [5]: [d_date_sk#1, d_year#2, ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] + +(11) Scan parquet default.item +Output [4]: [i_item_sk#8, i_category_id#9, i_category#10, i_manager_id#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [4]: [i_item_sk#8, i_category_id#9, i_category#10, i_manager_id#11] + +(13) Filter [codegen id : 2] +Input [4]: [i_item_sk#8, i_category_id#9, i_category#10, i_manager_id#11] +Condition : ((isnotnull(i_manager_id#11) AND (i_manager_id#11 = 1)) AND isnotnull(i_item_sk#8)) + +(14) Project [codegen id : 2] +Output [3]: [i_item_sk#8, i_category_id#9, i_category#10] +Input [4]: [i_item_sk#8, i_category_id#9, i_category#10, i_manager_id#11] + +(15) BroadcastExchange +Input [3]: [i_item_sk#8, i_category_id#9, i_category#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#5] +Right keys [1]: 
[i_item_sk#8] +Join condition: None + +(17) Project [codegen id : 3] +Output [4]: [d_year#2, ss_ext_sales_price#6, i_category_id#9, i_category#10] +Input [6]: [d_year#2, ss_item_sk#5, ss_ext_sales_price#6, i_item_sk#8, i_category_id#9, i_category#10] + +(18) HashAggregate [codegen id : 3] +Input [4]: [d_year#2, ss_ext_sales_price#6, i_category_id#9, i_category#10] +Keys [3]: [d_year#2, i_category_id#9, i_category#10] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#6))] +Aggregate Attributes [1]: [sum#13] +Results [4]: [d_year#2, i_category_id#9, i_category#10, sum#14] + +(19) Exchange +Input [4]: [d_year#2, i_category_id#9, i_category#10, sum#14] +Arguments: hashpartitioning(d_year#2, i_category_id#9, i_category#10, 5), true, [id=#15] + +(20) HashAggregate [codegen id : 4] +Input [4]: [d_year#2, i_category_id#9, i_category#10, sum#14] +Keys [3]: [d_year#2, i_category_id#9, i_category#10] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#6))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#6))#16] +Results [4]: [d_year#2, i_category_id#9, i_category#10, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#16,17,2) AS sum(ss_ext_sales_price)#17] + +(21) TakeOrderedAndProject +Input [4]: [d_year#2, i_category_id#9, i_category#10, sum(ss_ext_sales_price)#17] +Arguments: 100, [sum(ss_ext_sales_price)#17 DESC NULLS LAST, d_year#2 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST, i_category#10 ASC NULLS FIRST], [d_year#2, i_category_id#9, i_category#10, sum(ss_ext_sales_price)#17] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q42/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q42/simplified.txt new file mode 100644 index 0000000000000..c8fc6e5c63919 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q42/simplified.txt @@ -0,0 +1,31 @@ +TakeOrderedAndProject 
[d_year,i_category,i_category_id,sum(ss_ext_sales_price)] + WholeStageCodegen (4) + HashAggregate [d_year,i_category,i_category_id,sum] [sum,sum(UnscaledValue(ss_ext_sales_price)),sum(ss_ext_sales_price)] + InputAdapter + Exchange [d_year,i_category,i_category_id] #1 + WholeStageCodegen (3) + HashAggregate [d_year,i_category,i_category_id,ss_ext_sales_price] [sum,sum] + Project [d_year,i_category,i_category_id,ss_ext_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [d_year,ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [d_date_sk,d_year] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [i_category,i_category_id,i_item_sk] + Filter [i_item_sk,i_manager_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_category_id,i_item_sk,i_manager_id] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q43.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q43.sf100/explain.txt new file mode 100644 index 0000000000000..5e0761f2ebb7b --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q43.sf100/explain.txt @@ -0,0 +1,122 @@ +== Physical Plan == +TakeOrderedAndProject (21) ++- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Project (17) + +- * BroadcastHashJoin Inner BuildRight (16) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildLeft (9) + : :- BroadcastExchange (5) + : : +- * Project (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.date_dim (1) + : +- * Filter 
(8) + : +- * ColumnarToRow (7) + : +- Scan parquet default.store_sales (6) + +- BroadcastExchange (15) + +- * Project (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet default.store (11) + + +(1) Scan parquet default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_day_name#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1998), GreaterThanOrEqual(d_date_sk,2450816), LessThanOrEqual(d_date_sk,2451179), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#1, d_year#2, d_day_name#3] + +(3) Filter [codegen id : 1] +Input [3]: [d_date_sk#1, d_year#2, d_day_name#3] +Condition : ((((isnotnull(d_year#2) AND (d_year#2 = 1998)) AND (d_date_sk#1 >= 2450816)) AND (d_date_sk#1 <= 2451179)) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 1] +Output [2]: [d_date_sk#1, d_day_name#3] +Input [3]: [d_date_sk#1, d_year#2, d_day_name#3] + +(5) BroadcastExchange +Input [2]: [d_date_sk#1, d_day_name#3] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#4] + +(6) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#5, ss_store_sk#6, ss_sales_price#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2450816), LessThanOrEqual(ss_sold_date_sk,2451179), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(7) ColumnarToRow +Input [3]: [ss_sold_date_sk#5, ss_store_sk#6, ss_sales_price#7] + +(8) Filter +Input [3]: [ss_sold_date_sk#5, ss_store_sk#6, ss_sales_price#7] +Condition : (((isnotnull(ss_sold_date_sk#5) AND (ss_sold_date_sk#5 >= 2450816)) AND (ss_sold_date_sk#5 <= 2451179)) 
AND isnotnull(ss_store_sk#6)) + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#5] +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [d_day_name#3, ss_store_sk#6, ss_sales_price#7] +Input [5]: [d_date_sk#1, d_day_name#3, ss_sold_date_sk#5, ss_store_sk#6, ss_sales_price#7] + +(11) Scan parquet default.store +Output [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_gmt_offset), EqualTo(s_gmt_offset,-5.00), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] + +(13) Filter [codegen id : 2] +Input [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] +Condition : ((isnotnull(s_gmt_offset#11) AND (s_gmt_offset#11 = -5.00)) AND isnotnull(s_store_sk#8)) + +(14) Project [codegen id : 2] +Output [3]: [s_store_sk#8, s_store_id#9, s_store_name#10] +Input [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] + +(15) BroadcastExchange +Input [3]: [s_store_sk#8, s_store_id#9, s_store_name#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_store_sk#6] +Right keys [1]: [s_store_sk#8] +Join condition: None + +(17) Project [codegen id : 3] +Output [4]: [d_day_name#3, ss_sales_price#7, s_store_id#9, s_store_name#10] +Input [6]: [d_day_name#3, ss_store_sk#6, ss_sales_price#7, s_store_sk#8, s_store_id#9, s_store_name#10] + +(18) HashAggregate [codegen id : 3] +Input [4]: [d_day_name#3, ss_sales_price#7, s_store_id#9, s_store_name#10] +Keys [2]: [s_store_name#10, s_store_id#9] +Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday) 
THEN ss_sales_price#7 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday) THEN ss_sales_price#7 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday) THEN ss_sales_price#7 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#7 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday) THEN ss_sales_price#7 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday) THEN ss_sales_price#7 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday) THEN ss_sales_price#7 ELSE null END))] +Aggregate Attributes [7]: [sum#13, sum#14, sum#15, sum#16, sum#17, sum#18, sum#19] +Results [9]: [s_store_name#10, s_store_id#9, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] + +(19) Exchange +Input [9]: [s_store_name#10, s_store_id#9, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] +Arguments: hashpartitioning(s_store_name#10, s_store_id#9, 5), true, [id=#27] + +(20) HashAggregate [codegen id : 4] +Input [9]: [s_store_name#10, s_store_id#9, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] +Keys [2]: [s_store_name#10, s_store_id#9] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday) THEN ss_sales_price#7 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday) THEN ss_sales_price#7 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday) THEN ss_sales_price#7 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#7 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday) THEN ss_sales_price#7 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday) THEN ss_sales_price#7 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday) THEN ss_sales_price#7 ELSE null END))] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday) THEN ss_sales_price#7 ELSE null END))#28, 
sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday) THEN ss_sales_price#7 ELSE null END))#29, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday) THEN ss_sales_price#7 ELSE null END))#30, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#7 ELSE null END))#31, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday) THEN ss_sales_price#7 ELSE null END))#32, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday) THEN ss_sales_price#7 ELSE null END))#33, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday) THEN ss_sales_price#7 ELSE null END))#34] +Results [9]: [s_store_name#10, s_store_id#9, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday) THEN ss_sales_price#7 ELSE null END))#28,17,2) AS sun_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday) THEN ss_sales_price#7 ELSE null END))#29,17,2) AS mon_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday) THEN ss_sales_price#7 ELSE null END))#30,17,2) AS tue_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#7 ELSE null END))#31,17,2) AS wed_sales#38, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday) THEN ss_sales_price#7 ELSE null END))#32,17,2) AS thu_sales#39, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday) THEN ss_sales_price#7 ELSE null END))#33,17,2) AS fri_sales#40, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday) THEN ss_sales_price#7 ELSE null END))#34,17,2) AS sat_sales#41] + +(21) TakeOrderedAndProject +Input [9]: [s_store_name#10, s_store_id#9, sun_sales#35, mon_sales#36, tue_sales#37, wed_sales#38, thu_sales#39, fri_sales#40, sat_sales#41] +Arguments: 100, [s_store_name#10 ASC NULLS FIRST, s_store_id#9 ASC NULLS FIRST, sun_sales#35 ASC NULLS FIRST, mon_sales#36 ASC NULLS FIRST, tue_sales#37 ASC NULLS FIRST, wed_sales#38 ASC NULLS FIRST, thu_sales#39 ASC NULLS FIRST, fri_sales#40 ASC NULLS FIRST, sat_sales#41 ASC NULLS FIRST], 
[s_store_name#10, s_store_id#9, sun_sales#35, mon_sales#36, tue_sales#37, wed_sales#38, thu_sales#39, fri_sales#40, sat_sales#41] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q43.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q43.sf100/simplified.txt new file mode 100644 index 0000000000000..2ed0f8f445720 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q43.sf100/simplified.txt @@ -0,0 +1,31 @@ +TakeOrderedAndProject [fri_sales,mon_sales,s_store_id,s_store_name,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] + WholeStageCodegen (4) + HashAggregate [s_store_id,s_store_name,sum,sum,sum,sum,sum,sum,sum] [fri_sales,mon_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sun_sales,thu_sales,tue_sales,wed_sales] + InputAdapter + Exchange [s_store_id,s_store_name] #1 + WholeStageCodegen (3) + HashAggregate [d_day_name,s_store_id,s_store_name,ss_sales_price] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [d_day_name,s_store_id,s_store_name,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_day_name,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk,d_day_name] + Filter [d_date_sk,d_year] + 
ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_day_name,d_year] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [s_store_id,s_store_name,s_store_sk] + Filter [s_gmt_offset,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_gmt_offset,s_store_id,s_store_name,s_store_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q43/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q43/explain.txt new file mode 100644 index 0000000000000..c5db98d8ee408 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q43/explain.txt @@ -0,0 +1,122 @@ +== Physical Plan == +TakeOrderedAndProject (21) ++- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Project (17) + +- * BroadcastHashJoin Inner BuildRight (16) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Project (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.date_dim (1) + : +- BroadcastExchange (8) + : +- * Filter (7) + : +- * ColumnarToRow (6) + : +- Scan parquet default.store_sales (5) + +- BroadcastExchange (15) + +- * Project (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet default.store (11) + + +(1) Scan parquet default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_day_name#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1998), GreaterThanOrEqual(d_date_sk,2450816), LessThanOrEqual(d_date_sk,2451179), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, 
d_day_name#3] + +(3) Filter [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_day_name#3] +Condition : ((((isnotnull(d_year#2) AND (d_year#2 = 1998)) AND (d_date_sk#1 >= 2450816)) AND (d_date_sk#1 <= 2451179)) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 3] +Output [2]: [d_date_sk#1, d_day_name#3] +Input [3]: [d_date_sk#1, d_year#2, d_day_name#3] + +(5) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#4, ss_store_sk#5, ss_sales_price#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2450816), LessThanOrEqual(ss_sold_date_sk,2451179), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [3]: [ss_sold_date_sk#4, ss_store_sk#5, ss_sales_price#6] + +(7) Filter [codegen id : 1] +Input [3]: [ss_sold_date_sk#4, ss_store_sk#5, ss_sales_price#6] +Condition : (((isnotnull(ss_sold_date_sk#4) AND (ss_sold_date_sk#4 >= 2450816)) AND (ss_sold_date_sk#4 <= 2451179)) AND isnotnull(ss_store_sk#5)) + +(8) BroadcastExchange +Input [3]: [ss_sold_date_sk#4, ss_store_sk#5, ss_sales_price#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#4] +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [d_day_name#3, ss_store_sk#5, ss_sales_price#6] +Input [5]: [d_date_sk#1, d_day_name#3, ss_sold_date_sk#4, ss_store_sk#5, ss_sales_price#6] + +(11) Scan parquet default.store +Output [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_gmt_offset), 
EqualTo(s_gmt_offset,-5.00), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] + +(13) Filter [codegen id : 2] +Input [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] +Condition : ((isnotnull(s_gmt_offset#11) AND (s_gmt_offset#11 = -5.00)) AND isnotnull(s_store_sk#8)) + +(14) Project [codegen id : 2] +Output [3]: [s_store_sk#8, s_store_id#9, s_store_name#10] +Input [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] + +(15) BroadcastExchange +Input [3]: [s_store_sk#8, s_store_id#9, s_store_name#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_store_sk#5] +Right keys [1]: [s_store_sk#8] +Join condition: None + +(17) Project [codegen id : 3] +Output [4]: [d_day_name#3, ss_sales_price#6, s_store_id#9, s_store_name#10] +Input [6]: [d_day_name#3, ss_store_sk#5, ss_sales_price#6, s_store_sk#8, s_store_id#9, s_store_name#10] + +(18) HashAggregate [codegen id : 3] +Input [4]: [d_day_name#3, ss_sales_price#6, s_store_id#9, s_store_name#10] +Keys [2]: [s_store_name#10, s_store_id#9] +Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday) THEN ss_sales_price#6 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday) THEN ss_sales_price#6 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday) THEN ss_sales_price#6 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#6 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday) THEN ss_sales_price#6 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday) THEN ss_sales_price#6 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday) THEN ss_sales_price#6 ELSE null END))] +Aggregate Attributes [7]: 
[sum#13, sum#14, sum#15, sum#16, sum#17, sum#18, sum#19] +Results [9]: [s_store_name#10, s_store_id#9, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] + +(19) Exchange +Input [9]: [s_store_name#10, s_store_id#9, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] +Arguments: hashpartitioning(s_store_name#10, s_store_id#9, 5), true, [id=#27] + +(20) HashAggregate [codegen id : 4] +Input [9]: [s_store_name#10, s_store_id#9, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] +Keys [2]: [s_store_name#10, s_store_id#9] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday) THEN ss_sales_price#6 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday) THEN ss_sales_price#6 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday) THEN ss_sales_price#6 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#6 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday) THEN ss_sales_price#6 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday) THEN ss_sales_price#6 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday) THEN ss_sales_price#6 ELSE null END))] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday) THEN ss_sales_price#6 ELSE null END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday) THEN ss_sales_price#6 ELSE null END))#29, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday) THEN ss_sales_price#6 ELSE null END))#30, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#6 ELSE null END))#31, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday) THEN ss_sales_price#6 ELSE null END))#32, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday) THEN ss_sales_price#6 ELSE null END))#33, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday) THEN ss_sales_price#6 ELSE null END))#34] +Results [9]: [s_store_name#10, s_store_id#9, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = 
Sunday) THEN ss_sales_price#6 ELSE null END))#28,17,2) AS sun_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday) THEN ss_sales_price#6 ELSE null END))#29,17,2) AS mon_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday) THEN ss_sales_price#6 ELSE null END))#30,17,2) AS tue_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#6 ELSE null END))#31,17,2) AS wed_sales#38, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday) THEN ss_sales_price#6 ELSE null END))#32,17,2) AS thu_sales#39, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday) THEN ss_sales_price#6 ELSE null END))#33,17,2) AS fri_sales#40, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday) THEN ss_sales_price#6 ELSE null END))#34,17,2) AS sat_sales#41] + +(21) TakeOrderedAndProject +Input [9]: [s_store_name#10, s_store_id#9, sun_sales#35, mon_sales#36, tue_sales#37, wed_sales#38, thu_sales#39, fri_sales#40, sat_sales#41] +Arguments: 100, [s_store_name#10 ASC NULLS FIRST, s_store_id#9 ASC NULLS FIRST, sun_sales#35 ASC NULLS FIRST, mon_sales#36 ASC NULLS FIRST, tue_sales#37 ASC NULLS FIRST, wed_sales#38 ASC NULLS FIRST, thu_sales#39 ASC NULLS FIRST, fri_sales#40 ASC NULLS FIRST, sat_sales#41 ASC NULLS FIRST], [s_store_name#10, s_store_id#9, sun_sales#35, mon_sales#36, tue_sales#37, wed_sales#38, thu_sales#39, fri_sales#40, sat_sales#41] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q43/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q43/simplified.txt new file mode 100644 index 0000000000000..bf8ac2624db70 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q43/simplified.txt @@ -0,0 +1,31 @@ +TakeOrderedAndProject [fri_sales,mon_sales,s_store_id,s_store_name,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] + WholeStageCodegen (4) + HashAggregate 
[s_store_id,s_store_name,sum,sum,sum,sum,sum,sum,sum] [fri_sales,mon_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sun_sales,thu_sales,tue_sales,wed_sales] + InputAdapter + Exchange [s_store_id,s_store_name] #1 + WholeStageCodegen (3) + HashAggregate [d_day_name,s_store_id,s_store_name,ss_sales_price] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [d_day_name,s_store_id,s_store_name,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_day_name,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [d_date_sk,d_day_name] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_day_name,d_year] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [s_store_id,s_store_name,s_store_sk] + Filter [s_gmt_offset,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_gmt_offset,s_store_id,s_store_name,s_store_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q46.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q46.sf100/explain.txt 
new file mode 100644 index 0000000000000..872aae35fcaa1 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q46.sf100/explain.txt @@ -0,0 +1,281 @@ +== Physical Plan == +TakeOrderedAndProject (51) ++- * Project (50) + +- * SortMergeJoin Inner (49) + :- * Sort (46) + : +- Exchange (45) + : +- * Project (44) + : +- * SortMergeJoin Inner (43) + : :- * Sort (37) + : : +- Exchange (36) + : : +- * HashAggregate (35) + : : +- * HashAggregate (34) + : : +- * Project (33) + : : +- * SortMergeJoin Inner (32) + : : :- * Sort (26) + : : : +- Exchange (25) + : : : +- * Project (24) + : : : +- * BroadcastHashJoin Inner BuildRight (23) + : : : :- * Project (17) + : : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : : :- * Project (10) + : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : :- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet default.store_sales (1) + : : : : : +- BroadcastExchange (8) + : : : : : +- * Project (7) + : : : : : +- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.date_dim (4) + : : : : +- BroadcastExchange (15) + : : : : +- * Project (14) + : : : : +- * Filter (13) + : : : : +- * ColumnarToRow (12) + : : : : +- Scan parquet default.store (11) + : : : +- BroadcastExchange (22) + : : : +- * Project (21) + : : : +- * Filter (20) + : : : +- * ColumnarToRow (19) + : : : +- Scan parquet default.household_demographics (18) + : : +- * Sort (31) + : : +- Exchange (30) + : : +- * Filter (29) + : : +- * ColumnarToRow (28) + : : +- Scan parquet default.customer_address (27) + : +- * Sort (42) + : +- Exchange (41) + : +- * Filter (40) + : +- * ColumnarToRow (39) + : +- Scan parquet default.customer (38) + +- * Sort (48) + +- ReusedExchange (47) + + +(1) Scan parquet default.store_sales +Output [8]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] 
+Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store_sales] +PushedFilters: [In(ss_sold_date_sksNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [8]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] + +(3) Filter [codegen id : 4] +Input [8]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Condition : (((((ss_sold_date_sk#1 INSET (2451790,2451609,2451294,2451658,2452099,2451482,2451700,2452035,2452274,2451258,2451847,2451714,2451937,2451860,2451601,2451573,2451686,2452008,2451454,2451882,2451832,2452259,2451671,2451903,2451497,2452162,2451322,2451517,2451434,2451273,2451405,2452105,2451924,2452050,2452126,2452203,2451818,2451559,2451853,2451238,2451209,2451357,2451959,2452239,2451608,2452141,2452252,2451623,2451867,2451504,2451910,2452232,2451874,2451581,2451329,2451223,2451783,2452267,2452042,2451895,2451986,2452091,2451693,2451265,2451678,2451825,2451244,2451490,2451287,2451419,2451546,2451245,2451713,2452070,2451189,2451804,2451468,2451525,2451902,2452077,2452161,2451378,2451567,2451931,2451699,2451251,2451840,2452253,2451938,2451510,2452231,2452036,2451616,2451230,2452112,2451846,2451966,2451538,2451819,2452140,2452183,2451496,2451791,2451595,2451574,2451363,2451994,2451917,2451602,2452273,2451237,2451350,2451685,2451259,2451286,2451972,2452224,2451370,2452245,2451643,2451993,2451315,2451301,2451560,2451433,2452225,2451532,2451755,2451854,2451545,2451210,2451587,2451987,2451447,2452197,2451552,2451896,2451679,2452147,2451735,2452022,2451707,2451868,2451398,2451777,2451181,2451503,2451839,2452175,2451441,2452154,2452029,2452196,245195
2,2451805,2451965,2451539,2452001,2451833,2451392,2451524,2451461,2452133,2451448,2451307,2451615,2451769,2451412,2451349,2451651,2451763,2451203,2452064,2451980,2451748,2451637,2452182,2451279,2451231,2451734,2451692,2452071,2451336,2451300,2451727,2451630,2452189,2451875,2451973,2451328,2452084,2451399,2451944,2452204,2451385,2451776,2451384,2451272,2451812,2451749,2451566,2451182,2451945,2451420,2451930,2452057,2451756,2451644,2451314,2451364,2452007,2451798,2451475,2452015,2451440,2452000,2451588,2452148,2451195,2452217,2451371,2452176,2451531,2452134,2452211,2451462,2451188,2451741,2452119,2451342,2451580,2451672,2451889,2451280,2451406,2451293,2451217,2452049,2452106,2451321,2451335,2451483,2452260,2451657,2451979,2451518,2451629,2451728,2451923,2451861,2451951,2452246,2451455,2451356,2451224,2452210,2452021,2451427,2451202,2452098,2452168,2451553,2451391,2451706,2452155,2451196,2451770,2452127,2451762,2452078,2451958,2451721,2451665,2452120,2451252,2452085,2452092,2451476,2452218,2452169,2451797,2451650,2451881,2451511,2451469,2451888,2452043,2452266,2451664,2452014,2451343,2452056,2452190,2452063,2451636,2451742,2451811,2451720,2451308,2451489,2451413,2451216,2451594,2452238,2451784,2451426,2451622,2451916,2452113,2451909,2451266,2451826,2451377,2452028) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#5)) AND isnotnull(ss_hdemo_sk#3)) AND isnotnull(ss_addr_sk#4)) AND isnotnull(ss_customer_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_dow#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/date_dim] +PushedFilters: [In(d_dow, [6,0]), In(d_year, [1999,2000,2001]), In(d_date_sksNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] +Condition : 
(((d_dow#11 IN (6,0) AND d_year#10 IN (1999,2000,2001)) AND d_date_sk#9 INSET (2451790,2451609,2451294,2451658,2452099,2451482,2451700,2452035,2452274,2451258,2451847,2451714,2451937,2451860,2451601,2451573,2451686,2452008,2451454,2451882,2451832,2452259,2451671,2451903,2451497,2452162,2451322,2451517,2451434,2451273,2451405,2452105,2451924,2452050,2452126,2452203,2451818,2451559,2451853,2451238,2451209,2451357,2451959,2452239,2451608,2452141,2452252,2451623,2451867,2451504,2451910,2452232,2451874,2451581,2451329,2451223,2451783,2452267,2452042,2451895,2451986,2452091,2451693,2451265,2451678,2451825,2451244,2451490,2451287,2451419,2451546,2451245,2451713,2452070,2451189,2451804,2451468,2451525,2451902,2452077,2452161,2451378,2451567,2451931,2451699,2451251,2451840,2452253,2451938,2451510,2452231,2452036,2451616,2451230,2452112,2451846,2451966,2451538,2451819,2452140,2452183,2451496,2451791,2451595,2451574,2451363,2451994,2451917,2451602,2452273,2451237,2451350,2451685,2451259,2451286,2451972,2452224,2451370,2452245,2451643,2451993,2451315,2451301,2451560,2451433,2452225,2451532,2451755,2451854,2451545,2451210,2451587,2451987,2451447,2452197,2451552,2451896,2451679,2452147,2451735,2452022,2451707,2451868,2451398,2451777,2451181,2451503,2451839,2452175,2451441,2452154,2452029,2452196,2451952,2451805,2451965,2451539,2452001,2451833,2451392,2451524,2451461,2452133,2451448,2451307,2451615,2451769,2451412,2451349,2451651,2451763,2451203,2452064,2451980,2451748,2451637,2452182,2451279,2451231,2451734,2451692,2452071,2451336,2451300,2451727,2451630,2452189,2451875,2451973,2451328,2452084,2451399,2451944,2452204,2451385,2451776,2451384,2451272,2451812,2451749,2451566,2451182,2451945,2451420,2451930,2452057,2451756,2451644,2451314,2451364,2452007,2451798,2451475,2452015,2451440,2452000,2451588,2452148,2451195,2452217,2451371,2452176,2451531,2452134,2452211,2451462,2451188,2451741,2452119,2451342,2451580,2451672,2451889,2451280,2451406,2451293,2451217,2452049,2452106,2451321,2
451335,2451483,2452260,2451657,2451979,2451518,2451629,2451728,2451923,2451861,2451951,2452246,2451455,2451356,2451224,2452210,2452021,2451427,2451202,2452098,2452168,2451553,2451391,2451706,2452155,2451196,2451770,2452127,2451762,2452078,2451958,2451721,2451665,2452120,2451252,2452085,2452092,2451476,2452218,2452169,2451797,2451650,2451881,2451511,2451469,2451888,2452043,2452266,2451664,2452014,2451343,2452056,2452190,2452063,2451636,2451742,2451811,2451720,2451308,2451489,2451413,2451216,2451594,2452238,2451784,2451426,2451622,2451916,2452113,2451909,2451266,2451826,2451377,2452028)) AND isnotnull(d_date_sk#9)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#9] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] + +(8) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(10) Project [codegen id : 4] +Output [7]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Input [9]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, d_date_sk#9] + +(11) Scan parquet default.store +Output [2]: [s_store_sk#13, s_city#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store] +PushedFilters: [In(s_city, [Midway,Concord,Spring Hill,Brownsville,Greenville]), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#13, s_city#14] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#13, s_city#14] +Condition : (s_city#14 IN (Midway,Concord,Spring Hill,Brownsville,Greenville) AND isnotnull(s_store_sk#13)) + +(14) Project [codegen id : 2] 
+Output [1]: [s_store_sk#13] +Input [2]: [s_store_sk#13, s_city#14] + +(15) BroadcastExchange +Input [1]: [s_store_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#5] +Right keys [1]: [s_store_sk#13] +Join condition: None + +(17) Project [codegen id : 4] +Output [6]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Input [8]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, s_store_sk#13] + +(18) Scan parquet default.household_demographics +Output [3]: [hd_demo_sk#16, hd_dep_count#17, hd_vehicle_count#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/household_demographics] +PushedFilters: [Or(EqualTo(hd_dep_count,5),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [3]: [hd_demo_sk#16, hd_dep_count#17, hd_vehicle_count#18] + +(20) Filter [codegen id : 3] +Input [3]: [hd_demo_sk#16, hd_dep_count#17, hd_vehicle_count#18] +Condition : (((hd_dep_count#17 = 5) OR (hd_vehicle_count#18 = 3)) AND isnotnull(hd_demo_sk#16)) + +(21) Project [codegen id : 3] +Output [1]: [hd_demo_sk#16] +Input [3]: [hd_demo_sk#16, hd_dep_count#17, hd_vehicle_count#18] + +(22) BroadcastExchange +Input [1]: [hd_demo_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#19] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_hdemo_sk#3] +Right keys [1]: [hd_demo_sk#16] +Join condition: None + +(24) Project [codegen id : 4] +Output [5]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Input [7]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_ticket_number#6, 
ss_coupon_amt#7, ss_net_profit#8, hd_demo_sk#16] + +(25) Exchange +Input [5]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Arguments: hashpartitioning(ss_addr_sk#4, 5), true, [id=#20] + +(26) Sort [codegen id : 5] +Input [5]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Arguments: [ss_addr_sk#4 ASC NULLS FIRST], false, 0 + +(27) Scan parquet default.customer_address +Output [2]: [ca_address_sk#21, ca_city#22] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 6] +Input [2]: [ca_address_sk#21, ca_city#22] + +(29) Filter [codegen id : 6] +Input [2]: [ca_address_sk#21, ca_city#22] +Condition : (isnotnull(ca_address_sk#21) AND isnotnull(ca_city#22)) + +(30) Exchange +Input [2]: [ca_address_sk#21, ca_city#22] +Arguments: hashpartitioning(ca_address_sk#21, 5), true, [id=#23] + +(31) Sort [codegen id : 7] +Input [2]: [ca_address_sk#21, ca_city#22] +Arguments: [ca_address_sk#21 ASC NULLS FIRST], false, 0 + +(32) SortMergeJoin [codegen id : 8] +Left keys [1]: [ss_addr_sk#4] +Right keys [1]: [ca_address_sk#21] +Join condition: None + +(33) Project [codegen id : 8] +Output [6]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, ca_city#22] +Input [7]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, ca_address_sk#21, ca_city#22] + +(34) HashAggregate [codegen id : 8] +Input [6]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, ca_city#22] +Keys [4]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#22] +Functions [2]: [partial_sum(UnscaledValue(ss_coupon_amt#7)), partial_sum(UnscaledValue(ss_net_profit#8))] +Aggregate 
Attributes [2]: [sum#24, sum#25] +Results [6]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#22, sum#26, sum#27] + +(35) HashAggregate [codegen id : 8] +Input [6]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#22, sum#26, sum#27] +Keys [4]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#22] +Functions [2]: [sum(UnscaledValue(ss_coupon_amt#7)), sum(UnscaledValue(ss_net_profit#8))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_coupon_amt#7))#28, sum(UnscaledValue(ss_net_profit#8))#29] +Results [5]: [ss_ticket_number#6, ss_customer_sk#2, ca_city#22 AS bought_city#30, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#7))#28,17,2) AS amt#31, MakeDecimal(sum(UnscaledValue(ss_net_profit#8))#29,17,2) AS profit#32] + +(36) Exchange +Input [5]: [ss_ticket_number#6, ss_customer_sk#2, bought_city#30, amt#31, profit#32] +Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#33] + +(37) Sort [codegen id : 9] +Input [5]: [ss_ticket_number#6, ss_customer_sk#2, bought_city#30, amt#31, profit#32] +Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 + +(38) Scan parquet default.customer +Output [4]: [c_customer_sk#34, c_current_addr_sk#35, c_first_name#36, c_last_name#37] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(39) ColumnarToRow [codegen id : 10] +Input [4]: [c_customer_sk#34, c_current_addr_sk#35, c_first_name#36, c_last_name#37] + +(40) Filter [codegen id : 10] +Input [4]: [c_customer_sk#34, c_current_addr_sk#35, c_first_name#36, c_last_name#37] +Condition : (isnotnull(c_customer_sk#34) AND isnotnull(c_current_addr_sk#35)) + +(41) Exchange +Input [4]: [c_customer_sk#34, c_current_addr_sk#35, c_first_name#36, c_last_name#37] +Arguments: hashpartitioning(c_customer_sk#34, 5), true, [id=#38] + +(42) 
Sort [codegen id : 11] +Input [4]: [c_customer_sk#34, c_current_addr_sk#35, c_first_name#36, c_last_name#37] +Arguments: [c_customer_sk#34 ASC NULLS FIRST], false, 0 + +(43) SortMergeJoin [codegen id : 12] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#34] +Join condition: None + +(44) Project [codegen id : 12] +Output [7]: [ss_ticket_number#6, bought_city#30, amt#31, profit#32, c_current_addr_sk#35, c_first_name#36, c_last_name#37] +Input [9]: [ss_ticket_number#6, ss_customer_sk#2, bought_city#30, amt#31, profit#32, c_customer_sk#34, c_current_addr_sk#35, c_first_name#36, c_last_name#37] + +(45) Exchange +Input [7]: [ss_ticket_number#6, bought_city#30, amt#31, profit#32, c_current_addr_sk#35, c_first_name#36, c_last_name#37] +Arguments: hashpartitioning(c_current_addr_sk#35, 5), true, [id=#39] + +(46) Sort [codegen id : 13] +Input [7]: [ss_ticket_number#6, bought_city#30, amt#31, profit#32, c_current_addr_sk#35, c_first_name#36, c_last_name#37] +Arguments: [c_current_addr_sk#35 ASC NULLS FIRST], false, 0 + +(47) ReusedExchange [Reuses operator id: 30] +Output [2]: [ca_address_sk#21, ca_city#22] + +(48) Sort [codegen id : 15] +Input [2]: [ca_address_sk#21, ca_city#22] +Arguments: [ca_address_sk#21 ASC NULLS FIRST], false, 0 + +(49) SortMergeJoin [codegen id : 16] +Left keys [1]: [c_current_addr_sk#35] +Right keys [1]: [ca_address_sk#21] +Join condition: NOT (ca_city#22 = bought_city#30) + +(50) Project [codegen id : 16] +Output [7]: [c_last_name#37, c_first_name#36, ca_city#22, bought_city#30, ss_ticket_number#6, amt#31, profit#32] +Input [9]: [ss_ticket_number#6, bought_city#30, amt#31, profit#32, c_current_addr_sk#35, c_first_name#36, c_last_name#37, ca_address_sk#21, ca_city#22] + +(51) TakeOrderedAndProject +Input [7]: [c_last_name#37, c_first_name#36, ca_city#22, bought_city#30, ss_ticket_number#6, amt#31, profit#32] +Arguments: 100, [c_last_name#37 ASC NULLS FIRST, c_first_name#36 ASC NULLS FIRST, ca_city#22 ASC NULLS FIRST, bought_city#30 
ASC NULLS FIRST, ss_ticket_number#6 ASC NULLS FIRST], [c_last_name#37, c_first_name#36, ca_city#22, bought_city#30, ss_ticket_number#6, amt#31, profit#32] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q46.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q46.sf100/simplified.txt new file mode 100644 index 0000000000000..8108d64db305b --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q46.sf100/simplified.txt @@ -0,0 +1,87 @@ +TakeOrderedAndProject [amt,bought_city,c_first_name,c_last_name,ca_city,profit,ss_ticket_number] + WholeStageCodegen (16) + Project [amt,bought_city,c_first_name,c_last_name,ca_city,profit,ss_ticket_number] + SortMergeJoin [bought_city,c_current_addr_sk,ca_address_sk,ca_city] + InputAdapter + WholeStageCodegen (13) + Sort [c_current_addr_sk] + InputAdapter + Exchange [c_current_addr_sk] #1 + WholeStageCodegen (12) + Project [amt,bought_city,c_current_addr_sk,c_first_name,c_last_name,profit,ss_ticket_number] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (9) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #2 + WholeStageCodegen (8) + HashAggregate [ca_city,ss_addr_sk,ss_customer_sk,ss_ticket_number,sum,sum] [amt,bought_city,profit,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit))] + HashAggregate [ca_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number] [sum,sum,sum,sum] + Project [ca_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number] + SortMergeJoin [ca_address_sk,ss_addr_sk] + InputAdapter + WholeStageCodegen (5) + Sort [ss_addr_sk] + InputAdapter + Exchange [ss_addr_sk] #3 + WholeStageCodegen (4) + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project 
[ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_ticket_number] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_addr_sk,ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_dow,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_dow,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Project [s_store_sk] + Filter [s_city,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_city,s_store_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Project [hd_demo_sk] + Filter [hd_demo_sk,hd_dep_count,hd_vehicle_count] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + WholeStageCodegen (7) + Sort [ca_address_sk] + InputAdapter + Exchange [ca_address_sk] #7 + WholeStageCodegen (6) + Filter [ca_address_sk,ca_city] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_city] + InputAdapter + WholeStageCodegen (11) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #8 + WholeStageCodegen (10) + Filter [c_current_addr_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_customer_sk,c_first_name,c_last_name] + InputAdapter + WholeStageCodegen (15) + Sort [ca_address_sk] + InputAdapter + ReusedExchange [ca_address_sk,ca_city] #7 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q46/explain.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q46/explain.txt new file mode 100644 index 0000000000000..8c2e3c0153844 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q46/explain.txt @@ -0,0 +1,241 @@ +== Physical Plan == +TakeOrderedAndProject (43) ++- * Project (42) + +- * BroadcastHashJoin Inner BuildRight (41) + :- * Project (39) + : +- * BroadcastHashJoin Inner BuildRight (38) + : :- * HashAggregate (33) + : : +- Exchange (32) + : : +- * HashAggregate (31) + : : +- * Project (30) + : : +- * BroadcastHashJoin Inner BuildRight (29) + : : :- * Project (24) + : : : +- * BroadcastHashJoin Inner BuildRight (23) + : : : :- * Project (17) + : : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : : :- * Project (10) + : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : :- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet default.store_sales (1) + : : : : : +- BroadcastExchange (8) + : : : : : +- * Project (7) + : : : : : +- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.date_dim (4) + : : : : +- BroadcastExchange (15) + : : : : +- * Project (14) + : : : : +- * Filter (13) + : : : : +- * ColumnarToRow (12) + : : : : +- Scan parquet default.store (11) + : : : +- BroadcastExchange (22) + : : : +- * Project (21) + : : : +- * Filter (20) + : : : +- * ColumnarToRow (19) + : : : +- Scan parquet default.household_demographics (18) + : : +- BroadcastExchange (28) + : : +- * Filter (27) + : : +- * ColumnarToRow (26) + : : +- Scan parquet default.customer_address (25) + : +- BroadcastExchange (37) + : +- * Filter (36) + : +- * ColumnarToRow (35) + : +- Scan parquet default.customer (34) + +- ReusedExchange (40) + + +(1) Scan parquet default.store_sales +Output [8]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Batched: true +Location: 
InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store_sales] +PushedFilters: [In(ss_sold_date_sksNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [8]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] + +(3) Filter [codegen id : 5] +Input [8]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Condition : (((((ss_sold_date_sk#1 INSET (2451790,2451609,2451294,2451658,2452099,2451482,2451700,2452035,2452274,2451258,2451847,2451714,2451937,2451860,2451601,2451573,2451686,2452008,2451454,2451882,2451832,2452259,2451671,2451903,2451497,2452162,2451322,2451517,2451434,2451273,2451405,2452105,2451924,2452050,2452126,2452203,2451818,2451559,2451853,2451238,2451209,2451357,2451959,2452239,2451608,2452141,2452252,2451623,2451867,2451504,2451910,2452232,2451874,2451581,2451329,2451223,2451783,2452267,2452042,2451895,2451986,2452091,2451693,2451265,2451678,2451825,2451244,2451490,2451287,2451419,2451546,2451245,2451713,2452070,2451189,2451804,2451468,2451525,2451902,2452077,2452161,2451378,2451567,2451931,2451699,2451251,2451840,2452253,2451938,2451510,2452231,2452036,2451616,2451230,2452112,2451846,2451966,2451538,2451819,2452140,2452183,2451496,2451791,2451595,2451574,2451363,2451994,2451917,2451602,2452273,2451237,2451350,2451685,2451259,2451286,2451972,2452224,2451370,2452245,2451643,2451993,2451315,2451301,2451560,2451433,2452225,2451532,2451755,2451854,2451545,2451210,2451587,2451987,2451447,2452197,2451552,2451896,2451679,2452147,2451735,2452022,2451707,2451868,2451398,2451777,2451181,2451503,2451839,2452175,2451441,2452154,2452029,2452196,2451952,2451805,2451965,2451539,2452001,2
451833,2451392,2451524,2451461,2452133,2451448,2451307,2451615,2451769,2451412,2451349,2451651,2451763,2451203,2452064,2451980,2451748,2451637,2452182,2451279,2451231,2451734,2451692,2452071,2451336,2451300,2451727,2451630,2452189,2451875,2451973,2451328,2452084,2451399,2451944,2452204,2451385,2451776,2451384,2451272,2451812,2451749,2451566,2451182,2451945,2451420,2451930,2452057,2451756,2451644,2451314,2451364,2452007,2451798,2451475,2452015,2451440,2452000,2451588,2452148,2451195,2452217,2451371,2452176,2451531,2452134,2452211,2451462,2451188,2451741,2452119,2451342,2451580,2451672,2451889,2451280,2451406,2451293,2451217,2452049,2452106,2451321,2451335,2451483,2452260,2451657,2451979,2451518,2451629,2451728,2451923,2451861,2451951,2452246,2451455,2451356,2451224,2452210,2452021,2451427,2451202,2452098,2452168,2451553,2451391,2451706,2452155,2451196,2451770,2452127,2451762,2452078,2451958,2451721,2451665,2452120,2451252,2452085,2452092,2451476,2452218,2452169,2451797,2451650,2451881,2451511,2451469,2451888,2452043,2452266,2451664,2452014,2451343,2452056,2452190,2452063,2451636,2451742,2451811,2451720,2451308,2451489,2451413,2451216,2451594,2452238,2451784,2451426,2451622,2451916,2452113,2451909,2451266,2451826,2451377,2452028) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#5)) AND isnotnull(ss_hdemo_sk#3)) AND isnotnull(ss_addr_sk#4)) AND isnotnull(ss_customer_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_dow#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/date_dim] +PushedFilters: [In(d_dow, [6,0]), In(d_year, [1999,2000,2001]), In(d_date_sksNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] +Condition : (((d_dow#11 IN (6,0) AND d_year#10 IN 
(1999,2000,2001)) AND d_date_sk#9 INSET (2451790,2451609,2451294,2451658,2452099,2451482,2451700,2452035,2452274,2451258,2451847,2451714,2451937,2451860,2451601,2451573,2451686,2452008,2451454,2451882,2451832,2452259,2451671,2451903,2451497,2452162,2451322,2451517,2451434,2451273,2451405,2452105,2451924,2452050,2452126,2452203,2451818,2451559,2451853,2451238,2451209,2451357,2451959,2452239,2451608,2452141,2452252,2451623,2451867,2451504,2451910,2452232,2451874,2451581,2451329,2451223,2451783,2452267,2452042,2451895,2451986,2452091,2451693,2451265,2451678,2451825,2451244,2451490,2451287,2451419,2451546,2451245,2451713,2452070,2451189,2451804,2451468,2451525,2451902,2452077,2452161,2451378,2451567,2451931,2451699,2451251,2451840,2452253,2451938,2451510,2452231,2452036,2451616,2451230,2452112,2451846,2451966,2451538,2451819,2452140,2452183,2451496,2451791,2451595,2451574,2451363,2451994,2451917,2451602,2452273,2451237,2451350,2451685,2451259,2451286,2451972,2452224,2451370,2452245,2451643,2451993,2451315,2451301,2451560,2451433,2452225,2451532,2451755,2451854,2451545,2451210,2451587,2451987,2451447,2452197,2451552,2451896,2451679,2452147,2451735,2452022,2451707,2451868,2451398,2451777,2451181,2451503,2451839,2452175,2451441,2452154,2452029,2452196,2451952,2451805,2451965,2451539,2452001,2451833,2451392,2451524,2451461,2452133,2451448,2451307,2451615,2451769,2451412,2451349,2451651,2451763,2451203,2452064,2451980,2451748,2451637,2452182,2451279,2451231,2451734,2451692,2452071,2451336,2451300,2451727,2451630,2452189,2451875,2451973,2451328,2452084,2451399,2451944,2452204,2451385,2451776,2451384,2451272,2451812,2451749,2451566,2451182,2451945,2451420,2451930,2452057,2451756,2451644,2451314,2451364,2452007,2451798,2451475,2452015,2451440,2452000,2451588,2452148,2451195,2452217,2451371,2452176,2451531,2452134,2452211,2451462,2451188,2451741,2452119,2451342,2451580,2451672,2451889,2451280,2451406,2451293,2451217,2452049,2452106,2451321,2451335,2451483,2452260,2451657,2451979
,2451518,2451629,2451728,2451923,2451861,2451951,2452246,2451455,2451356,2451224,2452210,2452021,2451427,2451202,2452098,2452168,2451553,2451391,2451706,2452155,2451196,2451770,2452127,2451762,2452078,2451958,2451721,2451665,2452120,2451252,2452085,2452092,2451476,2452218,2452169,2451797,2451650,2451881,2451511,2451469,2451888,2452043,2452266,2451664,2452014,2451343,2452056,2452190,2452063,2451636,2451742,2451811,2451720,2451308,2451489,2451413,2451216,2451594,2452238,2451784,2451426,2451622,2451916,2452113,2451909,2451266,2451826,2451377,2452028)) AND isnotnull(d_date_sk#9)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#9] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] + +(8) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(10) Project [codegen id : 5] +Output [7]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Input [9]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, d_date_sk#9] + +(11) Scan parquet default.store +Output [2]: [s_store_sk#13, s_city#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store] +PushedFilters: [In(s_city, [Midway,Concord,Spring Hill,Brownsville,Greenville]), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#13, s_city#14] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#13, s_city#14] +Condition : (s_city#14 IN (Midway,Concord,Spring Hill,Brownsville,Greenville) AND isnotnull(s_store_sk#13)) + +(14) Project [codegen id : 2] +Output [1]: [s_store_sk#13] +Input [2]: 
[s_store_sk#13, s_city#14] + +(15) BroadcastExchange +Input [1]: [s_store_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_store_sk#5] +Right keys [1]: [s_store_sk#13] +Join condition: None + +(17) Project [codegen id : 5] +Output [6]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Input [8]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, s_store_sk#13] + +(18) Scan parquet default.household_demographics +Output [3]: [hd_demo_sk#16, hd_dep_count#17, hd_vehicle_count#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/household_demographics] +PushedFilters: [Or(EqualTo(hd_dep_count,5),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [3]: [hd_demo_sk#16, hd_dep_count#17, hd_vehicle_count#18] + +(20) Filter [codegen id : 3] +Input [3]: [hd_demo_sk#16, hd_dep_count#17, hd_vehicle_count#18] +Condition : (((hd_dep_count#17 = 5) OR (hd_vehicle_count#18 = 3)) AND isnotnull(hd_demo_sk#16)) + +(21) Project [codegen id : 3] +Output [1]: [hd_demo_sk#16] +Input [3]: [hd_demo_sk#16, hd_dep_count#17, hd_vehicle_count#18] + +(22) BroadcastExchange +Input [1]: [hd_demo_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#19] + +(23) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_hdemo_sk#3] +Right keys [1]: [hd_demo_sk#16] +Join condition: None + +(24) Project [codegen id : 5] +Output [5]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Input [7]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, hd_demo_sk#16] + 
+(25) Scan parquet default.customer_address +Output [2]: [ca_address_sk#20, ca_city#21] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)] +ReadSchema: struct + +(26) ColumnarToRow [codegen id : 4] +Input [2]: [ca_address_sk#20, ca_city#21] + +(27) Filter [codegen id : 4] +Input [2]: [ca_address_sk#20, ca_city#21] +Condition : (isnotnull(ca_address_sk#20) AND isnotnull(ca_city#21)) + +(28) BroadcastExchange +Input [2]: [ca_address_sk#20, ca_city#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] + +(29) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_addr_sk#4] +Right keys [1]: [ca_address_sk#20] +Join condition: None + +(30) Project [codegen id : 5] +Output [6]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, ca_city#21] +Input [7]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, ca_address_sk#20, ca_city#21] + +(31) HashAggregate [codegen id : 5] +Input [6]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, ca_city#21] +Keys [4]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#21] +Functions [2]: [partial_sum(UnscaledValue(ss_coupon_amt#7)), partial_sum(UnscaledValue(ss_net_profit#8))] +Aggregate Attributes [2]: [sum#23, sum#24] +Results [6]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#21, sum#25, sum#26] + +(32) Exchange +Input [6]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#21, sum#25, sum#26] +Arguments: hashpartitioning(ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#21, 5), true, [id=#27] + +(33) HashAggregate [codegen id : 8] +Input [6]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#21, sum#25, sum#26] +Keys [4]: 
[ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#21] +Functions [2]: [sum(UnscaledValue(ss_coupon_amt#7)), sum(UnscaledValue(ss_net_profit#8))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_coupon_amt#7))#28, sum(UnscaledValue(ss_net_profit#8))#29] +Results [5]: [ss_ticket_number#6, ss_customer_sk#2, ca_city#21 AS bought_city#30, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#7))#28,17,2) AS amt#31, MakeDecimal(sum(UnscaledValue(ss_net_profit#8))#29,17,2) AS profit#32] + +(34) Scan parquet default.customer +Output [4]: [c_customer_sk#33, c_current_addr_sk#34, c_first_name#35, c_last_name#36] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 6] +Input [4]: [c_customer_sk#33, c_current_addr_sk#34, c_first_name#35, c_last_name#36] + +(36) Filter [codegen id : 6] +Input [4]: [c_customer_sk#33, c_current_addr_sk#34, c_first_name#35, c_last_name#36] +Condition : (isnotnull(c_customer_sk#33) AND isnotnull(c_current_addr_sk#34)) + +(37) BroadcastExchange +Input [4]: [c_customer_sk#33, c_current_addr_sk#34, c_first_name#35, c_last_name#36] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#37] + +(38) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#33] +Join condition: None + +(39) Project [codegen id : 8] +Output [7]: [ss_ticket_number#6, bought_city#30, amt#31, profit#32, c_current_addr_sk#34, c_first_name#35, c_last_name#36] +Input [9]: [ss_ticket_number#6, ss_customer_sk#2, bought_city#30, amt#31, profit#32, c_customer_sk#33, c_current_addr_sk#34, c_first_name#35, c_last_name#36] + +(40) ReusedExchange [Reuses operator id: 28] +Output [2]: [ca_address_sk#20, ca_city#21] + +(41) BroadcastHashJoin [codegen id : 8] 
+Left keys [1]: [c_current_addr_sk#34] +Right keys [1]: [ca_address_sk#20] +Join condition: NOT (ca_city#21 = bought_city#30) + +(42) Project [codegen id : 8] +Output [7]: [c_last_name#36, c_first_name#35, ca_city#21, bought_city#30, ss_ticket_number#6, amt#31, profit#32] +Input [9]: [ss_ticket_number#6, bought_city#30, amt#31, profit#32, c_current_addr_sk#34, c_first_name#35, c_last_name#36, ca_address_sk#20, ca_city#21] + +(43) TakeOrderedAndProject +Input [7]: [c_last_name#36, c_first_name#35, ca_city#21, bought_city#30, ss_ticket_number#6, amt#31, profit#32] +Arguments: 100, [c_last_name#36 ASC NULLS FIRST, c_first_name#35 ASC NULLS FIRST, ca_city#21 ASC NULLS FIRST, bought_city#30 ASC NULLS FIRST, ss_ticket_number#6 ASC NULLS FIRST], [c_last_name#36, c_first_name#35, ca_city#21, bought_city#30, ss_ticket_number#6, amt#31, profit#32] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q46/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q46/simplified.txt new file mode 100644 index 0000000000000..edf9894f202ea --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q46/simplified.txt @@ -0,0 +1,63 @@ +TakeOrderedAndProject [amt,bought_city,c_first_name,c_last_name,ca_city,profit,ss_ticket_number] + WholeStageCodegen (8) + Project [amt,bought_city,c_first_name,c_last_name,ca_city,profit,ss_ticket_number] + BroadcastHashJoin [bought_city,c_current_addr_sk,ca_address_sk,ca_city] + Project [amt,bought_city,c_current_addr_sk,c_first_name,c_last_name,profit,ss_ticket_number] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + HashAggregate [ca_city,ss_addr_sk,ss_customer_sk,ss_ticket_number,sum,sum] [amt,bought_city,profit,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit))] + InputAdapter + Exchange [ca_city,ss_addr_sk,ss_customer_sk,ss_ticket_number] #1 + WholeStageCodegen (5) + HashAggregate 
[ca_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number] [sum,sum,sum,sum] + Project [ca_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_ticket_number] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_addr_sk,ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_dow,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_dow,d_year] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [s_store_sk] + Filter [s_city,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_city,s_store_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [hd_demo_sk] + Filter [hd_demo_sk,hd_dep_count,hd_vehicle_count] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [ca_address_sk,ca_city] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_city] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (6) + Filter [c_current_addr_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_customer_sk,c_first_name,c_last_name] + 
InputAdapter + ReusedExchange [ca_address_sk,ca_city] #5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q52.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q52.sf100/explain.txt new file mode 100644 index 0000000000000..66cb0ccfe6e72 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q52.sf100/explain.txt @@ -0,0 +1,122 @@ +== Physical Plan == +TakeOrderedAndProject (21) ++- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Project (17) + +- * BroadcastHashJoin Inner BuildRight (16) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildLeft (9) + : :- BroadcastExchange (5) + : : +- * Project (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.date_dim (1) + : +- * Filter (8) + : +- * ColumnarToRow (7) + : +- Scan parquet default.store_sales (6) + +- BroadcastExchange (15) + +- * Project (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet default.item (11) + + +(1) Scan parquet default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,12), EqualTo(d_year,1998), GreaterThanOrEqual(d_date_sk,2451149), IsNotNull(d_date_sk), LessThanOrEqual(d_date_sk,2451179)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(3) Filter [codegen id : 1] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 12)) AND (d_year#2 = 1998)) AND (d_date_sk#1 >= 2451149)) AND isnotnull(d_date_sk#1)) AND (d_date_sk#1 <= 2451179)) + +(4) Project [codegen id : 1] +Output [2]: [d_date_sk#1, d_year#2] +Input [3]: [d_date_sk#1, 
d_year#2, d_moy#3] + +(5) BroadcastExchange +Input [2]: [d_date_sk#1, d_year#2] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#4] + +(6) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#5, ss_item_sk#6, ss_ext_sales_price#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2451149), LessThanOrEqual(ss_sold_date_sk,2451179), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(7) ColumnarToRow +Input [3]: [ss_sold_date_sk#5, ss_item_sk#6, ss_ext_sales_price#7] + +(8) Filter +Input [3]: [ss_sold_date_sk#5, ss_item_sk#6, ss_ext_sales_price#7] +Condition : (((isnotnull(ss_sold_date_sk#5) AND (ss_sold_date_sk#5 >= 2451149)) AND (ss_sold_date_sk#5 <= 2451179)) AND isnotnull(ss_item_sk#6)) + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#5] +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [d_year#2, ss_item_sk#6, ss_ext_sales_price#7] +Input [5]: [d_date_sk#1, d_year#2, ss_sold_date_sk#5, ss_item_sk#6, ss_ext_sales_price#7] + +(11) Scan parquet default.item +Output [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] + +(13) Filter [codegen id : 2] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] +Condition : ((isnotnull(i_manager_id#11) AND (i_manager_id#11 = 1)) AND isnotnull(i_item_sk#8)) + +(14) Project [codegen id : 
2] +Output [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] + +(15) BroadcastExchange +Input [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#6] +Right keys [1]: [i_item_sk#8] +Join condition: None + +(17) Project [codegen id : 3] +Output [4]: [d_year#2, ss_ext_sales_price#7, i_brand_id#9, i_brand#10] +Input [6]: [d_year#2, ss_item_sk#6, ss_ext_sales_price#7, i_item_sk#8, i_brand_id#9, i_brand#10] + +(18) HashAggregate [codegen id : 3] +Input [4]: [d_year#2, ss_ext_sales_price#7, i_brand_id#9, i_brand#10] +Keys [3]: [d_year#2, i_brand#10, i_brand_id#9] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#7))] +Aggregate Attributes [1]: [sum#13] +Results [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] + +(19) Exchange +Input [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] +Arguments: hashpartitioning(d_year#2, i_brand#10, i_brand_id#9, 5), true, [id=#15] + +(20) HashAggregate [codegen id : 4] +Input [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] +Keys [3]: [d_year#2, i_brand#10, i_brand_id#9] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#7))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#7))#16] +Results [4]: [d_year#2, i_brand_id#9 AS brand_id#17, i_brand#10 AS brand#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#7))#16,17,2) AS ext_price#19] + +(21) TakeOrderedAndProject +Input [4]: [d_year#2, brand_id#17, brand#18, ext_price#19] +Arguments: 100, [d_year#2 ASC NULLS FIRST, ext_price#19 DESC NULLS LAST, brand_id#17 ASC NULLS FIRST], [d_year#2, brand_id#17, brand#18, ext_price#19] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q52.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q52.sf100/simplified.txt 
new file mode 100644 index 0000000000000..290d6f35ea78c --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q52.sf100/simplified.txt @@ -0,0 +1,31 @@ +TakeOrderedAndProject [brand,brand_id,d_year,ext_price] + WholeStageCodegen (4) + HashAggregate [d_year,i_brand,i_brand_id,sum] [brand,brand_id,ext_price,sum,sum(UnscaledValue(ss_ext_sales_price))] + InputAdapter + Exchange [d_year,i_brand,i_brand_id] #1 + WholeStageCodegen (3) + HashAggregate [d_year,i_brand,i_brand_id,ss_ext_sales_price] [sum,sum] + Project [d_year,i_brand,i_brand_id,ss_ext_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [d_year,ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk,d_year] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [i_brand,i_brand_id,i_item_sk] + Filter [i_item_sk,i_manager_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_brand_id,i_item_sk,i_manager_id] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q52/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q52/explain.txt new file mode 100644 index 0000000000000..e1bf75575e040 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q52/explain.txt @@ -0,0 +1,122 @@ +== Physical Plan == +TakeOrderedAndProject (21) ++- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Project (17) + +- * BroadcastHashJoin Inner BuildRight (16) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Project (4) + 
: : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.date_dim (1) + : +- BroadcastExchange (8) + : +- * Filter (7) + : +- * ColumnarToRow (6) + : +- Scan parquet default.store_sales (5) + +- BroadcastExchange (15) + +- * Project (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet default.item (11) + + +(1) Scan parquet default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,12), EqualTo(d_year,1998), LessThanOrEqual(d_date_sk,2451179), GreaterThanOrEqual(d_date_sk,2451149), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(3) Filter [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 12)) AND (d_year#2 = 1998)) AND (d_date_sk#1 <= 2451179)) AND (d_date_sk#1 >= 2451149)) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 3] +Output [2]: [d_date_sk#1, d_year#2] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(5) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2451149), LessThanOrEqual(ss_sold_date_sk,2451179), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] + +(7) Filter [codegen id : 1] +Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] +Condition : (((isnotnull(ss_sold_date_sk#4) AND 
(ss_sold_date_sk#4 >= 2451149)) AND (ss_sold_date_sk#4 <= 2451179)) AND isnotnull(ss_item_sk#5)) + +(8) BroadcastExchange +Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#4] +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [d_year#2, ss_item_sk#5, ss_ext_sales_price#6] +Input [5]: [d_date_sk#1, d_year#2, ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] + +(11) Scan parquet default.item +Output [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] + +(13) Filter [codegen id : 2] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] +Condition : ((isnotnull(i_manager_id#11) AND (i_manager_id#11 = 1)) AND isnotnull(i_item_sk#8)) + +(14) Project [codegen id : 2] +Output [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] + +(15) BroadcastExchange +Input [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#5] +Right keys [1]: [i_item_sk#8] +Join condition: None + +(17) Project [codegen id : 3] +Output [4]: [d_year#2, ss_ext_sales_price#6, i_brand_id#9, i_brand#10] +Input [6]: [d_year#2, ss_item_sk#5, ss_ext_sales_price#6, i_item_sk#8, i_brand_id#9, i_brand#10] + +(18) HashAggregate [codegen id : 3] +Input 
[4]: [d_year#2, ss_ext_sales_price#6, i_brand_id#9, i_brand#10] +Keys [3]: [d_year#2, i_brand#10, i_brand_id#9] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#6))] +Aggregate Attributes [1]: [sum#13] +Results [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] + +(19) Exchange +Input [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] +Arguments: hashpartitioning(d_year#2, i_brand#10, i_brand_id#9, 5), true, [id=#15] + +(20) HashAggregate [codegen id : 4] +Input [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] +Keys [3]: [d_year#2, i_brand#10, i_brand_id#9] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#6))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#6))#16] +Results [4]: [d_year#2, i_brand_id#9 AS brand_id#17, i_brand#10 AS brand#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#16,17,2) AS ext_price#19] + +(21) TakeOrderedAndProject +Input [4]: [d_year#2, brand_id#17, brand#18, ext_price#19] +Arguments: 100, [d_year#2 ASC NULLS FIRST, ext_price#19 DESC NULLS LAST, brand_id#17 ASC NULLS FIRST], [d_year#2, brand_id#17, brand#18, ext_price#19] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q52/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q52/simplified.txt new file mode 100644 index 0000000000000..7d70658c38241 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q52/simplified.txt @@ -0,0 +1,31 @@ +TakeOrderedAndProject [brand,brand_id,d_year,ext_price] + WholeStageCodegen (4) + HashAggregate [d_year,i_brand,i_brand_id,sum] [brand,brand_id,ext_price,sum,sum(UnscaledValue(ss_ext_sales_price))] + InputAdapter + Exchange [d_year,i_brand,i_brand_id] #1 + WholeStageCodegen (3) + HashAggregate [d_year,i_brand,i_brand_id,ss_ext_sales_price] [sum,sum] + Project [d_year,i_brand,i_brand_id,ss_ext_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [d_year,ss_ext_sales_price,ss_item_sk] + 
BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [d_date_sk,d_year] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [i_brand,i_brand_id,i_item_sk] + Filter [i_item_sk,i_manager_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_brand_id,i_item_sk,i_manager_id] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q53.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q53.sf100/explain.txt new file mode 100644 index 0000000000000..1f17140c0ef1d --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q53.sf100/explain.txt @@ -0,0 +1,180 @@ +== Physical Plan == +TakeOrderedAndProject (32) ++- * Project (31) + +- * Filter (30) + +- Window (29) + +- * Sort (28) + +- Exchange (27) + +- * HashAggregate (26) + +- Exchange (25) + +- * HashAggregate (24) + +- * Project (23) + +- * BroadcastHashJoin Inner BuildRight (22) + :- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildLeft (9) + : : :- BroadcastExchange (5) + : : : +- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.item (1) + : : +- * Filter (8) + : : +- * ColumnarToRow (7) + : : +- Scan parquet default.store_sales (6) + : +- BroadcastExchange (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet default.store (11) + +- BroadcastExchange (21) + +- * Project (20) + +- * Filter (19) + +- * ColumnarToRow (18) + +- Scan parquet default.date_dim (17) + + +(1) Scan parquet default.item 
+Output [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/item] +PushedFilters: [Or(And(And(In(i_category, [Books,Children,Electronics]),In(i_class, [personal,portable,reference,self-help])),In(i_brand, [scholaramalgamalg #6,scholaramalgamalg #7,exportiunivamalg #8,scholaramalgamalg #8])),And(And(In(i_category, [Women,Music,Men]),In(i_class, [accessories,classical,fragrances,pants])),In(i_brand, [amalgimporto #9,edu packscholar #9,exportiimporto #9,importoamalg #9]))), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] + +(3) Filter [codegen id : 1] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] +Condition : ((((i_category#4 IN (Books,Children,Electronics) AND i_class#3 IN (personal,portable,reference,self-help)) AND i_brand#2 IN (scholaramalgamalg #6,scholaramalgamalg #7,exportiunivamalg #8,scholaramalgamalg #8)) OR ((i_category#4 IN (Women,Music,Men) AND i_class#3 IN (accessories,classical,fragrances,pants)) AND i_brand#2 IN (amalgimporto #9,edu packscholar #9,exportiimporto #9,importoamalg #9))) AND isnotnull(i_item_sk#1)) + +(4) Project [codegen id : 1] +Output [2]: [i_item_sk#1, i_manufact_id#5] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] + +(5) BroadcastExchange +Input [2]: [i_item_sk#1, i_manufact_id#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] + +(6) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#11, ss_item_sk#12, ss_store_sk#13, ss_sales_price#14] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2451911), LessThanOrEqual(ss_sold_date_sk,2452275), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(7) ColumnarToRow +Input [4]: [ss_sold_date_sk#11, ss_item_sk#12, ss_store_sk#13, ss_sales_price#14] + +(8) Filter +Input [4]: [ss_sold_date_sk#11, ss_item_sk#12, ss_store_sk#13, ss_sales_price#14] +Condition : ((((isnotnull(ss_sold_date_sk#11) AND (ss_sold_date_sk#11 >= 2451911)) AND (ss_sold_date_sk#11 <= 2452275)) AND isnotnull(ss_item_sk#12)) AND isnotnull(ss_store_sk#13)) + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [ss_item_sk#12] +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [i_manufact_id#5, ss_sold_date_sk#11, ss_store_sk#13, ss_sales_price#14] +Input [6]: [i_item_sk#1, i_manufact_id#5, ss_sold_date_sk#11, ss_item_sk#12, ss_store_sk#13, ss_sales_price#14] + +(11) Scan parquet default.store +Output [1]: [s_store_sk#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [1]: [s_store_sk#15] + +(13) Filter [codegen id : 2] +Input [1]: [s_store_sk#15] +Condition : isnotnull(s_store_sk#15) + +(14) BroadcastExchange +Input [1]: [s_store_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#16] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#13] +Right keys [1]: [s_store_sk#15] +Join condition: None + +(16) Project [codegen id : 4] +Output [3]: [i_manufact_id#5, ss_sold_date_sk#11, ss_sales_price#14] +Input [5]: [i_manufact_id#5, ss_sold_date_sk#11, ss_store_sk#13, 
ss_sales_price#14, s_store_sk#15] + +(17) Scan parquet default.date_dim +Output [3]: [d_date_sk#17, d_month_seq#18, d_qoy#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/date_dim] +PushedFilters: [In(d_month_seq, [1222,1215,1223,1217,1214,1219,1213,1218,1220,1221,1216,1212]), LessThanOrEqual(d_date_sk,2452275), GreaterThanOrEqual(d_date_sk,2451911), IsNotNull(d_date_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#17, d_month_seq#18, d_qoy#19] + +(19) Filter [codegen id : 3] +Input [3]: [d_date_sk#17, d_month_seq#18, d_qoy#19] +Condition : (((d_month_seq#18 INSET (1222,1215,1223,1217,1214,1219,1213,1218,1220,1221,1216,1212) AND (d_date_sk#17 <= 2452275)) AND (d_date_sk#17 >= 2451911)) AND isnotnull(d_date_sk#17)) + +(20) Project [codegen id : 3] +Output [2]: [d_date_sk#17, d_qoy#19] +Input [3]: [d_date_sk#17, d_month_seq#18, d_qoy#19] + +(21) BroadcastExchange +Input [2]: [d_date_sk#17, d_qoy#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(22) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#11] +Right keys [1]: [d_date_sk#17] +Join condition: None + +(23) Project [codegen id : 4] +Output [3]: [i_manufact_id#5, ss_sales_price#14, d_qoy#19] +Input [5]: [i_manufact_id#5, ss_sold_date_sk#11, ss_sales_price#14, d_date_sk#17, d_qoy#19] + +(24) HashAggregate [codegen id : 4] +Input [3]: [i_manufact_id#5, ss_sales_price#14, d_qoy#19] +Keys [2]: [i_manufact_id#5, d_qoy#19] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#14))] +Aggregate Attributes [1]: [sum#21] +Results [3]: [i_manufact_id#5, d_qoy#19, sum#22] + +(25) Exchange +Input [3]: [i_manufact_id#5, d_qoy#19, sum#22] +Arguments: hashpartitioning(i_manufact_id#5, d_qoy#19, 5), true, [id=#23] + +(26) HashAggregate [codegen id : 5] +Input [3]: [i_manufact_id#5, d_qoy#19, 
sum#22] +Keys [2]: [i_manufact_id#5, d_qoy#19] +Functions [1]: [sum(UnscaledValue(ss_sales_price#14))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#14))#24] +Results [3]: [i_manufact_id#5, MakeDecimal(sum(UnscaledValue(ss_sales_price#14))#24,17,2) AS sum_sales#25, MakeDecimal(sum(UnscaledValue(ss_sales_price#14))#24,17,2) AS _w0#26] + +(27) Exchange +Input [3]: [i_manufact_id#5, sum_sales#25, _w0#26] +Arguments: hashpartitioning(i_manufact_id#5, 5), true, [id=#27] + +(28) Sort [codegen id : 6] +Input [3]: [i_manufact_id#5, sum_sales#25, _w0#26] +Arguments: [i_manufact_id#5 ASC NULLS FIRST], false, 0 + +(29) Window +Input [3]: [i_manufact_id#5, sum_sales#25, _w0#26] +Arguments: [avg(_w0#26) windowspecdefinition(i_manufact_id#5, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_quarterly_sales#28], [i_manufact_id#5] + +(30) Filter [codegen id : 7] +Input [4]: [i_manufact_id#5, sum_sales#25, _w0#26, avg_quarterly_sales#28] +Condition : (CASE WHEN (avg_quarterly_sales#28 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#25 as decimal(22,6))) - promote_precision(cast(avg_quarterly_sales#28 as decimal(22,6)))), DecimalType(22,6), true))) / promote_precision(cast(avg_quarterly_sales#28 as decimal(22,6)))), DecimalType(38,16), true) ELSE null END > 0.1000000000000000) + +(31) Project [codegen id : 7] +Output [3]: [i_manufact_id#5, sum_sales#25, avg_quarterly_sales#28] +Input [4]: [i_manufact_id#5, sum_sales#25, _w0#26, avg_quarterly_sales#28] + +(32) TakeOrderedAndProject +Input [3]: [i_manufact_id#5, sum_sales#25, avg_quarterly_sales#28] +Arguments: 100, [avg_quarterly_sales#28 ASC NULLS FIRST, sum_sales#25 ASC NULLS FIRST, i_manufact_id#5 ASC NULLS FIRST], [i_manufact_id#5, sum_sales#25, avg_quarterly_sales#28] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q53.sf100/simplified.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q53.sf100/simplified.txt new file mode 100644 index 0000000000000..98f126325517b --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q53.sf100/simplified.txt @@ -0,0 +1,49 @@ +TakeOrderedAndProject [avg_quarterly_sales,i_manufact_id,sum_sales] + WholeStageCodegen (7) + Project [avg_quarterly_sales,i_manufact_id,sum_sales] + Filter [avg_quarterly_sales,sum_sales] + InputAdapter + Window [_w0,i_manufact_id] + WholeStageCodegen (6) + Sort [i_manufact_id] + InputAdapter + Exchange [i_manufact_id] #1 + WholeStageCodegen (5) + HashAggregate [d_qoy,i_manufact_id,sum] [_w0,sum,sum(UnscaledValue(ss_sales_price)),sum_sales] + InputAdapter + Exchange [d_qoy,i_manufact_id] #2 + WholeStageCodegen (4) + HashAggregate [d_qoy,i_manufact_id,ss_sales_price] [sum,sum] + Project [d_qoy,i_manufact_id,ss_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_manufact_id,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [i_manufact_id,ss_sales_price,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [i_item_sk,i_manufact_id] + Filter [i_brand,i_category,i_class,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_category,i_class,i_item_sk,i_manufact_id] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [d_date_sk,d_qoy] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq,d_qoy] diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q53/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q53/explain.txt new file mode 100644 index 0000000000000..656a81b8529b6 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q53/explain.txt @@ -0,0 +1,180 @@ +== Physical Plan == +TakeOrderedAndProject (32) ++- * Project (31) + +- * Filter (30) + +- Window (29) + +- * Sort (28) + +- Exchange (27) + +- * HashAggregate (26) + +- Exchange (25) + +- * HashAggregate (24) + +- * Project (23) + +- * BroadcastHashJoin Inner BuildRight (22) + :- * Project (17) + : +- * BroadcastHashJoin Inner BuildRight (16) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.item (1) + : : +- BroadcastExchange (8) + : : +- * Filter (7) + : : +- * ColumnarToRow (6) + : : +- Scan parquet default.store_sales (5) + : +- BroadcastExchange (15) + : +- * Project (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet default.date_dim (11) + +- BroadcastExchange (21) + +- * Filter (20) + +- * ColumnarToRow (19) + +- Scan parquet default.store (18) + + +(1) Scan parquet default.item +Output [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/item] +PushedFilters: [Or(And(And(In(i_category, [Books,Children,Electronics]),In(i_class, [personal,portable,reference,self-help])),In(i_brand, [scholaramalgamalg #6,scholaramalgamalg #7,exportiunivamalg #8,scholaramalgamalg #8])),And(And(In(i_category, [Women,Music,Men]),In(i_class, [accessories,classical,fragrances,pants])),In(i_brand, [amalgimporto #9,edu packscholar #9,exportiimporto #9,importoamalg #9]))), IsNotNull(i_item_sk)] 
+ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] + +(3) Filter [codegen id : 4] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] +Condition : ((((i_category#4 IN (Books,Children,Electronics) AND i_class#3 IN (personal,portable,reference,self-help)) AND i_brand#2 IN (scholaramalgamalg #6,scholaramalgamalg #7,exportiunivamalg #8,scholaramalgamalg #8)) OR ((i_category#4 IN (Women,Music,Men) AND i_class#3 IN (accessories,classical,fragrances,pants)) AND i_brand#2 IN (amalgimporto #9,edu packscholar #9,exportiimporto #9,importoamalg #9))) AND isnotnull(i_item_sk#1)) + +(4) Project [codegen id : 4] +Output [2]: [i_item_sk#1, i_manufact_id#5] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] + +(5) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2451911), LessThanOrEqual(ss_sold_date_sk,2452275), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [4]: [ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] + +(7) Filter [codegen id : 1] +Input [4]: [ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] +Condition : ((((isnotnull(ss_sold_date_sk#10) AND (ss_sold_date_sk#10 >= 2451911)) AND (ss_sold_date_sk#10 <= 2452275)) AND isnotnull(ss_item_sk#11)) AND isnotnull(ss_store_sk#12)) + +(8) BroadcastExchange +Input [4]: [ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#14] + +(9) BroadcastHashJoin [codegen id 
: 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [ss_item_sk#11] +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [i_manufact_id#5, ss_sold_date_sk#10, ss_store_sk#12, ss_sales_price#13] +Input [6]: [i_item_sk#1, i_manufact_id#5, ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] + +(11) Scan parquet default.date_dim +Output [3]: [d_date_sk#15, d_month_seq#16, d_qoy#17] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/date_dim] +PushedFilters: [In(d_month_seq, [1222,1215,1223,1217,1214,1219,1213,1218,1220,1221,1216,1212]), GreaterThanOrEqual(d_date_sk,2451911), LessThanOrEqual(d_date_sk,2452275), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#15, d_month_seq#16, d_qoy#17] + +(13) Filter [codegen id : 2] +Input [3]: [d_date_sk#15, d_month_seq#16, d_qoy#17] +Condition : (((d_month_seq#16 INSET (1222,1215,1223,1217,1214,1219,1213,1218,1220,1221,1216,1212) AND (d_date_sk#15 >= 2451911)) AND (d_date_sk#15 <= 2452275)) AND isnotnull(d_date_sk#15)) + +(14) Project [codegen id : 2] +Output [2]: [d_date_sk#15, d_qoy#17] +Input [3]: [d_date_sk#15, d_month_seq#16, d_qoy#17] + +(15) BroadcastExchange +Input [2]: [d_date_sk#15, d_qoy#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#10] +Right keys [1]: [d_date_sk#15] +Join condition: None + +(17) Project [codegen id : 4] +Output [4]: [i_manufact_id#5, ss_store_sk#12, ss_sales_price#13, d_qoy#17] +Input [6]: [i_manufact_id#5, ss_sold_date_sk#10, ss_store_sk#12, ss_sales_price#13, d_date_sk#15, d_qoy#17] + +(18) Scan parquet default.store +Output [1]: [s_store_sk#19] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [1]: [s_store_sk#19] + +(20) Filter [codegen id : 3] +Input [1]: [s_store_sk#19] +Condition : isnotnull(s_store_sk#19) + +(21) BroadcastExchange +Input [1]: [s_store_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#20] + +(22) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#12] +Right keys [1]: [s_store_sk#19] +Join condition: None + +(23) Project [codegen id : 4] +Output [3]: [i_manufact_id#5, ss_sales_price#13, d_qoy#17] +Input [5]: [i_manufact_id#5, ss_store_sk#12, ss_sales_price#13, d_qoy#17, s_store_sk#19] + +(24) HashAggregate [codegen id : 4] +Input [3]: [i_manufact_id#5, ss_sales_price#13, d_qoy#17] +Keys [2]: [i_manufact_id#5, d_qoy#17] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#13))] +Aggregate Attributes [1]: [sum#21] +Results [3]: [i_manufact_id#5, d_qoy#17, sum#22] + +(25) Exchange +Input [3]: [i_manufact_id#5, d_qoy#17, sum#22] +Arguments: hashpartitioning(i_manufact_id#5, d_qoy#17, 5), true, [id=#23] + +(26) HashAggregate [codegen id : 5] +Input [3]: [i_manufact_id#5, d_qoy#17, sum#22] +Keys [2]: [i_manufact_id#5, d_qoy#17] +Functions [1]: [sum(UnscaledValue(ss_sales_price#13))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#13))#24] +Results [3]: [i_manufact_id#5, MakeDecimal(sum(UnscaledValue(ss_sales_price#13))#24,17,2) AS sum_sales#25, MakeDecimal(sum(UnscaledValue(ss_sales_price#13))#24,17,2) AS _w0#26] + +(27) Exchange +Input [3]: [i_manufact_id#5, sum_sales#25, _w0#26] +Arguments: hashpartitioning(i_manufact_id#5, 5), true, [id=#27] + +(28) Sort [codegen id : 6] +Input [3]: [i_manufact_id#5, sum_sales#25, _w0#26] +Arguments: [i_manufact_id#5 ASC NULLS FIRST], false, 0 + +(29) Window +Input [3]: [i_manufact_id#5, 
sum_sales#25, _w0#26] +Arguments: [avg(_w0#26) windowspecdefinition(i_manufact_id#5, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_quarterly_sales#28], [i_manufact_id#5] + +(30) Filter [codegen id : 7] +Input [4]: [i_manufact_id#5, sum_sales#25, _w0#26, avg_quarterly_sales#28] +Condition : (CASE WHEN (avg_quarterly_sales#28 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#25 as decimal(22,6))) - promote_precision(cast(avg_quarterly_sales#28 as decimal(22,6)))), DecimalType(22,6), true))) / promote_precision(cast(avg_quarterly_sales#28 as decimal(22,6)))), DecimalType(38,16), true) ELSE null END > 0.1000000000000000) + +(31) Project [codegen id : 7] +Output [3]: [i_manufact_id#5, sum_sales#25, avg_quarterly_sales#28] +Input [4]: [i_manufact_id#5, sum_sales#25, _w0#26, avg_quarterly_sales#28] + +(32) TakeOrderedAndProject +Input [3]: [i_manufact_id#5, sum_sales#25, avg_quarterly_sales#28] +Arguments: 100, [avg_quarterly_sales#28 ASC NULLS FIRST, sum_sales#25 ASC NULLS FIRST, i_manufact_id#5 ASC NULLS FIRST], [i_manufact_id#5, sum_sales#25, avg_quarterly_sales#28] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q53/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q53/simplified.txt new file mode 100644 index 0000000000000..c8070c46c9c80 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q53/simplified.txt @@ -0,0 +1,49 @@ +TakeOrderedAndProject [avg_quarterly_sales,i_manufact_id,sum_sales] + WholeStageCodegen (7) + Project [avg_quarterly_sales,i_manufact_id,sum_sales] + Filter [avg_quarterly_sales,sum_sales] + InputAdapter + Window [_w0,i_manufact_id] + WholeStageCodegen (6) + Sort [i_manufact_id] + InputAdapter + Exchange [i_manufact_id] #1 + WholeStageCodegen (5) + HashAggregate [d_qoy,i_manufact_id,sum] 
[_w0,sum,sum(UnscaledValue(ss_sales_price)),sum_sales] + InputAdapter + Exchange [d_qoy,i_manufact_id] #2 + WholeStageCodegen (4) + HashAggregate [d_qoy,i_manufact_id,ss_sales_price] [sum,sum] + Project [d_qoy,i_manufact_id,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_qoy,i_manufact_id,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_manufact_id,ss_sales_price,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [i_item_sk,i_manufact_id] + Filter [i_brand,i_category,i_class,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_category,i_class,i_item_sk,i_manufact_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk,d_qoy] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq,d_qoy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q55.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q55.sf100/explain.txt new file mode 100644 index 0000000000000..fcffe468011ba --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q55.sf100/explain.txt @@ -0,0 +1,122 @@ +== Physical Plan == +TakeOrderedAndProject (21) ++- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Project (17) + +- * BroadcastHashJoin Inner BuildRight (16) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildLeft (9) + : :- BroadcastExchange (5) + : : +- * 
Project (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.date_dim (1) + : +- * Filter (8) + : +- * ColumnarToRow (7) + : +- Scan parquet default.store_sales (6) + +- BroadcastExchange (15) + +- * Project (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet default.item (11) + + +(1) Scan parquet default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,2001), GreaterThanOrEqual(d_date_sk,2452215), LessThanOrEqual(d_date_sk,2452244), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(3) Filter [codegen id : 1] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 2001)) AND (d_date_sk#1 >= 2452215)) AND (d_date_sk#1 <= 2452244)) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 1] +Output [1]: [d_date_sk#1] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(5) BroadcastExchange +Input [1]: [d_date_sk#1] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#4] + +(6) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#5, ss_item_sk#6, ss_ext_sales_price#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2452215), LessThanOrEqual(ss_sold_date_sk,2452244), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(7) ColumnarToRow +Input [3]: [ss_sold_date_sk#5, ss_item_sk#6, ss_ext_sales_price#7] + +(8) Filter +Input [3]: 
[ss_sold_date_sk#5, ss_item_sk#6, ss_ext_sales_price#7] +Condition : (((isnotnull(ss_sold_date_sk#5) AND (ss_sold_date_sk#5 >= 2452215)) AND (ss_sold_date_sk#5 <= 2452244)) AND isnotnull(ss_item_sk#6)) + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#5] +Join condition: None + +(10) Project [codegen id : 3] +Output [2]: [ss_item_sk#6, ss_ext_sales_price#7] +Input [4]: [d_date_sk#1, ss_sold_date_sk#5, ss_item_sk#6, ss_ext_sales_price#7] + +(11) Scan parquet default.item +Output [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,48), IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] + +(13) Filter [codegen id : 2] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] +Condition : ((isnotnull(i_manager_id#11) AND (i_manager_id#11 = 48)) AND isnotnull(i_item_sk#8)) + +(14) Project [codegen id : 2] +Output [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] + +(15) BroadcastExchange +Input [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#6] +Right keys [1]: [i_item_sk#8] +Join condition: None + +(17) Project [codegen id : 3] +Output [3]: [ss_ext_sales_price#7, i_brand_id#9, i_brand#10] +Input [5]: [ss_item_sk#6, ss_ext_sales_price#7, i_item_sk#8, i_brand_id#9, i_brand#10] + +(18) HashAggregate [codegen id : 3] +Input [3]: [ss_ext_sales_price#7, i_brand_id#9, i_brand#10] +Keys [2]: [i_brand#10, i_brand_id#9] +Functions [1]: 
[partial_sum(UnscaledValue(ss_ext_sales_price#7))] +Aggregate Attributes [1]: [sum#13] +Results [3]: [i_brand#10, i_brand_id#9, sum#14] + +(19) Exchange +Input [3]: [i_brand#10, i_brand_id#9, sum#14] +Arguments: hashpartitioning(i_brand#10, i_brand_id#9, 5), true, [id=#15] + +(20) HashAggregate [codegen id : 4] +Input [3]: [i_brand#10, i_brand_id#9, sum#14] +Keys [2]: [i_brand#10, i_brand_id#9] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#7))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#7))#16] +Results [3]: [i_brand_id#9 AS brand_id#17, i_brand#10 AS brand#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#7))#16,17,2) AS ext_price#19] + +(21) TakeOrderedAndProject +Input [3]: [brand_id#17, brand#18, ext_price#19] +Arguments: 100, [ext_price#19 DESC NULLS LAST, brand_id#17 ASC NULLS FIRST], [brand_id#17, brand#18, ext_price#19] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q55.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q55.sf100/simplified.txt new file mode 100644 index 0000000000000..dab4e1d1bd68b --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q55.sf100/simplified.txt @@ -0,0 +1,31 @@ +TakeOrderedAndProject [brand,brand_id,ext_price] + WholeStageCodegen (4) + HashAggregate [i_brand,i_brand_id,sum] [brand,brand_id,ext_price,sum,sum(UnscaledValue(ss_ext_sales_price))] + InputAdapter + Exchange [i_brand,i_brand_id] #1 + WholeStageCodegen (3) + HashAggregate [i_brand,i_brand_id,ss_ext_sales_price] [sum,sum] + Project [i_brand,i_brand_id,ss_ext_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + Filter 
[ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [i_brand,i_brand_id,i_item_sk] + Filter [i_item_sk,i_manager_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_brand_id,i_item_sk,i_manager_id] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q55/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q55/explain.txt new file mode 100644 index 0000000000000..fbff0e718872f --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q55/explain.txt @@ -0,0 +1,122 @@ +== Physical Plan == +TakeOrderedAndProject (21) ++- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Project (17) + +- * BroadcastHashJoin Inner BuildRight (16) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Project (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.date_dim (1) + : +- BroadcastExchange (8) + : +- * Filter (7) + : +- * ColumnarToRow (6) + : +- Scan parquet default.store_sales (5) + +- BroadcastExchange (15) + +- * Project (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet default.item (11) + + +(1) Scan parquet default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,2001), GreaterThanOrEqual(d_date_sk,2452215), LessThanOrEqual(d_date_sk,2452244), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(3) Filter [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, 
d_moy#3] +Condition : ((((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 2001)) AND (d_date_sk#1 >= 2452215)) AND (d_date_sk#1 <= 2452244)) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 3] +Output [1]: [d_date_sk#1] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(5) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2452215), LessThanOrEqual(ss_sold_date_sk,2452244), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] + +(7) Filter [codegen id : 1] +Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] +Condition : (((isnotnull(ss_sold_date_sk#4) AND (ss_sold_date_sk#4 >= 2452215)) AND (ss_sold_date_sk#4 <= 2452244)) AND isnotnull(ss_item_sk#5)) + +(8) BroadcastExchange +Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#4] +Join condition: None + +(10) Project [codegen id : 3] +Output [2]: [ss_item_sk#5, ss_ext_sales_price#6] +Input [4]: [d_date_sk#1, ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] + +(11) Scan parquet default.item +Output [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,48), IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) 
ColumnarToRow [codegen id : 2] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] + +(13) Filter [codegen id : 2] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] +Condition : ((isnotnull(i_manager_id#11) AND (i_manager_id#11 = 48)) AND isnotnull(i_item_sk#8)) + +(14) Project [codegen id : 2] +Output [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] + +(15) BroadcastExchange +Input [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#5] +Right keys [1]: [i_item_sk#8] +Join condition: None + +(17) Project [codegen id : 3] +Output [3]: [ss_ext_sales_price#6, i_brand_id#9, i_brand#10] +Input [5]: [ss_item_sk#5, ss_ext_sales_price#6, i_item_sk#8, i_brand_id#9, i_brand#10] + +(18) HashAggregate [codegen id : 3] +Input [3]: [ss_ext_sales_price#6, i_brand_id#9, i_brand#10] +Keys [2]: [i_brand#10, i_brand_id#9] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#6))] +Aggregate Attributes [1]: [sum#13] +Results [3]: [i_brand#10, i_brand_id#9, sum#14] + +(19) Exchange +Input [3]: [i_brand#10, i_brand_id#9, sum#14] +Arguments: hashpartitioning(i_brand#10, i_brand_id#9, 5), true, [id=#15] + +(20) HashAggregate [codegen id : 4] +Input [3]: [i_brand#10, i_brand_id#9, sum#14] +Keys [2]: [i_brand#10, i_brand_id#9] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#6))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#6))#16] +Results [3]: [i_brand_id#9 AS brand_id#17, i_brand#10 AS brand#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#16,17,2) AS ext_price#19] + +(21) TakeOrderedAndProject +Input [3]: [brand_id#17, brand#18, ext_price#19] +Arguments: 100, [ext_price#19 DESC NULLS LAST, brand_id#17 ASC NULLS FIRST], [brand_id#17, brand#18, ext_price#19] + diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q55/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q55/simplified.txt new file mode 100644 index 0000000000000..b20fb74817e84 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q55/simplified.txt @@ -0,0 +1,31 @@ +TakeOrderedAndProject [brand,brand_id,ext_price] + WholeStageCodegen (4) + HashAggregate [i_brand,i_brand_id,sum] [brand,brand_id,ext_price,sum,sum(UnscaledValue(ss_ext_sales_price))] + InputAdapter + Exchange [i_brand,i_brand_id] #1 + WholeStageCodegen (3) + HashAggregate [i_brand,i_brand_id,ss_ext_sales_price] [sum,sum] + Project [i_brand,i_brand_id,ss_ext_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [i_brand,i_brand_id,i_item_sk] + Filter [i_item_sk,i_manager_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_brand_id,i_item_sk,i_manager_id] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q59.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q59.sf100/explain.txt new file mode 100644 index 0000000000000..baab17932fa4d --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q59.sf100/explain.txt @@ -0,0 +1,290 @@ +== Physical Plan == +TakeOrderedAndProject (51) ++- * Project (50) + +- * BroadcastHashJoin Inner BuildRight (49) + :- * 
Project (25) + : +- * BroadcastHashJoin Inner BuildRight (24) + : :- * Project (18) + : : +- * BroadcastHashJoin Inner BuildRight (17) + : : :- * HashAggregate (12) + : : : +- Exchange (11) + : : : +- * HashAggregate (10) + : : : +- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (16) + : : +- * Filter (15) + : : +- * ColumnarToRow (14) + : : +- Scan parquet default.store (13) + : +- BroadcastExchange (23) + : +- * Project (22) + : +- * Filter (21) + : +- * ColumnarToRow (20) + : +- Scan parquet default.date_dim (19) + +- BroadcastExchange (48) + +- * Project (47) + +- * BroadcastHashJoin Inner BuildRight (46) + :- * Project (40) + : +- * BroadcastHashJoin Inner BuildRight (39) + : :- * HashAggregate (34) + : : +- Exchange (33) + : : +- * HashAggregate (32) + : : +- * Project (31) + : : +- * BroadcastHashJoin Inner BuildRight (30) + : : :- * Filter (28) + : : : +- * ColumnarToRow (27) + : : : +- Scan parquet default.store_sales (26) + : : +- ReusedExchange (29) + : +- BroadcastExchange (38) + : +- * Filter (37) + : +- * ColumnarToRow (36) + : +- Scan parquet default.store (35) + +- BroadcastExchange (45) + +- * Project (44) + +- * Filter (43) + +- * ColumnarToRow (42) + +- Scan parquet default.date_dim (41) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_sales_price#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_sales_price#3] + 
+(3) Filter [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_sales_price#3] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_week_seq)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] +Condition : (isnotnull(d_date_sk#4) AND isnotnull(d_week_seq#5)) + +(7) BroadcastExchange +Input [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] + +(8) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(9) Project [codegen id : 2] +Output [4]: [ss_store_sk#2, ss_sales_price#3, d_week_seq#5, d_day_name#6] +Input [6]: [ss_sold_date_sk#1, ss_store_sk#2, ss_sales_price#3, d_date_sk#4, d_week_seq#5, d_day_name#6] + +(10) HashAggregate [codegen id : 2] +Input [4]: [ss_store_sk#2, ss_sales_price#3, d_week_seq#5, d_day_name#6] +Keys [2]: [d_week_seq#5, ss_store_sk#2] +Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN 
(d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))] +Aggregate Attributes [7]: [sum#8, sum#9, sum#10, sum#11, sum#12, sum#13, sum#14] +Results [9]: [d_week_seq#5, ss_store_sk#2, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20, sum#21] + +(11) Exchange +Input [9]: [d_week_seq#5, ss_store_sk#2, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20, sum#21] +Arguments: hashpartitioning(d_week_seq#5, ss_store_sk#2, 5), true, [id=#22] + +(12) HashAggregate [codegen id : 10] +Input [9]: [d_week_seq#5, ss_store_sk#2, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20, sum#21] +Keys [2]: [d_week_seq#5, ss_store_sk#2] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END))#23, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END))#24, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday) THEN ss_sales_price#3 ELSE null END))#25, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END))#27, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END))#28, sum(UnscaledValue(CASE 
WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))#29] +Results [9]: [d_week_seq#5, ss_store_sk#2, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END))#23,17,2) AS sun_sales#30, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END))#24,17,2) AS mon_sales#31, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday) THEN ss_sales_price#3 ELSE null END))#25,17,2) AS tue_sales#32, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END))#26,17,2) AS wed_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END))#27,17,2) AS thu_sales#34, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END))#28,17,2) AS fri_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))#29,17,2) AS sat_sales#36] + +(13) Scan parquet default.store +Output [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 3] +Input [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] + +(15) Filter [codegen id : 3] +Input [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] +Condition : (isnotnull(s_store_sk#37) AND isnotnull(s_store_id#38)) + +(16) BroadcastExchange +Input [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#40] + +(17) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#37] +Join condition: None + +(18) Project [codegen id : 10] +Output 
[10]: [d_week_seq#5, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#38, s_store_name#39] +Input [12]: [d_week_seq#5, ss_store_sk#2, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_sk#37, s_store_id#38, s_store_name#39] + +(19) Scan parquet default.date_dim +Output [2]: [d_month_seq#41, d_week_seq#42] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1185), LessThanOrEqual(d_month_seq,1196), IsNotNull(d_week_seq)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 4] +Input [2]: [d_month_seq#41, d_week_seq#42] + +(21) Filter [codegen id : 4] +Input [2]: [d_month_seq#41, d_week_seq#42] +Condition : (((isnotnull(d_month_seq#41) AND (d_month_seq#41 >= 1185)) AND (d_month_seq#41 <= 1196)) AND isnotnull(d_week_seq#42)) + +(22) Project [codegen id : 4] +Output [1]: [d_week_seq#42] +Input [2]: [d_month_seq#41, d_week_seq#42] + +(23) BroadcastExchange +Input [1]: [d_week_seq#42] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#43] + +(24) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [d_week_seq#5] +Right keys [1]: [d_week_seq#42] +Join condition: None + +(25) Project [codegen id : 10] +Output [10]: [s_store_name#39 AS s_store_name1#44, d_week_seq#5 AS d_week_seq1#45, s_store_id#38 AS s_store_id1#46, sun_sales#30 AS sun_sales1#47, mon_sales#31 AS mon_sales1#48, tue_sales#32 AS tue_sales1#49, wed_sales#33 AS wed_sales1#50, thu_sales#34 AS thu_sales1#51, fri_sales#35 AS fri_sales1#52, sat_sales#36 AS sat_sales1#53] +Input [11]: [d_week_seq#5, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#38, s_store_name#39, d_week_seq#42] + +(26) Scan parquet 
default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_sales_price#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 6] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_sales_price#3] + +(28) Filter [codegen id : 6] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_sales_price#3] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#2)) + +(29) ReusedExchange [Reuses operator id: 7] +Output [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] + +(30) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(31) Project [codegen id : 6] +Output [4]: [ss_store_sk#2, ss_sales_price#3, d_week_seq#5, d_day_name#6] +Input [6]: [ss_sold_date_sk#1, ss_store_sk#2, ss_sales_price#3, d_date_sk#4, d_week_seq#5, d_day_name#6] + +(32) HashAggregate [codegen id : 6] +Input [4]: [ss_store_sk#2, ss_sales_price#3, d_week_seq#5, d_day_name#6] +Keys [2]: [d_week_seq#5, ss_store_sk#2] +Functions [6]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))] +Aggregate Attributes [6]: [sum#54, sum#55, sum#56, sum#57, sum#58, sum#59] +Results [8]: [d_week_seq#5, ss_store_sk#2, sum#60, sum#61, sum#62, 
sum#63, sum#64, sum#65] + +(33) Exchange +Input [8]: [d_week_seq#5, ss_store_sk#2, sum#60, sum#61, sum#62, sum#63, sum#64, sum#65] +Arguments: hashpartitioning(d_week_seq#5, ss_store_sk#2, 5), true, [id=#66] + +(34) HashAggregate [codegen id : 9] +Input [8]: [d_week_seq#5, ss_store_sk#2, sum#60, sum#61, sum#62, sum#63, sum#64, sum#65] +Keys [2]: [d_week_seq#5, ss_store_sk#2] +Functions [6]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))] +Aggregate Attributes [6]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END))#67, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END))#68, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END))#69, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END))#70, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END))#71, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))#72] +Results [8]: [d_week_seq#5, ss_store_sk#2, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END))#67,17,2) AS sun_sales#30, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END))#68,17,2) AS mon_sales#31, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END))#69,17,2) AS wed_sales#33, 
MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END))#70,17,2) AS thu_sales#34, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END))#71,17,2) AS fri_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))#72,17,2) AS sat_sales#36] + +(35) Scan parquet default.store +Output [2]: [s_store_sk#37, s_store_id#38] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)] +ReadSchema: struct + +(36) ColumnarToRow [codegen id : 7] +Input [2]: [s_store_sk#37, s_store_id#38] + +(37) Filter [codegen id : 7] +Input [2]: [s_store_sk#37, s_store_id#38] +Condition : (isnotnull(s_store_sk#37) AND isnotnull(s_store_id#38)) + +(38) BroadcastExchange +Input [2]: [s_store_sk#37, s_store_id#38] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#73] + +(39) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#37] +Join condition: None + +(40) Project [codegen id : 9] +Output [8]: [d_week_seq#5, sun_sales#30, mon_sales#31, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#38] +Input [10]: [d_week_seq#5, ss_store_sk#2, sun_sales#30, mon_sales#31, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_sk#37, s_store_id#38] + +(41) Scan parquet default.date_dim +Output [2]: [d_month_seq#74, d_week_seq#75] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1197), LessThanOrEqual(d_month_seq,1208), IsNotNull(d_week_seq)] +ReadSchema: struct + +(42) 
ColumnarToRow [codegen id : 8] +Input [2]: [d_month_seq#74, d_week_seq#75] + +(43) Filter [codegen id : 8] +Input [2]: [d_month_seq#74, d_week_seq#75] +Condition : (((isnotnull(d_month_seq#74) AND (d_month_seq#74 >= 1197)) AND (d_month_seq#74 <= 1208)) AND isnotnull(d_week_seq#75)) + +(44) Project [codegen id : 8] +Output [1]: [d_week_seq#75] +Input [2]: [d_month_seq#74, d_week_seq#75] + +(45) BroadcastExchange +Input [1]: [d_week_seq#75] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#76] + +(46) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [d_week_seq#5] +Right keys [1]: [d_week_seq#75] +Join condition: None + +(47) Project [codegen id : 9] +Output [8]: [d_week_seq#5 AS d_week_seq2#77, s_store_id#38 AS s_store_id2#78, sun_sales#30 AS sun_sales2#79, mon_sales#31 AS mon_sales2#80, wed_sales#33 AS wed_sales2#81, thu_sales#34 AS thu_sales2#82, fri_sales#35 AS fri_sales2#83, sat_sales#36 AS sat_sales2#84] +Input [9]: [d_week_seq#5, sun_sales#30, mon_sales#31, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#38, d_week_seq#75] + +(48) BroadcastExchange +Input [8]: [d_week_seq2#77, s_store_id2#78, sun_sales2#79, mon_sales2#80, wed_sales2#81, thu_sales2#82, fri_sales2#83, sat_sales2#84] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true], (input[0, int, true] - 52)),false), [id=#85] + +(49) BroadcastHashJoin [codegen id : 10] +Left keys [2]: [s_store_id1#46, d_week_seq1#45] +Right keys [2]: [s_store_id2#78, (d_week_seq2#77 - 52)] +Join condition: None + +(50) Project [codegen id : 10] +Output [10]: [s_store_name1#44, s_store_id1#46, d_week_seq1#45, CheckOverflow((promote_precision(sun_sales1#47) / promote_precision(sun_sales2#79)), DecimalType(37,20), true) AS (sun_sales1 / sun_sales2)#86, CheckOverflow((promote_precision(mon_sales1#48) / promote_precision(mon_sales2#80)), DecimalType(37,20), true) AS (mon_sales1 / mon_sales2)#87, CheckOverflow((promote_precision(tue_sales1#49) / 
promote_precision(tue_sales1#49)), DecimalType(37,20), true) AS (tue_sales1 / tue_sales1)#88, CheckOverflow((promote_precision(wed_sales1#50) / promote_precision(wed_sales2#81)), DecimalType(37,20), true) AS (wed_sales1 / wed_sales2)#89, CheckOverflow((promote_precision(thu_sales1#51) / promote_precision(thu_sales2#82)), DecimalType(37,20), true) AS (thu_sales1 / thu_sales2)#90, CheckOverflow((promote_precision(fri_sales1#52) / promote_precision(fri_sales2#83)), DecimalType(37,20), true) AS (fri_sales1 / fri_sales2)#91, CheckOverflow((promote_precision(sat_sales1#53) / promote_precision(sat_sales2#84)), DecimalType(37,20), true) AS (sat_sales1 / sat_sales2)#92] +Input [18]: [s_store_name1#44, d_week_seq1#45, s_store_id1#46, sun_sales1#47, mon_sales1#48, tue_sales1#49, wed_sales1#50, thu_sales1#51, fri_sales1#52, sat_sales1#53, d_week_seq2#77, s_store_id2#78, sun_sales2#79, mon_sales2#80, wed_sales2#81, thu_sales2#82, fri_sales2#83, sat_sales2#84] + +(51) TakeOrderedAndProject +Input [10]: [s_store_name1#44, s_store_id1#46, d_week_seq1#45, (sun_sales1 / sun_sales2)#86, (mon_sales1 / mon_sales2)#87, (tue_sales1 / tue_sales1)#88, (wed_sales1 / wed_sales2)#89, (thu_sales1 / thu_sales2)#90, (fri_sales1 / fri_sales2)#91, (sat_sales1 / sat_sales2)#92] +Arguments: 100, [s_store_name1#44 ASC NULLS FIRST, s_store_id1#46 ASC NULLS FIRST, d_week_seq1#45 ASC NULLS FIRST], [s_store_name1#44, s_store_id1#46, d_week_seq1#45, (sun_sales1 / sun_sales2)#86, (mon_sales1 / mon_sales2)#87, (tue_sales1 / tue_sales1)#88, (wed_sales1 / wed_sales2)#89, (thu_sales1 / thu_sales2)#90, (fri_sales1 / fri_sales2)#91, (sat_sales1 / sat_sales2)#92] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q59.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q59.sf100/simplified.txt new file mode 100644 index 0000000000000..0650ed03a2e8c --- /dev/null +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q59.sf100/simplified.txt @@ -0,0 +1,76 @@ +TakeOrderedAndProject [(fri_sales1 / fri_sales2),(mon_sales1 / mon_sales2),(sat_sales1 / sat_sales2),(sun_sales1 / sun_sales2),(thu_sales1 / thu_sales2),(tue_sales1 / tue_sales1),(wed_sales1 / wed_sales2),d_week_seq1,s_store_id1,s_store_name1] + WholeStageCodegen (10) + Project [d_week_seq1,fri_sales1,fri_sales2,mon_sales1,mon_sales2,s_store_id1,s_store_name1,sat_sales1,sat_sales2,sun_sales1,sun_sales2,thu_sales1,thu_sales2,tue_sales1,wed_sales1,wed_sales2] + BroadcastHashJoin [d_week_seq1,d_week_seq2,s_store_id1,s_store_id2] + Project [d_week_seq,fri_sales,mon_sales,s_store_id,s_store_name,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] + BroadcastHashJoin [d_week_seq,d_week_seq] + Project [d_week_seq,fri_sales,mon_sales,s_store_id,s_store_name,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] + BroadcastHashJoin [s_store_sk,ss_store_sk] + HashAggregate [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum] [fri_sales,mon_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sun_sales,thu_sales,tue_sales,wed_sales] + InputAdapter + Exchange [d_week_seq,ss_store_sk] #1 + WholeStageCodegen (2) + HashAggregate [d_day_name,d_week_seq,ss_sales_price,ss_store_sk] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project 
[d_day_name,d_week_seq,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [d_date_sk,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_day_name,d_week_seq] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (3) + Filter [s_store_id,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_id,s_store_name,s_store_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (4) + Project [d_week_seq] + Filter [d_month_seq,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_month_seq,d_week_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (9) + Project [d_week_seq,fri_sales,mon_sales,s_store_id,sat_sales,sun_sales,thu_sales,wed_sales] + BroadcastHashJoin [d_week_seq,d_week_seq] + Project [d_week_seq,fri_sales,mon_sales,s_store_id,sat_sales,sun_sales,thu_sales,wed_sales] + BroadcastHashJoin [s_store_sk,ss_store_sk] + HashAggregate [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum] [fri_sales,mon_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sun_sales,thu_sales,wed_sales] + InputAdapter + Exchange [d_week_seq,ss_store_sk] #6 + WholeStageCodegen (6) + HashAggregate [d_day_name,d_week_seq,ss_sales_price,ss_store_sk] 
[sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [d_day_name,d_week_seq,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + ReusedExchange [d_date_sk,d_day_name,d_week_seq] #2 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (7) + Filter [s_store_id,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_id,s_store_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (8) + Project [d_week_seq] + Filter [d_month_seq,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_month_seq,d_week_seq] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q59/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q59/explain.txt new file mode 100644 index 0000000000000..a6c1cd0876c52 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q59/explain.txt @@ -0,0 +1,290 @@ +== Physical Plan == +TakeOrderedAndProject (51) ++- * Project (50) + +- * BroadcastHashJoin Inner BuildRight (49) + :- * Project (25) + : +- * BroadcastHashJoin Inner BuildRight (24) + : :- * Project (18) + : : +- * BroadcastHashJoin Inner BuildRight (17) + : : :- * HashAggregate (12) + : : : +- Exchange (11) + : : : +- * HashAggregate (10) + : : : +- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (16) + : : +- * Filter (15) + : : +- * ColumnarToRow (14) + : : +- Scan parquet default.store (13) + : +- BroadcastExchange (23) + : +- * Project (22) + : +- * Filter (21) 
+ : +- * ColumnarToRow (20) + : +- Scan parquet default.date_dim (19) + +- BroadcastExchange (48) + +- * Project (47) + +- * BroadcastHashJoin Inner BuildRight (46) + :- * Project (40) + : +- * BroadcastHashJoin Inner BuildRight (39) + : :- * HashAggregate (34) + : : +- Exchange (33) + : : +- * HashAggregate (32) + : : +- * Project (31) + : : +- * BroadcastHashJoin Inner BuildRight (30) + : : :- * Filter (28) + : : : +- * ColumnarToRow (27) + : : : +- Scan parquet default.store_sales (26) + : : +- ReusedExchange (29) + : +- BroadcastExchange (38) + : +- * Filter (37) + : +- * ColumnarToRow (36) + : +- Scan parquet default.store (35) + +- BroadcastExchange (45) + +- * Project (44) + +- * Filter (43) + +- * ColumnarToRow (42) + +- Scan parquet default.date_dim (41) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_sales_price#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_sales_price#3] + +(3) Filter [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_sales_price#3] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_week_seq)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] +Condition : (isnotnull(d_date_sk#4) AND 
isnotnull(d_week_seq#5)) + +(7) BroadcastExchange +Input [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] + +(8) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(9) Project [codegen id : 2] +Output [4]: [ss_store_sk#2, ss_sales_price#3, d_week_seq#5, d_day_name#6] +Input [6]: [ss_sold_date_sk#1, ss_store_sk#2, ss_sales_price#3, d_date_sk#4, d_week_seq#5, d_day_name#6] + +(10) HashAggregate [codegen id : 2] +Input [4]: [ss_store_sk#2, ss_sales_price#3, d_week_seq#5, d_day_name#6] +Keys [2]: [d_week_seq#5, ss_store_sk#2] +Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))] +Aggregate Attributes [7]: [sum#8, sum#9, sum#10, sum#11, sum#12, sum#13, sum#14] +Results [9]: [d_week_seq#5, ss_store_sk#2, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20, sum#21] + +(11) Exchange +Input [9]: [d_week_seq#5, ss_store_sk#2, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20, sum#21] +Arguments: hashpartitioning(d_week_seq#5, ss_store_sk#2, 5), true, [id=#22] + +(12) HashAggregate [codegen id : 10] +Input [9]: [d_week_seq#5, ss_store_sk#2, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20, sum#21] +Keys [2]: [d_week_seq#5, ss_store_sk#2] +Functions [7]: 
[sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END))#23, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END))#24, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday) THEN ss_sales_price#3 ELSE null END))#25, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END))#27, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))#29] +Results [9]: [d_week_seq#5, ss_store_sk#2, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END))#23,17,2) AS sun_sales#30, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END))#24,17,2) AS mon_sales#31, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday) THEN ss_sales_price#3 ELSE null END))#25,17,2) AS tue_sales#32, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END))#26,17,2) AS wed_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null 
END))#27,17,2) AS thu_sales#34, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END))#28,17,2) AS fri_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))#29,17,2) AS sat_sales#36] + +(13) Scan parquet default.store +Output [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 3] +Input [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] + +(15) Filter [codegen id : 3] +Input [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] +Condition : (isnotnull(s_store_sk#37) AND isnotnull(s_store_id#38)) + +(16) BroadcastExchange +Input [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#40] + +(17) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#37] +Join condition: None + +(18) Project [codegen id : 10] +Output [10]: [d_week_seq#5, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#38, s_store_name#39] +Input [12]: [d_week_seq#5, ss_store_sk#2, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_sk#37, s_store_id#38, s_store_name#39] + +(19) Scan parquet default.date_dim +Output [2]: [d_month_seq#41, d_week_seq#42] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1185), LessThanOrEqual(d_month_seq,1196), IsNotNull(d_week_seq)] +ReadSchema: 
struct + +(20) ColumnarToRow [codegen id : 4] +Input [2]: [d_month_seq#41, d_week_seq#42] + +(21) Filter [codegen id : 4] +Input [2]: [d_month_seq#41, d_week_seq#42] +Condition : (((isnotnull(d_month_seq#41) AND (d_month_seq#41 >= 1185)) AND (d_month_seq#41 <= 1196)) AND isnotnull(d_week_seq#42)) + +(22) Project [codegen id : 4] +Output [1]: [d_week_seq#42] +Input [2]: [d_month_seq#41, d_week_seq#42] + +(23) BroadcastExchange +Input [1]: [d_week_seq#42] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#43] + +(24) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [d_week_seq#5] +Right keys [1]: [d_week_seq#42] +Join condition: None + +(25) Project [codegen id : 10] +Output [10]: [s_store_name#39 AS s_store_name1#44, d_week_seq#5 AS d_week_seq1#45, s_store_id#38 AS s_store_id1#46, sun_sales#30 AS sun_sales1#47, mon_sales#31 AS mon_sales1#48, tue_sales#32 AS tue_sales1#49, wed_sales#33 AS wed_sales1#50, thu_sales#34 AS thu_sales1#51, fri_sales#35 AS fri_sales1#52, sat_sales#36 AS sat_sales1#53] +Input [11]: [d_week_seq#5, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#38, s_store_name#39, d_week_seq#42] + +(26) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_sales_price#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 6] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_sales_price#3] + +(28) Filter [codegen id : 6] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_sales_price#3] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#2)) + +(29) ReusedExchange [Reuses operator id: 7] +Output [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] + +(30) BroadcastHashJoin 
[codegen id : 6] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(31) Project [codegen id : 6] +Output [4]: [ss_store_sk#2, ss_sales_price#3, d_week_seq#5, d_day_name#6] +Input [6]: [ss_sold_date_sk#1, ss_store_sk#2, ss_sales_price#3, d_date_sk#4, d_week_seq#5, d_day_name#6] + +(32) HashAggregate [codegen id : 6] +Input [4]: [ss_store_sk#2, ss_sales_price#3, d_week_seq#5, d_day_name#6] +Keys [2]: [d_week_seq#5, ss_store_sk#2] +Functions [6]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))] +Aggregate Attributes [6]: [sum#54, sum#55, sum#56, sum#57, sum#58, sum#59] +Results [8]: [d_week_seq#5, ss_store_sk#2, sum#60, sum#61, sum#62, sum#63, sum#64, sum#65] + +(33) Exchange +Input [8]: [d_week_seq#5, ss_store_sk#2, sum#60, sum#61, sum#62, sum#63, sum#64, sum#65] +Arguments: hashpartitioning(d_week_seq#5, ss_store_sk#2, 5), true, [id=#66] + +(34) HashAggregate [codegen id : 9] +Input [8]: [d_week_seq#5, ss_store_sk#2, sum#60, sum#61, sum#62, sum#63, sum#64, sum#65] +Keys [2]: [d_week_seq#5, ss_store_sk#2] +Functions [6]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END)), 
sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))] +Aggregate Attributes [6]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END))#67, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END))#68, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END))#69, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END))#70, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END))#71, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))#72] +Results [8]: [d_week_seq#5, ss_store_sk#2, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END))#67,17,2) AS sun_sales#30, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END))#68,17,2) AS mon_sales#31, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END))#69,17,2) AS wed_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END))#70,17,2) AS thu_sales#34, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END))#71,17,2) AS fri_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))#72,17,2) AS sat_sales#36] + +(35) Scan parquet default.store +Output [2]: [s_store_sk#37, s_store_id#38] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)] +ReadSchema: struct + +(36) ColumnarToRow [codegen id : 7] +Input [2]: [s_store_sk#37, 
s_store_id#38] + +(37) Filter [codegen id : 7] +Input [2]: [s_store_sk#37, s_store_id#38] +Condition : (isnotnull(s_store_sk#37) AND isnotnull(s_store_id#38)) + +(38) BroadcastExchange +Input [2]: [s_store_sk#37, s_store_id#38] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#73] + +(39) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#37] +Join condition: None + +(40) Project [codegen id : 9] +Output [8]: [d_week_seq#5, sun_sales#30, mon_sales#31, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#38] +Input [10]: [d_week_seq#5, ss_store_sk#2, sun_sales#30, mon_sales#31, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_sk#37, s_store_id#38] + +(41) Scan parquet default.date_dim +Output [2]: [d_month_seq#74, d_week_seq#75] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1197), LessThanOrEqual(d_month_seq,1208), IsNotNull(d_week_seq)] +ReadSchema: struct + +(42) ColumnarToRow [codegen id : 8] +Input [2]: [d_month_seq#74, d_week_seq#75] + +(43) Filter [codegen id : 8] +Input [2]: [d_month_seq#74, d_week_seq#75] +Condition : (((isnotnull(d_month_seq#74) AND (d_month_seq#74 >= 1197)) AND (d_month_seq#74 <= 1208)) AND isnotnull(d_week_seq#75)) + +(44) Project [codegen id : 8] +Output [1]: [d_week_seq#75] +Input [2]: [d_month_seq#74, d_week_seq#75] + +(45) BroadcastExchange +Input [1]: [d_week_seq#75] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#76] + +(46) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [d_week_seq#5] +Right keys [1]: [d_week_seq#75] +Join condition: None + +(47) Project [codegen id : 9] +Output [8]: [d_week_seq#5 AS d_week_seq2#77, s_store_id#38 AS s_store_id2#78, sun_sales#30 AS 
sun_sales2#79, mon_sales#31 AS mon_sales2#80, wed_sales#33 AS wed_sales2#81, thu_sales#34 AS thu_sales2#82, fri_sales#35 AS fri_sales2#83, sat_sales#36 AS sat_sales2#84] +Input [9]: [d_week_seq#5, sun_sales#30, mon_sales#31, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#38, d_week_seq#75] + +(48) BroadcastExchange +Input [8]: [d_week_seq2#77, s_store_id2#78, sun_sales2#79, mon_sales2#80, wed_sales2#81, thu_sales2#82, fri_sales2#83, sat_sales2#84] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true], (input[0, int, true] - 52)),false), [id=#85] + +(49) BroadcastHashJoin [codegen id : 10] +Left keys [2]: [s_store_id1#46, d_week_seq1#45] +Right keys [2]: [s_store_id2#78, (d_week_seq2#77 - 52)] +Join condition: None + +(50) Project [codegen id : 10] +Output [10]: [s_store_name1#44, s_store_id1#46, d_week_seq1#45, CheckOverflow((promote_precision(sun_sales1#47) / promote_precision(sun_sales2#79)), DecimalType(37,20), true) AS (sun_sales1 / sun_sales2)#86, CheckOverflow((promote_precision(mon_sales1#48) / promote_precision(mon_sales2#80)), DecimalType(37,20), true) AS (mon_sales1 / mon_sales2)#87, CheckOverflow((promote_precision(tue_sales1#49) / promote_precision(tue_sales1#49)), DecimalType(37,20), true) AS (tue_sales1 / tue_sales1)#88, CheckOverflow((promote_precision(wed_sales1#50) / promote_precision(wed_sales2#81)), DecimalType(37,20), true) AS (wed_sales1 / wed_sales2)#89, CheckOverflow((promote_precision(thu_sales1#51) / promote_precision(thu_sales2#82)), DecimalType(37,20), true) AS (thu_sales1 / thu_sales2)#90, CheckOverflow((promote_precision(fri_sales1#52) / promote_precision(fri_sales2#83)), DecimalType(37,20), true) AS (fri_sales1 / fri_sales2)#91, CheckOverflow((promote_precision(sat_sales1#53) / promote_precision(sat_sales2#84)), DecimalType(37,20), true) AS (sat_sales1 / sat_sales2)#92] +Input [18]: [s_store_name1#44, d_week_seq1#45, s_store_id1#46, sun_sales1#47, mon_sales1#48, tue_sales1#49, wed_sales1#50, 
thu_sales1#51, fri_sales1#52, sat_sales1#53, d_week_seq2#77, s_store_id2#78, sun_sales2#79, mon_sales2#80, wed_sales2#81, thu_sales2#82, fri_sales2#83, sat_sales2#84] + +(51) TakeOrderedAndProject +Input [10]: [s_store_name1#44, s_store_id1#46, d_week_seq1#45, (sun_sales1 / sun_sales2)#86, (mon_sales1 / mon_sales2)#87, (tue_sales1 / tue_sales1)#88, (wed_sales1 / wed_sales2)#89, (thu_sales1 / thu_sales2)#90, (fri_sales1 / fri_sales2)#91, (sat_sales1 / sat_sales2)#92] +Arguments: 100, [s_store_name1#44 ASC NULLS FIRST, s_store_id1#46 ASC NULLS FIRST, d_week_seq1#45 ASC NULLS FIRST], [s_store_name1#44, s_store_id1#46, d_week_seq1#45, (sun_sales1 / sun_sales2)#86, (mon_sales1 / mon_sales2)#87, (tue_sales1 / tue_sales1)#88, (wed_sales1 / wed_sales2)#89, (thu_sales1 / thu_sales2)#90, (fri_sales1 / fri_sales2)#91, (sat_sales1 / sat_sales2)#92] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q59/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q59/simplified.txt new file mode 100644 index 0000000000000..0650ed03a2e8c --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q59/simplified.txt @@ -0,0 +1,76 @@ +TakeOrderedAndProject [(fri_sales1 / fri_sales2),(mon_sales1 / mon_sales2),(sat_sales1 / sat_sales2),(sun_sales1 / sun_sales2),(thu_sales1 / thu_sales2),(tue_sales1 / tue_sales1),(wed_sales1 / wed_sales2),d_week_seq1,s_store_id1,s_store_name1] + WholeStageCodegen (10) + Project [d_week_seq1,fri_sales1,fri_sales2,mon_sales1,mon_sales2,s_store_id1,s_store_name1,sat_sales1,sat_sales2,sun_sales1,sun_sales2,thu_sales1,thu_sales2,tue_sales1,wed_sales1,wed_sales2] + BroadcastHashJoin [d_week_seq1,d_week_seq2,s_store_id1,s_store_id2] + Project [d_week_seq,fri_sales,mon_sales,s_store_id,s_store_name,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] + BroadcastHashJoin [d_week_seq,d_week_seq] + Project 
[d_week_seq,fri_sales,mon_sales,s_store_id,s_store_name,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] + BroadcastHashJoin [s_store_sk,ss_store_sk] + HashAggregate [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum] [fri_sales,mon_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sun_sales,thu_sales,tue_sales,wed_sales] + InputAdapter + Exchange [d_week_seq,ss_store_sk] #1 + WholeStageCodegen (2) + HashAggregate [d_day_name,d_week_seq,ss_sales_price,ss_store_sk] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [d_day_name,d_week_seq,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [d_date_sk,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_day_name,d_week_seq] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (3) + Filter [s_store_id,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_id,s_store_name,s_store_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (4) + Project [d_week_seq] + Filter [d_month_seq,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_month_seq,d_week_seq] + InputAdapter + 
BroadcastExchange #5 + WholeStageCodegen (9) + Project [d_week_seq,fri_sales,mon_sales,s_store_id,sat_sales,sun_sales,thu_sales,wed_sales] + BroadcastHashJoin [d_week_seq,d_week_seq] + Project [d_week_seq,fri_sales,mon_sales,s_store_id,sat_sales,sun_sales,thu_sales,wed_sales] + BroadcastHashJoin [s_store_sk,ss_store_sk] + HashAggregate [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum] [fri_sales,mon_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sun_sales,thu_sales,wed_sales] + InputAdapter + Exchange [d_week_seq,ss_store_sk] #6 + WholeStageCodegen (6) + HashAggregate [d_day_name,d_week_seq,ss_sales_price,ss_store_sk] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [d_day_name,d_week_seq,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + ReusedExchange [d_date_sk,d_day_name,d_week_seq] #2 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (7) + Filter [s_store_id,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_id,s_store_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (8) + Project [d_week_seq] + Filter [d_month_seq,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_month_seq,d_week_seq] diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q63.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q63.sf100/explain.txt new file mode 100644 index 0000000000000..77fb5364a7f96 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q63.sf100/explain.txt @@ -0,0 +1,180 @@ +== Physical Plan == +TakeOrderedAndProject (32) ++- * Project (31) + +- * Filter (30) + +- Window (29) + +- * Sort (28) + +- Exchange (27) + +- * HashAggregate (26) + +- Exchange (25) + +- * HashAggregate (24) + +- * Project (23) + +- * BroadcastHashJoin Inner BuildRight (22) + :- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildLeft (9) + : : :- BroadcastExchange (5) + : : : +- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.item (1) + : : +- * Filter (8) + : : +- * ColumnarToRow (7) + : : +- Scan parquet default.store_sales (6) + : +- BroadcastExchange (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet default.store (11) + +- BroadcastExchange (21) + +- * Project (20) + +- * Filter (19) + +- * ColumnarToRow (18) + +- Scan parquet default.date_dim (17) + + +(1) Scan parquet default.item +Output [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/item] +PushedFilters: [Or(And(And(In(i_category, [Books,Children,Electronics]),In(i_class, [personal,portable,reference,self-help])),In(i_brand, [scholaramalgamalg #6,scholaramalgamalg #7,exportiunivamalg #8,scholaramalgamalg #8])),And(And(In(i_category, [Women,Music,Men]),In(i_class, [accessories,classical,fragrances,pants])),In(i_brand, [amalgimporto #9,edu packscholar #9,exportiimporto #9,importoamalg #9]))), 
IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] + +(3) Filter [codegen id : 1] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] +Condition : ((((i_category#4 IN (Books,Children,Electronics) AND i_class#3 IN (personal,portable,reference,self-help)) AND i_brand#2 IN (scholaramalgamalg #6,scholaramalgamalg #7,exportiunivamalg #8,scholaramalgamalg #8)) OR ((i_category#4 IN (Women,Music,Men) AND i_class#3 IN (accessories,classical,fragrances,pants)) AND i_brand#2 IN (amalgimporto #9,edu packscholar #9,exportiimporto #9,importoamalg #9))) AND isnotnull(i_item_sk#1)) + +(4) Project [codegen id : 1] +Output [2]: [i_item_sk#1, i_manager_id#5] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] + +(5) BroadcastExchange +Input [2]: [i_item_sk#1, i_manager_id#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] + +(6) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#11, ss_item_sk#12, ss_store_sk#13, ss_sales_price#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2452123), LessThanOrEqual(ss_sold_date_sk,2452487), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(7) ColumnarToRow +Input [4]: [ss_sold_date_sk#11, ss_item_sk#12, ss_store_sk#13, ss_sales_price#14] + +(8) Filter +Input [4]: [ss_sold_date_sk#11, ss_item_sk#12, ss_store_sk#13, ss_sales_price#14] +Condition : ((((isnotnull(ss_sold_date_sk#11) AND (ss_sold_date_sk#11 >= 2452123)) AND (ss_sold_date_sk#11 <= 2452487)) AND isnotnull(ss_item_sk#12)) AND isnotnull(ss_store_sk#13)) + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys 
[1]: [ss_item_sk#12] +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [i_manager_id#5, ss_sold_date_sk#11, ss_store_sk#13, ss_sales_price#14] +Input [6]: [i_item_sk#1, i_manager_id#5, ss_sold_date_sk#11, ss_item_sk#12, ss_store_sk#13, ss_sales_price#14] + +(11) Scan parquet default.store +Output [1]: [s_store_sk#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [1]: [s_store_sk#15] + +(13) Filter [codegen id : 2] +Input [1]: [s_store_sk#15] +Condition : isnotnull(s_store_sk#15) + +(14) BroadcastExchange +Input [1]: [s_store_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#16] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#13] +Right keys [1]: [s_store_sk#15] +Join condition: None + +(16) Project [codegen id : 4] +Output [3]: [i_manager_id#5, ss_sold_date_sk#11, ss_sales_price#14] +Input [5]: [i_manager_id#5, ss_sold_date_sk#11, ss_store_sk#13, ss_sales_price#14, s_store_sk#15] + +(17) Scan parquet default.date_dim +Output [3]: [d_date_sk#17, d_month_seq#18, d_moy#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/date_dim] +PushedFilters: [In(d_month_seq, [1222,1228,1223,1227,1219,1226,1224,1225,1230,1220,1221,1229]), LessThanOrEqual(d_date_sk,2452487), GreaterThanOrEqual(d_date_sk,2452123), IsNotNull(d_date_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#17, d_month_seq#18, d_moy#19] + +(19) Filter [codegen id : 3] +Input [3]: [d_date_sk#17, d_month_seq#18, d_moy#19] +Condition : (((d_month_seq#18 INSET (1222,1228,1223,1227,1219,1226,1224,1225,1230,1220,1221,1229) AND 
(d_date_sk#17 <= 2452487)) AND (d_date_sk#17 >= 2452123)) AND isnotnull(d_date_sk#17)) + +(20) Project [codegen id : 3] +Output [2]: [d_date_sk#17, d_moy#19] +Input [3]: [d_date_sk#17, d_month_seq#18, d_moy#19] + +(21) BroadcastExchange +Input [2]: [d_date_sk#17, d_moy#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(22) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#11] +Right keys [1]: [d_date_sk#17] +Join condition: None + +(23) Project [codegen id : 4] +Output [3]: [i_manager_id#5, ss_sales_price#14, d_moy#19] +Input [5]: [i_manager_id#5, ss_sold_date_sk#11, ss_sales_price#14, d_date_sk#17, d_moy#19] + +(24) HashAggregate [codegen id : 4] +Input [3]: [i_manager_id#5, ss_sales_price#14, d_moy#19] +Keys [2]: [i_manager_id#5, d_moy#19] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#14))] +Aggregate Attributes [1]: [sum#21] +Results [3]: [i_manager_id#5, d_moy#19, sum#22] + +(25) Exchange +Input [3]: [i_manager_id#5, d_moy#19, sum#22] +Arguments: hashpartitioning(i_manager_id#5, d_moy#19, 5), true, [id=#23] + +(26) HashAggregate [codegen id : 5] +Input [3]: [i_manager_id#5, d_moy#19, sum#22] +Keys [2]: [i_manager_id#5, d_moy#19] +Functions [1]: [sum(UnscaledValue(ss_sales_price#14))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#14))#24] +Results [3]: [i_manager_id#5, MakeDecimal(sum(UnscaledValue(ss_sales_price#14))#24,17,2) AS sum_sales#25, MakeDecimal(sum(UnscaledValue(ss_sales_price#14))#24,17,2) AS _w0#26] + +(27) Exchange +Input [3]: [i_manager_id#5, sum_sales#25, _w0#26] +Arguments: hashpartitioning(i_manager_id#5, 5), true, [id=#27] + +(28) Sort [codegen id : 6] +Input [3]: [i_manager_id#5, sum_sales#25, _w0#26] +Arguments: [i_manager_id#5 ASC NULLS FIRST], false, 0 + +(29) Window +Input [3]: [i_manager_id#5, sum_sales#25, _w0#26] +Arguments: [avg(_w0#26) windowspecdefinition(i_manager_id#5, specifiedwindowframe(RowFrame, unboundedpreceding$(), 
unboundedfollowing$())) AS avg_monthly_sales#28], [i_manager_id#5] + +(30) Filter [codegen id : 7] +Input [4]: [i_manager_id#5, sum_sales#25, _w0#26, avg_monthly_sales#28] +Condition : (CASE WHEN (avg_monthly_sales#28 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#25 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#28 as decimal(22,6)))), DecimalType(22,6), true))) / promote_precision(cast(avg_monthly_sales#28 as decimal(22,6)))), DecimalType(38,16), true) ELSE null END > 0.1000000000000000) + +(31) Project [codegen id : 7] +Output [3]: [i_manager_id#5, sum_sales#25, avg_monthly_sales#28] +Input [4]: [i_manager_id#5, sum_sales#25, _w0#26, avg_monthly_sales#28] + +(32) TakeOrderedAndProject +Input [3]: [i_manager_id#5, sum_sales#25, avg_monthly_sales#28] +Arguments: 100, [i_manager_id#5 ASC NULLS FIRST, avg_monthly_sales#28 ASC NULLS FIRST, sum_sales#25 ASC NULLS FIRST], [i_manager_id#5, sum_sales#25, avg_monthly_sales#28] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q63.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q63.sf100/simplified.txt new file mode 100644 index 0000000000000..99d321b52694a --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q63.sf100/simplified.txt @@ -0,0 +1,49 @@ +TakeOrderedAndProject [avg_monthly_sales,i_manager_id,sum_sales] + WholeStageCodegen (7) + Project [avg_monthly_sales,i_manager_id,sum_sales] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_manager_id] + WholeStageCodegen (6) + Sort [i_manager_id] + InputAdapter + Exchange [i_manager_id] #1 + WholeStageCodegen (5) + HashAggregate [d_moy,i_manager_id,sum] [_w0,sum,sum(UnscaledValue(ss_sales_price)),sum_sales] + InputAdapter + Exchange [d_moy,i_manager_id] #2 + WholeStageCodegen (4) + HashAggregate [d_moy,i_manager_id,ss_sales_price] [sum,sum] + Project 
[d_moy,i_manager_id,ss_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_manager_id,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [i_manager_id,ss_sales_price,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [i_item_sk,i_manager_id] + Filter [i_brand,i_category,i_class,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_category,i_class,i_item_sk,i_manager_id] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [d_date_sk,d_moy] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq,d_moy] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q63/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q63/explain.txt new file mode 100644 index 0000000000000..cb483711ee02e --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q63/explain.txt @@ -0,0 +1,180 @@ +== Physical Plan == +TakeOrderedAndProject (32) ++- * Project (31) + +- * Filter (30) + +- Window (29) + +- * Sort (28) + +- Exchange (27) + +- * HashAggregate (26) + +- Exchange (25) + +- * HashAggregate (24) + +- * Project (23) + +- * BroadcastHashJoin Inner BuildRight (22) + :- * Project (17) + : +- * BroadcastHashJoin Inner BuildRight (16) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet 
default.item (1) + : : +- BroadcastExchange (8) + : : +- * Filter (7) + : : +- * ColumnarToRow (6) + : : +- Scan parquet default.store_sales (5) + : +- BroadcastExchange (15) + : +- * Project (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet default.date_dim (11) + +- BroadcastExchange (21) + +- * Filter (20) + +- * ColumnarToRow (19) + +- Scan parquet default.store (18) + + +(1) Scan parquet default.item +Output [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/item] +PushedFilters: [Or(And(And(In(i_category, [Books,Children,Electronics]),In(i_class, [personal,portable,reference,self-help])),In(i_brand, [scholaramalgamalg #6,scholaramalgamalg #7,exportiunivamalg #8,scholaramalgamalg #8])),And(And(In(i_category, [Women,Music,Men]),In(i_class, [accessories,classical,fragrances,pants])),In(i_brand, [amalgimporto #9,edu packscholar #9,exportiimporto #9,importoamalg #9]))), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] + +(3) Filter [codegen id : 4] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] +Condition : ((((i_category#4 IN (Books,Children,Electronics) AND i_class#3 IN (personal,portable,reference,self-help)) AND i_brand#2 IN (scholaramalgamalg #6,scholaramalgamalg #7,exportiunivamalg #8,scholaramalgamalg #8)) OR ((i_category#4 IN (Women,Music,Men) AND i_class#3 IN (accessories,classical,fragrances,pants)) AND i_brand#2 IN (amalgimporto #9,edu packscholar #9,exportiimporto #9,importoamalg #9))) AND isnotnull(i_item_sk#1)) + +(4) Project [codegen id : 4] +Output [2]: [i_item_sk#1, i_manager_id#5] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] + +(5) Scan parquet default.store_sales +Output [4]: 
[ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2452123), LessThanOrEqual(ss_sold_date_sk,2452487), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [4]: [ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] + +(7) Filter [codegen id : 1] +Input [4]: [ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] +Condition : ((((isnotnull(ss_sold_date_sk#10) AND (ss_sold_date_sk#10 >= 2452123)) AND (ss_sold_date_sk#10 <= 2452487)) AND isnotnull(ss_item_sk#11)) AND isnotnull(ss_store_sk#12)) + +(8) BroadcastExchange +Input [4]: [ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#14] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [ss_item_sk#11] +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [i_manager_id#5, ss_sold_date_sk#10, ss_store_sk#12, ss_sales_price#13] +Input [6]: [i_item_sk#1, i_manager_id#5, ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] + +(11) Scan parquet default.date_dim +Output [3]: [d_date_sk#15, d_month_seq#16, d_moy#17] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/date_dim] +PushedFilters: [In(d_month_seq, [1222,1228,1223,1227,1219,1226,1224,1225,1230,1220,1221,1229]), LessThanOrEqual(d_date_sk,2452487), GreaterThanOrEqual(d_date_sk,2452123), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#15, d_month_seq#16, d_moy#17] + 
+(13) Filter [codegen id : 2] +Input [3]: [d_date_sk#15, d_month_seq#16, d_moy#17] +Condition : (((d_month_seq#16 INSET (1222,1228,1223,1227,1219,1226,1224,1225,1230,1220,1221,1229) AND (d_date_sk#15 <= 2452487)) AND (d_date_sk#15 >= 2452123)) AND isnotnull(d_date_sk#15)) + +(14) Project [codegen id : 2] +Output [2]: [d_date_sk#15, d_moy#17] +Input [3]: [d_date_sk#15, d_month_seq#16, d_moy#17] + +(15) BroadcastExchange +Input [2]: [d_date_sk#15, d_moy#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#10] +Right keys [1]: [d_date_sk#15] +Join condition: None + +(17) Project [codegen id : 4] +Output [4]: [i_manager_id#5, ss_store_sk#12, ss_sales_price#13, d_moy#17] +Input [6]: [i_manager_id#5, ss_sold_date_sk#10, ss_store_sk#12, ss_sales_price#13, d_date_sk#15, d_moy#17] + +(18) Scan parquet default.store +Output [1]: [s_store_sk#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [1]: [s_store_sk#19] + +(20) Filter [codegen id : 3] +Input [1]: [s_store_sk#19] +Condition : isnotnull(s_store_sk#19) + +(21) BroadcastExchange +Input [1]: [s_store_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#20] + +(22) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#12] +Right keys [1]: [s_store_sk#19] +Join condition: None + +(23) Project [codegen id : 4] +Output [3]: [i_manager_id#5, ss_sales_price#13, d_moy#17] +Input [5]: [i_manager_id#5, ss_store_sk#12, ss_sales_price#13, d_moy#17, s_store_sk#19] + +(24) HashAggregate [codegen id : 4] +Input [3]: [i_manager_id#5, ss_sales_price#13, d_moy#17] +Keys [2]: [i_manager_id#5, d_moy#17] +Functions [1]: 
[partial_sum(UnscaledValue(ss_sales_price#13))] +Aggregate Attributes [1]: [sum#21] +Results [3]: [i_manager_id#5, d_moy#17, sum#22] + +(25) Exchange +Input [3]: [i_manager_id#5, d_moy#17, sum#22] +Arguments: hashpartitioning(i_manager_id#5, d_moy#17, 5), true, [id=#23] + +(26) HashAggregate [codegen id : 5] +Input [3]: [i_manager_id#5, d_moy#17, sum#22] +Keys [2]: [i_manager_id#5, d_moy#17] +Functions [1]: [sum(UnscaledValue(ss_sales_price#13))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#13))#24] +Results [3]: [i_manager_id#5, MakeDecimal(sum(UnscaledValue(ss_sales_price#13))#24,17,2) AS sum_sales#25, MakeDecimal(sum(UnscaledValue(ss_sales_price#13))#24,17,2) AS _w0#26] + +(27) Exchange +Input [3]: [i_manager_id#5, sum_sales#25, _w0#26] +Arguments: hashpartitioning(i_manager_id#5, 5), true, [id=#27] + +(28) Sort [codegen id : 6] +Input [3]: [i_manager_id#5, sum_sales#25, _w0#26] +Arguments: [i_manager_id#5 ASC NULLS FIRST], false, 0 + +(29) Window +Input [3]: [i_manager_id#5, sum_sales#25, _w0#26] +Arguments: [avg(_w0#26) windowspecdefinition(i_manager_id#5, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#28], [i_manager_id#5] + +(30) Filter [codegen id : 7] +Input [4]: [i_manager_id#5, sum_sales#25, _w0#26, avg_monthly_sales#28] +Condition : (CASE WHEN (avg_monthly_sales#28 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#25 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#28 as decimal(22,6)))), DecimalType(22,6), true))) / promote_precision(cast(avg_monthly_sales#28 as decimal(22,6)))), DecimalType(38,16), true) ELSE null END > 0.1000000000000000) + +(31) Project [codegen id : 7] +Output [3]: [i_manager_id#5, sum_sales#25, avg_monthly_sales#28] +Input [4]: [i_manager_id#5, sum_sales#25, _w0#26, avg_monthly_sales#28] + +(32) TakeOrderedAndProject +Input [3]: [i_manager_id#5, sum_sales#25, avg_monthly_sales#28] +Arguments: 100, 
[i_manager_id#5 ASC NULLS FIRST, avg_monthly_sales#28 ASC NULLS FIRST, sum_sales#25 ASC NULLS FIRST], [i_manager_id#5, sum_sales#25, avg_monthly_sales#28] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q63/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q63/simplified.txt new file mode 100644 index 0000000000000..7d1b1e0ae9c41 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q63/simplified.txt @@ -0,0 +1,49 @@ +TakeOrderedAndProject [avg_monthly_sales,i_manager_id,sum_sales] + WholeStageCodegen (7) + Project [avg_monthly_sales,i_manager_id,sum_sales] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_manager_id] + WholeStageCodegen (6) + Sort [i_manager_id] + InputAdapter + Exchange [i_manager_id] #1 + WholeStageCodegen (5) + HashAggregate [d_moy,i_manager_id,sum] [_w0,sum,sum(UnscaledValue(ss_sales_price)),sum_sales] + InputAdapter + Exchange [d_moy,i_manager_id] #2 + WholeStageCodegen (4) + HashAggregate [d_moy,i_manager_id,ss_sales_price] [sum,sum] + Project [d_moy,i_manager_id,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_moy,i_manager_id,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_manager_id,ss_sales_price,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [i_item_sk,i_manager_id] + Filter [i_brand,i_category,i_class,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_category,i_class,i_item_sk,i_manager_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk,d_moy] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + 
InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q65.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q65.sf100/explain.txt new file mode 100644 index 0000000000000..425f7981e384e --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q65.sf100/explain.txt @@ -0,0 +1,245 @@ +== Physical Plan == +TakeOrderedAndProject (42) ++- * Project (41) + +- * BroadcastHashJoin Inner BuildLeft (40) + :- BroadcastExchange (36) + : +- * Project (35) + : +- * BroadcastHashJoin Inner BuildLeft (34) + : :- BroadcastExchange (30) + : : +- * Project (29) + : : +- * BroadcastHashJoin Inner BuildRight (28) + : : :- * Filter (14) + : : : +- * HashAggregate (13) + : : : +- Exchange (12) + : : : +- * HashAggregate (11) + : : : +- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (27) + : : +- * HashAggregate (26) + : : +- Exchange (25) + : : +- * HashAggregate (24) + : : +- * HashAggregate (23) + : : +- Exchange (22) + : : +- * HashAggregate (21) + : : +- * Project (20) + : : +- * BroadcastHashJoin Inner BuildRight (19) + : : :- * Filter (17) + : : : +- * ColumnarToRow (16) + : : : +- Scan parquet default.store_sales (15) + : : +- ReusedExchange (18) + : +- * Filter (33) + : +- * ColumnarToRow (32) + : +- Scan parquet default.store (31) + +- * Filter (39) + +- * ColumnarToRow (38) + +- Scan parquet default.item (37) + + +(1) Scan parquet 
default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_sales_price#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2451911), LessThanOrEqual(ss_sold_date_sk,2452275), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_sales_price#4] + +(3) Filter [codegen id : 2] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_sales_price#4] +Condition : ((((isnotnull(ss_sold_date_sk#1) AND (ss_sold_date_sk#1 >= 2451911)) AND (ss_sold_date_sk#1 <= 2452275)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_item_sk#2)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#5, d_month_seq#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), GreaterThanOrEqual(d_date_sk,2451911), LessThanOrEqual(d_date_sk,2452275), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#5, d_month_seq#6] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#5, d_month_seq#6] +Condition : (((((isnotnull(d_month_seq#6) AND (d_month_seq#6 >= 1212)) AND (d_month_seq#6 <= 1223)) AND (d_date_sk#5 >= 2451911)) AND (d_date_sk#5 <= 2452275)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [2]: [d_date_sk#5, d_month_seq#6] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id 
: 2] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(10) Project [codegen id : 2] +Output [3]: [ss_item_sk#2, ss_store_sk#3, ss_sales_price#4] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_sales_price#4, d_date_sk#5] + +(11) HashAggregate [codegen id : 2] +Input [3]: [ss_item_sk#2, ss_store_sk#3, ss_sales_price#4] +Keys [2]: [ss_store_sk#3, ss_item_sk#2] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#4))] +Aggregate Attributes [1]: [sum#8] +Results [3]: [ss_store_sk#3, ss_item_sk#2, sum#9] + +(12) Exchange +Input [3]: [ss_store_sk#3, ss_item_sk#2, sum#9] +Arguments: hashpartitioning(ss_store_sk#3, ss_item_sk#2, 5), true, [id=#10] + +(13) HashAggregate [codegen id : 7] +Input [3]: [ss_store_sk#3, ss_item_sk#2, sum#9] +Keys [2]: [ss_store_sk#3, ss_item_sk#2] +Functions [1]: [sum(UnscaledValue(ss_sales_price#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#4))#11] +Results [3]: [ss_store_sk#3, ss_item_sk#2, MakeDecimal(sum(UnscaledValue(ss_sales_price#4))#11,17,2) AS revenue#12] + +(14) Filter [codegen id : 7] +Input [3]: [ss_store_sk#3, ss_item_sk#2, revenue#12] +Condition : isnotnull(revenue#12) + +(15) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#13, ss_item_sk#14, ss_store_sk#15, ss_sales_price#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2451911), LessThanOrEqual(ss_sold_date_sk,2452275), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 4] +Input [4]: [ss_sold_date_sk#13, ss_item_sk#14, ss_store_sk#15, ss_sales_price#16] + +(17) Filter [codegen id : 4] +Input [4]: [ss_sold_date_sk#13, ss_item_sk#14, ss_store_sk#15, ss_sales_price#16] +Condition : (((isnotnull(ss_sold_date_sk#13) AND (ss_sold_date_sk#13 >= 
2451911)) AND (ss_sold_date_sk#13 <= 2452275)) AND isnotnull(ss_store_sk#15)) + +(18) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(19) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#13] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(20) Project [codegen id : 4] +Output [3]: [ss_item_sk#14, ss_store_sk#15, ss_sales_price#16] +Input [5]: [ss_sold_date_sk#13, ss_item_sk#14, ss_store_sk#15, ss_sales_price#16, d_date_sk#5] + +(21) HashAggregate [codegen id : 4] +Input [3]: [ss_item_sk#14, ss_store_sk#15, ss_sales_price#16] +Keys [2]: [ss_store_sk#15, ss_item_sk#14] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#16))] +Aggregate Attributes [1]: [sum#17] +Results [3]: [ss_store_sk#15, ss_item_sk#14, sum#18] + +(22) Exchange +Input [3]: [ss_store_sk#15, ss_item_sk#14, sum#18] +Arguments: hashpartitioning(ss_store_sk#15, ss_item_sk#14, 5), true, [id=#19] + +(23) HashAggregate [codegen id : 5] +Input [3]: [ss_store_sk#15, ss_item_sk#14, sum#18] +Keys [2]: [ss_store_sk#15, ss_item_sk#14] +Functions [1]: [sum(UnscaledValue(ss_sales_price#16))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#16))#20] +Results [2]: [ss_store_sk#15, MakeDecimal(sum(UnscaledValue(ss_sales_price#16))#20,17,2) AS revenue#21] + +(24) HashAggregate [codegen id : 5] +Input [2]: [ss_store_sk#15, revenue#21] +Keys [1]: [ss_store_sk#15] +Functions [1]: [partial_avg(revenue#21)] +Aggregate Attributes [2]: [sum#22, count#23] +Results [3]: [ss_store_sk#15, sum#24, count#25] + +(25) Exchange +Input [3]: [ss_store_sk#15, sum#24, count#25] +Arguments: hashpartitioning(ss_store_sk#15, 5), true, [id=#26] + +(26) HashAggregate [codegen id : 6] +Input [3]: [ss_store_sk#15, sum#24, count#25] +Keys [1]: [ss_store_sk#15] +Functions [1]: [avg(revenue#21)] +Aggregate Attributes [1]: [avg(revenue#21)#27] +Results [2]: [ss_store_sk#15, avg(revenue#21)#27 AS ave#28] + +(27) BroadcastExchange +Input [2]: [ss_store_sk#15, ave#28] +Arguments: 
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#29] + +(28) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [ss_store_sk#15] +Join condition: (cast(revenue#12 as decimal(23,7)) <= CheckOverflow((0.100000 * promote_precision(ave#28)), DecimalType(23,7), true)) + +(29) Project [codegen id : 7] +Output [3]: [ss_store_sk#3, ss_item_sk#2, revenue#12] +Input [5]: [ss_store_sk#3, ss_item_sk#2, revenue#12, ss_store_sk#15, ave#28] + +(30) BroadcastExchange +Input [3]: [ss_store_sk#3, ss_item_sk#2, revenue#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#30] + +(31) Scan parquet default.store +Output [2]: [s_store_sk#31, s_store_name#32] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(32) ColumnarToRow +Input [2]: [s_store_sk#31, s_store_name#32] + +(33) Filter +Input [2]: [s_store_sk#31, s_store_name#32] +Condition : isnotnull(s_store_sk#31) + +(34) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#31] +Join condition: None + +(35) Project [codegen id : 8] +Output [3]: [ss_item_sk#2, revenue#12, s_store_name#32] +Input [5]: [ss_store_sk#3, ss_item_sk#2, revenue#12, s_store_sk#31, s_store_name#32] + +(36) BroadcastExchange +Input [3]: [ss_item_sk#2, revenue#12, s_store_name#32] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#33] + +(37) Scan parquet default.item +Output [5]: [i_item_sk#34, i_item_desc#35, i_current_price#36, i_wholesale_cost#37, i_brand#38] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] 
+ReadSchema: struct + +(38) ColumnarToRow +Input [5]: [i_item_sk#34, i_item_desc#35, i_current_price#36, i_wholesale_cost#37, i_brand#38] + +(39) Filter +Input [5]: [i_item_sk#34, i_item_desc#35, i_current_price#36, i_wholesale_cost#37, i_brand#38] +Condition : isnotnull(i_item_sk#34) + +(40) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#34] +Join condition: None + +(41) Project [codegen id : 9] +Output [6]: [s_store_name#32, i_item_desc#35, revenue#12, i_current_price#36, i_wholesale_cost#37, i_brand#38] +Input [8]: [ss_item_sk#2, revenue#12, s_store_name#32, i_item_sk#34, i_item_desc#35, i_current_price#36, i_wholesale_cost#37, i_brand#38] + +(42) TakeOrderedAndProject +Input [6]: [s_store_name#32, i_item_desc#35, revenue#12, i_current_price#36, i_wholesale_cost#37, i_brand#38] +Arguments: 100, [s_store_name#32 ASC NULLS FIRST, i_item_desc#35 ASC NULLS FIRST], [s_store_name#32, i_item_desc#35, revenue#12, i_current_price#36, i_wholesale_cost#37, i_brand#38] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q65.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q65.sf100/simplified.txt new file mode 100644 index 0000000000000..872ce5199073f --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q65.sf100/simplified.txt @@ -0,0 +1,63 @@ +TakeOrderedAndProject [i_brand,i_current_price,i_item_desc,i_wholesale_cost,revenue,s_store_name] + WholeStageCodegen (9) + Project [i_brand,i_current_price,i_item_desc,i_wholesale_cost,revenue,s_store_name] + BroadcastHashJoin [i_item_sk,ss_item_sk] + InputAdapter + BroadcastExchange #1 + WholeStageCodegen (8) + Project [revenue,s_store_name,ss_item_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (7) + Project [revenue,ss_item_sk,ss_store_sk] + BroadcastHashJoin [ave,revenue,ss_store_sk,ss_store_sk] + 
Filter [revenue] + HashAggregate [ss_item_sk,ss_store_sk,sum] [revenue,sum,sum(UnscaledValue(ss_sales_price))] + InputAdapter + Exchange [ss_item_sk,ss_store_sk] #3 + WholeStageCodegen (2) + HashAggregate [ss_item_sk,ss_sales_price,ss_store_sk] [sum,sum] + Project [ss_item_sk,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (6) + HashAggregate [count,ss_store_sk,sum] [ave,avg(revenue),count,sum] + InputAdapter + Exchange [ss_store_sk] #6 + WholeStageCodegen (5) + HashAggregate [revenue,ss_store_sk] [count,count,sum,sum] + HashAggregate [ss_item_sk,ss_store_sk,sum] [revenue,sum,sum(UnscaledValue(ss_sales_price))] + InputAdapter + Exchange [ss_item_sk,ss_store_sk] #7 + WholeStageCodegen (4) + HashAggregate [ss_item_sk,ss_sales_price,ss_store_sk] [sum,sum] + Project [ss_item_sk,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + ReusedExchange [d_date_sk] #4 + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_name,s_store_sk] + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_current_price,i_item_desc,i_item_sk,i_wholesale_cost] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q65/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q65/explain.txt new file mode 100644 
index 0000000000000..a84b8320f05ae --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q65/explain.txt @@ -0,0 +1,245 @@ +== Physical Plan == +TakeOrderedAndProject (42) ++- * Project (41) + +- * BroadcastHashJoin Inner BuildRight (40) + :- * Project (26) + : +- * BroadcastHashJoin Inner BuildRight (25) + : :- * Project (20) + : : +- * BroadcastHashJoin Inner BuildRight (19) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.store (1) + : : +- BroadcastExchange (18) + : : +- * Filter (17) + : : +- * HashAggregate (16) + : : +- Exchange (15) + : : +- * HashAggregate (14) + : : +- * Project (13) + : : +- * BroadcastHashJoin Inner BuildRight (12) + : : :- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.store_sales (4) + : : +- BroadcastExchange (11) + : : +- * Project (10) + : : +- * Filter (9) + : : +- * ColumnarToRow (8) + : : +- Scan parquet default.date_dim (7) + : +- BroadcastExchange (24) + : +- * Filter (23) + : +- * ColumnarToRow (22) + : +- Scan parquet default.item (21) + +- BroadcastExchange (39) + +- * HashAggregate (38) + +- Exchange (37) + +- * HashAggregate (36) + +- * HashAggregate (35) + +- Exchange (34) + +- * HashAggregate (33) + +- * Project (32) + +- * BroadcastHashJoin Inner BuildRight (31) + :- * Filter (29) + : +- * ColumnarToRow (28) + : +- Scan parquet default.store_sales (27) + +- ReusedExchange (30) + + +(1) Scan parquet default.store +Output [2]: [s_store_sk#1, s_store_name#2] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 9] +Input [2]: [s_store_sk#1, s_store_name#2] + +(3) Filter [codegen id : 9] +Input [2]: [s_store_sk#1, s_store_name#2] +Condition : isnotnull(s_store_sk#1) + +(4) Scan parquet default.store_sales +Output 
[4]: [ss_sold_date_sk#3, ss_item_sk#4, ss_store_sk#5, ss_sales_price#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2451911), LessThanOrEqual(ss_sold_date_sk,2452275), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 2] +Input [4]: [ss_sold_date_sk#3, ss_item_sk#4, ss_store_sk#5, ss_sales_price#6] + +(6) Filter [codegen id : 2] +Input [4]: [ss_sold_date_sk#3, ss_item_sk#4, ss_store_sk#5, ss_sales_price#6] +Condition : ((((isnotnull(ss_sold_date_sk#3) AND (ss_sold_date_sk#3 >= 2451911)) AND (ss_sold_date_sk#3 <= 2452275)) AND isnotnull(ss_store_sk#5)) AND isnotnull(ss_item_sk#4)) + +(7) Scan parquet default.date_dim +Output [2]: [d_date_sk#7, d_month_seq#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), GreaterThanOrEqual(d_date_sk,2451911), LessThanOrEqual(d_date_sk,2452275), IsNotNull(d_date_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#7, d_month_seq#8] + +(9) Filter [codegen id : 1] +Input [2]: [d_date_sk#7, d_month_seq#8] +Condition : (((((isnotnull(d_month_seq#8) AND (d_month_seq#8 >= 1212)) AND (d_month_seq#8 <= 1223)) AND (d_date_sk#7 >= 2451911)) AND (d_date_sk#7 <= 2452275)) AND isnotnull(d_date_sk#7)) + +(10) Project [codegen id : 1] +Output [1]: [d_date_sk#7] +Input [2]: [d_date_sk#7, d_month_seq#8] + +(11) BroadcastExchange +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + +(12) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#3] 
+Right keys [1]: [d_date_sk#7] +Join condition: None + +(13) Project [codegen id : 2] +Output [3]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6] +Input [5]: [ss_sold_date_sk#3, ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, d_date_sk#7] + +(14) HashAggregate [codegen id : 2] +Input [3]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6] +Keys [2]: [ss_store_sk#5, ss_item_sk#4] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [1]: [sum#10] +Results [3]: [ss_store_sk#5, ss_item_sk#4, sum#11] + +(15) Exchange +Input [3]: [ss_store_sk#5, ss_item_sk#4, sum#11] +Arguments: hashpartitioning(ss_store_sk#5, ss_item_sk#4, 5), true, [id=#12] + +(16) HashAggregate [codegen id : 3] +Input [3]: [ss_store_sk#5, ss_item_sk#4, sum#11] +Keys [2]: [ss_store_sk#5, ss_item_sk#4] +Functions [1]: [sum(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#6))#13] +Results [3]: [ss_store_sk#5, ss_item_sk#4, MakeDecimal(sum(UnscaledValue(ss_sales_price#6))#13,17,2) AS revenue#14] + +(17) Filter [codegen id : 3] +Input [3]: [ss_store_sk#5, ss_item_sk#4, revenue#14] +Condition : isnotnull(revenue#14) + +(18) BroadcastExchange +Input [3]: [ss_store_sk#5, ss_item_sk#4, revenue#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] + +(19) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [s_store_sk#1] +Right keys [1]: [ss_store_sk#5] +Join condition: None + +(20) Project [codegen id : 9] +Output [4]: [s_store_name#2, ss_store_sk#5, ss_item_sk#4, revenue#14] +Input [5]: [s_store_sk#1, s_store_name#2, ss_store_sk#5, ss_item_sk#4, revenue#14] + +(21) Scan parquet default.item +Output [5]: [i_item_sk#16, i_item_desc#17, i_current_price#18, i_wholesale_cost#19, i_brand#20] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] 
+ReadSchema: struct + +(22) ColumnarToRow [codegen id : 4] +Input [5]: [i_item_sk#16, i_item_desc#17, i_current_price#18, i_wholesale_cost#19, i_brand#20] + +(23) Filter [codegen id : 4] +Input [5]: [i_item_sk#16, i_item_desc#17, i_current_price#18, i_wholesale_cost#19, i_brand#20] +Condition : isnotnull(i_item_sk#16) + +(24) BroadcastExchange +Input [5]: [i_item_sk#16, i_item_desc#17, i_current_price#18, i_wholesale_cost#19, i_brand#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#21] + +(25) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_item_sk#4] +Right keys [1]: [i_item_sk#16] +Join condition: None + +(26) Project [codegen id : 9] +Output [7]: [s_store_name#2, ss_store_sk#5, revenue#14, i_item_desc#17, i_current_price#18, i_wholesale_cost#19, i_brand#20] +Input [9]: [s_store_name#2, ss_store_sk#5, ss_item_sk#4, revenue#14, i_item_sk#16, i_item_desc#17, i_current_price#18, i_wholesale_cost#19, i_brand#20] + +(27) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#22, ss_item_sk#23, ss_store_sk#24, ss_sales_price#25] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2451911), LessThanOrEqual(ss_sold_date_sk,2452275), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 6] +Input [4]: [ss_sold_date_sk#22, ss_item_sk#23, ss_store_sk#24, ss_sales_price#25] + +(29) Filter [codegen id : 6] +Input [4]: [ss_sold_date_sk#22, ss_item_sk#23, ss_store_sk#24, ss_sales_price#25] +Condition : (((isnotnull(ss_sold_date_sk#22) AND (ss_sold_date_sk#22 >= 2451911)) AND (ss_sold_date_sk#22 <= 2452275)) AND isnotnull(ss_store_sk#24)) + +(30) ReusedExchange [Reuses operator id: 11] +Output [1]: [d_date_sk#7] + +(31) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#22] 
+Right keys [1]: [d_date_sk#7] +Join condition: None + +(32) Project [codegen id : 6] +Output [3]: [ss_item_sk#23, ss_store_sk#24, ss_sales_price#25] +Input [5]: [ss_sold_date_sk#22, ss_item_sk#23, ss_store_sk#24, ss_sales_price#25, d_date_sk#7] + +(33) HashAggregate [codegen id : 6] +Input [3]: [ss_item_sk#23, ss_store_sk#24, ss_sales_price#25] +Keys [2]: [ss_store_sk#24, ss_item_sk#23] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#25))] +Aggregate Attributes [1]: [sum#26] +Results [3]: [ss_store_sk#24, ss_item_sk#23, sum#27] + +(34) Exchange +Input [3]: [ss_store_sk#24, ss_item_sk#23, sum#27] +Arguments: hashpartitioning(ss_store_sk#24, ss_item_sk#23, 5), true, [id=#28] + +(35) HashAggregate [codegen id : 7] +Input [3]: [ss_store_sk#24, ss_item_sk#23, sum#27] +Keys [2]: [ss_store_sk#24, ss_item_sk#23] +Functions [1]: [sum(UnscaledValue(ss_sales_price#25))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#25))#29] +Results [2]: [ss_store_sk#24, MakeDecimal(sum(UnscaledValue(ss_sales_price#25))#29,17,2) AS revenue#30] + +(36) HashAggregate [codegen id : 7] +Input [2]: [ss_store_sk#24, revenue#30] +Keys [1]: [ss_store_sk#24] +Functions [1]: [partial_avg(revenue#30)] +Aggregate Attributes [2]: [sum#31, count#32] +Results [3]: [ss_store_sk#24, sum#33, count#34] + +(37) Exchange +Input [3]: [ss_store_sk#24, sum#33, count#34] +Arguments: hashpartitioning(ss_store_sk#24, 5), true, [id=#35] + +(38) HashAggregate [codegen id : 8] +Input [3]: [ss_store_sk#24, sum#33, count#34] +Keys [1]: [ss_store_sk#24] +Functions [1]: [avg(revenue#30)] +Aggregate Attributes [1]: [avg(revenue#30)#36] +Results [2]: [ss_store_sk#24, avg(revenue#30)#36 AS ave#37] + +(39) BroadcastExchange +Input [2]: [ss_store_sk#24, ave#37] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#38] + +(40) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#5] +Right keys [1]: [ss_store_sk#24] +Join condition: (cast(revenue#14 as 
decimal(23,7)) <= CheckOverflow((0.100000 * promote_precision(ave#37)), DecimalType(23,7), true)) + +(41) Project [codegen id : 9] +Output [6]: [s_store_name#2, i_item_desc#17, revenue#14, i_current_price#18, i_wholesale_cost#19, i_brand#20] +Input [9]: [s_store_name#2, ss_store_sk#5, revenue#14, i_item_desc#17, i_current_price#18, i_wholesale_cost#19, i_brand#20, ss_store_sk#24, ave#37] + +(42) TakeOrderedAndProject +Input [6]: [s_store_name#2, i_item_desc#17, revenue#14, i_current_price#18, i_wholesale_cost#19, i_brand#20] +Arguments: 100, [s_store_name#2 ASC NULLS FIRST, i_item_desc#17 ASC NULLS FIRST], [s_store_name#2, i_item_desc#17, revenue#14, i_current_price#18, i_wholesale_cost#19, i_brand#20] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q65/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q65/simplified.txt new file mode 100644 index 0000000000000..77f6cc15c8210 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q65/simplified.txt @@ -0,0 +1,63 @@ +TakeOrderedAndProject [i_brand,i_current_price,i_item_desc,i_wholesale_cost,revenue,s_store_name] + WholeStageCodegen (9) + Project [i_brand,i_current_price,i_item_desc,i_wholesale_cost,revenue,s_store_name] + BroadcastHashJoin [ave,revenue,ss_store_sk,ss_store_sk] + Project [i_brand,i_current_price,i_item_desc,i_wholesale_cost,revenue,s_store_name,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [revenue,s_store_name,ss_item_sk,ss_store_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_name,s_store_sk] + InputAdapter + BroadcastExchange #1 + WholeStageCodegen (3) + Filter [revenue] + HashAggregate [ss_item_sk,ss_store_sk,sum] [revenue,sum,sum(UnscaledValue(ss_sales_price))] + InputAdapter + Exchange [ss_item_sk,ss_store_sk] #2 + WholeStageCodegen (2) + HashAggregate 
[ss_item_sk,ss_sales_price,ss_store_sk] [sum,sum] + Project [ss_item_sk,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (4) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_current_price,i_item_desc,i_item_sk,i_wholesale_cost] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (8) + HashAggregate [count,ss_store_sk,sum] [ave,avg(revenue),count,sum] + InputAdapter + Exchange [ss_store_sk] #6 + WholeStageCodegen (7) + HashAggregate [revenue,ss_store_sk] [count,count,sum,sum] + HashAggregate [ss_item_sk,ss_store_sk,sum] [revenue,sum,sum(UnscaledValue(ss_sales_price))] + InputAdapter + Exchange [ss_item_sk,ss_store_sk] #7 + WholeStageCodegen (6) + HashAggregate [ss_item_sk,ss_sales_price,ss_store_sk] [sum,sum] + Project [ss_item_sk,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q68.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q68.sf100/explain.txt new file mode 100644 index 0000000000000..31eaa3bf925a8 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q68.sf100/explain.txt @@ -0,0 +1,289 @@ +== Physical Plan == +TakeOrderedAndProject (52) ++- * Project (51) + +- 
* SortMergeJoin Inner (50) + :- * Sort (44) + : +- Exchange (43) + : +- * Project (42) + : +- * SortMergeJoin Inner (41) + : :- * Sort (35) + : : +- Exchange (34) + : : +- * HashAggregate (33) + : : +- Exchange (32) + : : +- * HashAggregate (31) + : : +- * Project (30) + : : +- * BroadcastHashJoin Inner BuildLeft (29) + : : :- BroadcastExchange (25) + : : : +- * Project (24) + : : : +- * BroadcastHashJoin Inner BuildRight (23) + : : : :- * Project (17) + : : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : : :- * Project (10) + : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : :- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet default.store_sales (1) + : : : : : +- BroadcastExchange (8) + : : : : : +- * Project (7) + : : : : : +- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.date_dim (4) + : : : : +- BroadcastExchange (15) + : : : : +- * Project (14) + : : : : +- * Filter (13) + : : : : +- * ColumnarToRow (12) + : : : : +- Scan parquet default.store (11) + : : : +- BroadcastExchange (22) + : : : +- * Project (21) + : : : +- * Filter (20) + : : : +- * ColumnarToRow (19) + : : : +- Scan parquet default.household_demographics (18) + : : +- * Filter (28) + : : +- * ColumnarToRow (27) + : : +- Scan parquet default.customer_address (26) + : +- * Sort (40) + : +- Exchange (39) + : +- * Filter (38) + : +- * ColumnarToRow (37) + : +- Scan parquet default.customer (36) + +- * Sort (49) + +- Exchange (48) + +- * Filter (47) + +- * ColumnarToRow (46) + +- Scan parquet default.customer_address (45) + + +(1) Scan parquet default.store_sales +Output [9]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store_sales] +PushedFilters: [In(ss_sold_date_sk, [2451790,2451180,2452216,2451454,2452184,2451485,2451850,2451514,2452062,2451270,2452123,2451758,2451971,2451546,2451942,2451393,2451667,2451453,2452215,2451819,2451331,2451577,2451911,2452245,2451301,2451545,2451605,2451943,2451851,2451181,2452154,2451820,2452001,2451362,2451392,2451240,2452032,2451637,2451484,2452124,2451300,2451727,2452093,2451759,2451698,2451332,2451606,2451666,2451912,2452185,2451211,2451361,2452031,2451212,2451880,2451789,2451423,2451576,2451728,2452246,2452155,2452092,2451881,2451970,2451697,2452063,2451271,2451636,2451515,2451424,2451239,2452002]), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [9]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9] + +(3) Filter [codegen id : 4] +Input [9]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9] +Condition : (((((ss_sold_date_sk#1 INSET (2451790,2451180,2452216,2451454,2452184,2451485,2451850,2451514,2452062,2451270,2452123,2451758,2451971,2451546,2451942,2451393,2451667,2451453,2452215,2451819,2451331,2451577,2451911,2452245,2451301,2451545,2451605,2451943,2451851,2451181,2452154,2451820,2452001,2451362,2451392,2451240,2452032,2451637,2451484,2452124,2451300,2451727,2452093,2451759,2451698,2451332,2451606,2451666,2451912,2452185,2451211,2451361,2452031,2451212,2451880,2451789,2451423,2451576,2451728,2452246,2452155,2452092,2451881,2451970,2451697,2452063,2451271,2451636,2451515,2451424,2451239,2452002) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#5)) AND 
isnotnull(ss_hdemo_sk#3)) AND isnotnull(ss_addr_sk#4)) AND isnotnull(ss_customer_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#10, d_year#11, d_dom#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1999,2000,2001]), In(d_date_sk, [2451790,2451180,2452216,2451454,2452184,2451485,2451850,2451514,2452062,2451270,2452123,2451758,2451971,2451546,2451942,2451393,2451667,2451453,2452215,2451819,2451331,2451577,2451911,2452245,2451301,2451545,2451605,2451943,2451851,2451181,2452154,2451820,2452001,2451362,2451392,2451240,2452032,2451637,2451484,2452124,2451300,2451727,2452093,2451759,2451698,2451332,2451606,2451666,2451912,2452185,2451211,2451361,2452031,2451212,2451880,2451789,2451423,2451576,2451728,2452246,2452155,2452092,2451881,2451970,2451697,2452063,2451271,2451636,2451515,2451424,2451239,2452002]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#10, d_year#11, d_dom#12] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#10, d_year#11, d_dom#12] +Condition : (((((isnotnull(d_dom#12) AND (d_dom#12 >= 1)) AND (d_dom#12 <= 2)) AND d_year#11 IN (1999,2000,2001)) AND d_date_sk#10 INSET (2451790,2451180,2452216,2451454,2452184,2451485,2451850,2451514,2452062,2451270,2452123,2451758,2451971,2451546,2451942,2451393,2451667,2451453,2452215,2451819,2451331,2451577,2451911,2452245,2451301,2451545,2451605,2451943,2451851,2451181,2452154,2451820,2452001,2451362,2451392,2451240,2452032,2451637,2451484,2452124,2451300,2451727,2452093,2451759,2451698,2451332,2451606,2451666,2451912,2452185,2451211,2451361,2452031,2451212,2451880,2451789,2451423,2451576,2451728,2452246,2452155,2452092,2451881,2451970,2451697,2452063,2451271,2451636,2451515,2451424,2451239,2452002)) AND 
isnotnull(d_date_sk#10)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#10] +Input [3]: [d_date_sk#10, d_year#11, d_dom#12] + +(8) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(10) Project [codegen id : 4] +Output [8]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9] +Input [10]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9, d_date_sk#10] + +(11) Scan parquet default.store +Output [2]: [s_store_sk#14, s_city#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store] +PushedFilters: [In(s_city, [Midway,Fairview]), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#14, s_city#15] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#14, s_city#15] +Condition : (s_city#15 IN (Midway,Fairview) AND isnotnull(s_store_sk#14)) + +(14) Project [codegen id : 2] +Output [1]: [s_store_sk#14] +Input [2]: [s_store_sk#14, s_city#15] + +(15) BroadcastExchange +Input [1]: [s_store_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#5] +Right keys [1]: [s_store_sk#14] +Join condition: None + +(17) Project [codegen id : 4] +Output [7]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9] +Input [9]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, 
ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9, s_store_sk#14] + +(18) Scan parquet default.household_demographics +Output [3]: [hd_demo_sk#17, hd_dep_count#18, hd_vehicle_count#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/household_demographics] +PushedFilters: [Or(EqualTo(hd_dep_count,5),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [3]: [hd_demo_sk#17, hd_dep_count#18, hd_vehicle_count#19] + +(20) Filter [codegen id : 3] +Input [3]: [hd_demo_sk#17, hd_dep_count#18, hd_vehicle_count#19] +Condition : (((hd_dep_count#18 = 5) OR (hd_vehicle_count#19 = 3)) AND isnotnull(hd_demo_sk#17)) + +(21) Project [codegen id : 3] +Output [1]: [hd_demo_sk#17] +Input [3]: [hd_demo_sk#17, hd_dep_count#18, hd_vehicle_count#19] + +(22) BroadcastExchange +Input [1]: [hd_demo_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_hdemo_sk#3] +Right keys [1]: [hd_demo_sk#17] +Join condition: None + +(24) Project [codegen id : 4] +Output [6]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9] +Input [8]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9, hd_demo_sk#17] + +(25) BroadcastExchange +Input [6]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#21] + +(26) Scan parquet default.customer_address +Output [2]: [ca_address_sk#22, ca_city#23] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)] +ReadSchema: struct + +(27) ColumnarToRow +Input [2]: [ca_address_sk#22, ca_city#23] + +(28) Filter +Input [2]: [ca_address_sk#22, ca_city#23] +Condition : (isnotnull(ca_address_sk#22) AND isnotnull(ca_city#23)) + +(29) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_addr_sk#4] +Right keys [1]: [ca_address_sk#22] +Join condition: None + +(30) Project [codegen id : 5] +Output [7]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9, ca_city#23] +Input [8]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9, ca_address_sk#22, ca_city#23] + +(31) HashAggregate [codegen id : 5] +Input [7]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9, ca_city#23] +Keys [4]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#23] +Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#7)), partial_sum(UnscaledValue(ss_ext_list_price#8)), partial_sum(UnscaledValue(ss_ext_tax#9))] +Aggregate Attributes [3]: [sum#24, sum#25, sum#26] +Results [7]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#23, sum#27, sum#28, sum#29] + +(32) Exchange +Input [7]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#23, sum#27, sum#28, sum#29] +Arguments: hashpartitioning(ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#23, 5), true, [id=#30] + +(33) HashAggregate [codegen id : 6] +Input [7]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#23, sum#27, sum#28, sum#29] +Keys [4]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#23] +Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#7)), sum(UnscaledValue(ss_ext_list_price#8)), 
sum(UnscaledValue(ss_ext_tax#9))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#7))#31, sum(UnscaledValue(ss_ext_list_price#8))#32, sum(UnscaledValue(ss_ext_tax#9))#33] +Results [6]: [ss_ticket_number#6, ss_customer_sk#2, ca_city#23 AS bought_city#34, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#7))#31,17,2) AS extended_price#35, MakeDecimal(sum(UnscaledValue(ss_ext_list_price#8))#32,17,2) AS list_price#36, MakeDecimal(sum(UnscaledValue(ss_ext_tax#9))#33,17,2) AS extended_tax#37] + +(34) Exchange +Input [6]: [ss_ticket_number#6, ss_customer_sk#2, bought_city#34, extended_price#35, list_price#36, extended_tax#37] +Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#38] + +(35) Sort [codegen id : 7] +Input [6]: [ss_ticket_number#6, ss_customer_sk#2, bought_city#34, extended_price#35, list_price#36, extended_tax#37] +Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 + +(36) Scan parquet default.customer +Output [4]: [c_customer_sk#39, c_current_addr_sk#40, c_first_name#41, c_last_name#42] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(37) ColumnarToRow [codegen id : 8] +Input [4]: [c_customer_sk#39, c_current_addr_sk#40, c_first_name#41, c_last_name#42] + +(38) Filter [codegen id : 8] +Input [4]: [c_customer_sk#39, c_current_addr_sk#40, c_first_name#41, c_last_name#42] +Condition : (isnotnull(c_customer_sk#39) AND isnotnull(c_current_addr_sk#40)) + +(39) Exchange +Input [4]: [c_customer_sk#39, c_current_addr_sk#40, c_first_name#41, c_last_name#42] +Arguments: hashpartitioning(c_customer_sk#39, 5), true, [id=#43] + +(40) Sort [codegen id : 9] +Input [4]: [c_customer_sk#39, c_current_addr_sk#40, c_first_name#41, c_last_name#42] +Arguments: [c_customer_sk#39 ASC NULLS FIRST], false, 0 + +(41) SortMergeJoin 
[codegen id : 10] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#39] +Join condition: None + +(42) Project [codegen id : 10] +Output [8]: [ss_ticket_number#6, bought_city#34, extended_price#35, list_price#36, extended_tax#37, c_current_addr_sk#40, c_first_name#41, c_last_name#42] +Input [10]: [ss_ticket_number#6, ss_customer_sk#2, bought_city#34, extended_price#35, list_price#36, extended_tax#37, c_customer_sk#39, c_current_addr_sk#40, c_first_name#41, c_last_name#42] + +(43) Exchange +Input [8]: [ss_ticket_number#6, bought_city#34, extended_price#35, list_price#36, extended_tax#37, c_current_addr_sk#40, c_first_name#41, c_last_name#42] +Arguments: hashpartitioning(c_current_addr_sk#40, 5), true, [id=#44] + +(44) Sort [codegen id : 11] +Input [8]: [ss_ticket_number#6, bought_city#34, extended_price#35, list_price#36, extended_tax#37, c_current_addr_sk#40, c_first_name#41, c_last_name#42] +Arguments: [c_current_addr_sk#40 ASC NULLS FIRST], false, 0 + +(45) Scan parquet default.customer_address +Output [2]: [ca_address_sk#22, ca_city#23] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)] +ReadSchema: struct + +(46) ColumnarToRow [codegen id : 12] +Input [2]: [ca_address_sk#22, ca_city#23] + +(47) Filter [codegen id : 12] +Input [2]: [ca_address_sk#22, ca_city#23] +Condition : (isnotnull(ca_address_sk#22) AND isnotnull(ca_city#23)) + +(48) Exchange +Input [2]: [ca_address_sk#22, ca_city#23] +Arguments: hashpartitioning(ca_address_sk#22, 5), true, [id=#45] + +(49) Sort [codegen id : 13] +Input [2]: [ca_address_sk#22, ca_city#23] +Arguments: [ca_address_sk#22 ASC NULLS FIRST], false, 0 + +(50) SortMergeJoin [codegen id : 14] +Left keys [1]: [c_current_addr_sk#40] +Right keys [1]: [ca_address_sk#22] +Join condition: NOT (ca_city#23 = bought_city#34) + 
+(51) Project [codegen id : 14] +Output [8]: [c_last_name#42, c_first_name#41, ca_city#23, bought_city#34, ss_ticket_number#6, extended_price#35, extended_tax#37, list_price#36] +Input [10]: [ss_ticket_number#6, bought_city#34, extended_price#35, list_price#36, extended_tax#37, c_current_addr_sk#40, c_first_name#41, c_last_name#42, ca_address_sk#22, ca_city#23] + +(52) TakeOrderedAndProject +Input [8]: [c_last_name#42, c_first_name#41, ca_city#23, bought_city#34, ss_ticket_number#6, extended_price#35, extended_tax#37, list_price#36] +Arguments: 100, [c_last_name#42 ASC NULLS FIRST, ss_ticket_number#6 ASC NULLS FIRST], [c_last_name#42, c_first_name#41, ca_city#23, bought_city#34, ss_ticket_number#6, extended_price#35, extended_tax#37, list_price#36] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q68.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q68.sf100/simplified.txt new file mode 100644 index 0000000000000..510bcd475291d --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q68.sf100/simplified.txt @@ -0,0 +1,86 @@ +TakeOrderedAndProject [bought_city,c_first_name,c_last_name,ca_city,extended_price,extended_tax,list_price,ss_ticket_number] + WholeStageCodegen (14) + Project [bought_city,c_first_name,c_last_name,ca_city,extended_price,extended_tax,list_price,ss_ticket_number] + SortMergeJoin [bought_city,c_current_addr_sk,ca_address_sk,ca_city] + InputAdapter + WholeStageCodegen (11) + Sort [c_current_addr_sk] + InputAdapter + Exchange [c_current_addr_sk] #1 + WholeStageCodegen (10) + Project [bought_city,c_current_addr_sk,c_first_name,c_last_name,extended_price,extended_tax,list_price,ss_ticket_number] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (7) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #2 + WholeStageCodegen (6) + HashAggregate 
[ca_city,ss_addr_sk,ss_customer_sk,ss_ticket_number,sum,sum,sum] [bought_city,extended_price,extended_tax,list_price,sum,sum,sum,sum(UnscaledValue(ss_ext_list_price)),sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_ext_tax))] + InputAdapter + Exchange [ca_city,ss_addr_sk,ss_customer_sk,ss_ticket_number] #3 + WholeStageCodegen (5) + HashAggregate [ca_city,ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_ticket_number] [sum,sum,sum,sum,sum,sum] + Project [ca_city,ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_ticket_number] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (4) + Project [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_ticket_number] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_hdemo_sk,ss_ticket_number] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_hdemo_sk,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_addr_sk,ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_dom,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_dom,d_year] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + Project [s_store_sk] + Filter [s_city,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_city,s_store_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (3) + Project [hd_demo_sk] + Filter 
[hd_demo_sk,hd_dep_count,hd_vehicle_count] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] + Filter [ca_address_sk,ca_city] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_city] + InputAdapter + WholeStageCodegen (9) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #8 + WholeStageCodegen (8) + Filter [c_current_addr_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_customer_sk,c_first_name,c_last_name] + InputAdapter + WholeStageCodegen (13) + Sort [ca_address_sk] + InputAdapter + Exchange [ca_address_sk] #9 + WholeStageCodegen (12) + Filter [ca_address_sk,ca_city] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_city] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q68/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q68/explain.txt new file mode 100644 index 0000000000000..37e6d860b808a --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q68/explain.txt @@ -0,0 +1,241 @@ +== Physical Plan == +TakeOrderedAndProject (43) ++- * Project (42) + +- * BroadcastHashJoin Inner BuildRight (41) + :- * Project (39) + : +- * BroadcastHashJoin Inner BuildRight (38) + : :- * HashAggregate (33) + : : +- Exchange (32) + : : +- * HashAggregate (31) + : : +- * Project (30) + : : +- * BroadcastHashJoin Inner BuildRight (29) + : : :- * Project (24) + : : : +- * BroadcastHashJoin Inner BuildRight (23) + : : : :- * Project (17) + : : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : : :- * Project (10) + : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : :- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet default.store_sales (1) + : : : : : +- BroadcastExchange (8) + : : : : : +- * Project (7) + : : : 
: : +- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.date_dim (4) + : : : : +- BroadcastExchange (15) + : : : : +- * Project (14) + : : : : +- * Filter (13) + : : : : +- * ColumnarToRow (12) + : : : : +- Scan parquet default.store (11) + : : : +- BroadcastExchange (22) + : : : +- * Project (21) + : : : +- * Filter (20) + : : : +- * ColumnarToRow (19) + : : : +- Scan parquet default.household_demographics (18) + : : +- BroadcastExchange (28) + : : +- * Filter (27) + : : +- * ColumnarToRow (26) + : : +- Scan parquet default.customer_address (25) + : +- BroadcastExchange (37) + : +- * Filter (36) + : +- * ColumnarToRow (35) + : +- Scan parquet default.customer (34) + +- ReusedExchange (40) + + +(1) Scan parquet default.store_sales +Output [9]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store_sales] +PushedFilters: [In(ss_sold_date_sk, [2451790,2451180,2452216,2451454,2452184,2451485,2451850,2451514,2452062,2451270,2452123,2451758,2451971,2451546,2451942,2451393,2451667,2451453,2452215,2451819,2451331,2451577,2451911,2452245,2451301,2451545,2451605,2451943,2451851,2451181,2452154,2451820,2452001,2451362,2451392,2451240,2452032,2451637,2451484,2452124,2451300,2451727,2452093,2451759,2451698,2451332,2451606,2451666,2451912,2452185,2451211,2451361,2452031,2451212,2451880,2451789,2451423,2451576,2451728,2452246,2452155,2452092,2451881,2451970,2451697,2452063,2451271,2451636,2451515,2451424,2451239,2452002]), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [9]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, 
ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9] + +(3) Filter [codegen id : 5] +Input [9]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9] +Condition : (((((ss_sold_date_sk#1 INSET (2451790,2451180,2452216,2451454,2452184,2451485,2451850,2451514,2452062,2451270,2452123,2451758,2451971,2451546,2451942,2451393,2451667,2451453,2452215,2451819,2451331,2451577,2451911,2452245,2451301,2451545,2451605,2451943,2451851,2451181,2452154,2451820,2452001,2451362,2451392,2451240,2452032,2451637,2451484,2452124,2451300,2451727,2452093,2451759,2451698,2451332,2451606,2451666,2451912,2452185,2451211,2451361,2452031,2451212,2451880,2451789,2451423,2451576,2451728,2452246,2452155,2452092,2451881,2451970,2451697,2452063,2451271,2451636,2451515,2451424,2451239,2452002) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#5)) AND isnotnull(ss_hdemo_sk#3)) AND isnotnull(ss_addr_sk#4)) AND isnotnull(ss_customer_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#10, d_year#11, d_dom#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1999,2000,2001]), In(d_date_sk, 
[2451790,2451180,2452216,2451454,2452184,2451485,2451850,2451514,2452062,2451270,2452123,2451758,2451971,2451546,2451942,2451393,2451667,2451453,2452215,2451819,2451331,2451577,2451911,2452245,2451301,2451545,2451605,2451943,2451851,2451181,2452154,2451820,2452001,2451362,2451392,2451240,2452032,2451637,2451484,2452124,2451300,2451727,2452093,2451759,2451698,2451332,2451606,2451666,2451912,2452185,2451211,2451361,2452031,2451212,2451880,2451789,2451423,2451576,2451728,2452246,2452155,2452092,2451881,2451970,2451697,2452063,2451271,2451636,2451515,2451424,2451239,2452002]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#10, d_year#11, d_dom#12] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#10, d_year#11, d_dom#12] +Condition : (((((isnotnull(d_dom#12) AND (d_dom#12 >= 1)) AND (d_dom#12 <= 2)) AND d_year#11 IN (1999,2000,2001)) AND d_date_sk#10 INSET (2451790,2451180,2452216,2451454,2452184,2451485,2451850,2451514,2452062,2451270,2452123,2451758,2451971,2451546,2451942,2451393,2451667,2451453,2452215,2451819,2451331,2451577,2451911,2452245,2451301,2451545,2451605,2451943,2451851,2451181,2452154,2451820,2452001,2451362,2451392,2451240,2452032,2451637,2451484,2452124,2451300,2451727,2452093,2451759,2451698,2451332,2451606,2451666,2451912,2452185,2451211,2451361,2452031,2451212,2451880,2451789,2451423,2451576,2451728,2452246,2452155,2452092,2451881,2451970,2451697,2452063,2451271,2451636,2451515,2451424,2451239,2452002)) AND isnotnull(d_date_sk#10)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#10] +Input [3]: [d_date_sk#10, d_year#11, d_dom#12] + +(8) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(10) Project [codegen id : 5] +Output [8]: [ss_customer_sk#2, 
ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9] +Input [10]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9, d_date_sk#10] + +(11) Scan parquet default.store +Output [2]: [s_store_sk#14, s_city#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store] +PushedFilters: [In(s_city, [Midway,Fairview]), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#14, s_city#15] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#14, s_city#15] +Condition : (s_city#15 IN (Midway,Fairview) AND isnotnull(s_store_sk#14)) + +(14) Project [codegen id : 2] +Output [1]: [s_store_sk#14] +Input [2]: [s_store_sk#14, s_city#15] + +(15) BroadcastExchange +Input [1]: [s_store_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_store_sk#5] +Right keys [1]: [s_store_sk#14] +Join condition: None + +(17) Project [codegen id : 5] +Output [7]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9] +Input [9]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9, s_store_sk#14] + +(18) Scan parquet default.household_demographics +Output [3]: [hd_demo_sk#17, hd_dep_count#18, hd_vehicle_count#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/household_demographics] +PushedFilters: [Or(EqualTo(hd_dep_count,5),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)] +ReadSchema: 
struct + +(19) ColumnarToRow [codegen id : 3] +Input [3]: [hd_demo_sk#17, hd_dep_count#18, hd_vehicle_count#19] + +(20) Filter [codegen id : 3] +Input [3]: [hd_demo_sk#17, hd_dep_count#18, hd_vehicle_count#19] +Condition : (((hd_dep_count#18 = 5) OR (hd_vehicle_count#19 = 3)) AND isnotnull(hd_demo_sk#17)) + +(21) Project [codegen id : 3] +Output [1]: [hd_demo_sk#17] +Input [3]: [hd_demo_sk#17, hd_dep_count#18, hd_vehicle_count#19] + +(22) BroadcastExchange +Input [1]: [hd_demo_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(23) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_hdemo_sk#3] +Right keys [1]: [hd_demo_sk#17] +Join condition: None + +(24) Project [codegen id : 5] +Output [6]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9] +Input [8]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9, hd_demo_sk#17] + +(25) Scan parquet default.customer_address +Output [2]: [ca_address_sk#21, ca_city#22] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)] +ReadSchema: struct + +(26) ColumnarToRow [codegen id : 4] +Input [2]: [ca_address_sk#21, ca_city#22] + +(27) Filter [codegen id : 4] +Input [2]: [ca_address_sk#21, ca_city#22] +Condition : (isnotnull(ca_address_sk#21) AND isnotnull(ca_city#22)) + +(28) BroadcastExchange +Input [2]: [ca_address_sk#21, ca_city#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] + +(29) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_addr_sk#4] +Right keys [1]: [ca_address_sk#21] +Join condition: None + +(30) Project [codegen id : 5] +Output [7]: [ss_customer_sk#2, ss_addr_sk#4, 
ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9, ca_city#22] +Input [8]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9, ca_address_sk#21, ca_city#22] + +(31) HashAggregate [codegen id : 5] +Input [7]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9, ca_city#22] +Keys [4]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#22] +Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#7)), partial_sum(UnscaledValue(ss_ext_list_price#8)), partial_sum(UnscaledValue(ss_ext_tax#9))] +Aggregate Attributes [3]: [sum#24, sum#25, sum#26] +Results [7]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#22, sum#27, sum#28, sum#29] + +(32) Exchange +Input [7]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#22, sum#27, sum#28, sum#29] +Arguments: hashpartitioning(ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#22, 5), true, [id=#30] + +(33) HashAggregate [codegen id : 8] +Input [7]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#22, sum#27, sum#28, sum#29] +Keys [4]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#22] +Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#7)), sum(UnscaledValue(ss_ext_list_price#8)), sum(UnscaledValue(ss_ext_tax#9))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#7))#31, sum(UnscaledValue(ss_ext_list_price#8))#32, sum(UnscaledValue(ss_ext_tax#9))#33] +Results [6]: [ss_ticket_number#6, ss_customer_sk#2, ca_city#22 AS bought_city#34, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#7))#31,17,2) AS extended_price#35, MakeDecimal(sum(UnscaledValue(ss_ext_list_price#8))#32,17,2) AS list_price#36, MakeDecimal(sum(UnscaledValue(ss_ext_tax#9))#33,17,2) AS extended_tax#37] + +(34) Scan parquet default.customer +Output [4]: [c_customer_sk#38, c_current_addr_sk#39, c_first_name#40, c_last_name#41] +Batched: true 
+Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 6] +Input [4]: [c_customer_sk#38, c_current_addr_sk#39, c_first_name#40, c_last_name#41] + +(36) Filter [codegen id : 6] +Input [4]: [c_customer_sk#38, c_current_addr_sk#39, c_first_name#40, c_last_name#41] +Condition : (isnotnull(c_customer_sk#38) AND isnotnull(c_current_addr_sk#39)) + +(37) BroadcastExchange +Input [4]: [c_customer_sk#38, c_current_addr_sk#39, c_first_name#40, c_last_name#41] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#42] + +(38) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#38] +Join condition: None + +(39) Project [codegen id : 8] +Output [8]: [ss_ticket_number#6, bought_city#34, extended_price#35, list_price#36, extended_tax#37, c_current_addr_sk#39, c_first_name#40, c_last_name#41] +Input [10]: [ss_ticket_number#6, ss_customer_sk#2, bought_city#34, extended_price#35, list_price#36, extended_tax#37, c_customer_sk#38, c_current_addr_sk#39, c_first_name#40, c_last_name#41] + +(40) ReusedExchange [Reuses operator id: 28] +Output [2]: [ca_address_sk#21, ca_city#22] + +(41) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [c_current_addr_sk#39] +Right keys [1]: [ca_address_sk#21] +Join condition: NOT (ca_city#22 = bought_city#34) + +(42) Project [codegen id : 8] +Output [8]: [c_last_name#41, c_first_name#40, ca_city#22, bought_city#34, ss_ticket_number#6, extended_price#35, extended_tax#37, list_price#36] +Input [10]: [ss_ticket_number#6, bought_city#34, extended_price#35, list_price#36, extended_tax#37, c_current_addr_sk#39, c_first_name#40, c_last_name#41, ca_address_sk#21, ca_city#22] + +(43) TakeOrderedAndProject +Input [8]: [c_last_name#41, 
c_first_name#40, ca_city#22, bought_city#34, ss_ticket_number#6, extended_price#35, extended_tax#37, list_price#36] +Arguments: 100, [c_last_name#41 ASC NULLS FIRST, ss_ticket_number#6 ASC NULLS FIRST], [c_last_name#41, c_first_name#40, ca_city#22, bought_city#34, ss_ticket_number#6, extended_price#35, extended_tax#37, list_price#36] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q68/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q68/simplified.txt new file mode 100644 index 0000000000000..4c2d24b06c709 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q68/simplified.txt @@ -0,0 +1,63 @@ +TakeOrderedAndProject [bought_city,c_first_name,c_last_name,ca_city,extended_price,extended_tax,list_price,ss_ticket_number] + WholeStageCodegen (8) + Project [bought_city,c_first_name,c_last_name,ca_city,extended_price,extended_tax,list_price,ss_ticket_number] + BroadcastHashJoin [bought_city,c_current_addr_sk,ca_address_sk,ca_city] + Project [bought_city,c_current_addr_sk,c_first_name,c_last_name,extended_price,extended_tax,list_price,ss_ticket_number] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + HashAggregate [ca_city,ss_addr_sk,ss_customer_sk,ss_ticket_number,sum,sum,sum] [bought_city,extended_price,extended_tax,list_price,sum,sum,sum,sum(UnscaledValue(ss_ext_list_price)),sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_ext_tax))] + InputAdapter + Exchange [ca_city,ss_addr_sk,ss_customer_sk,ss_ticket_number] #1 + WholeStageCodegen (5) + HashAggregate [ca_city,ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_ticket_number] [sum,sum,sum,sum,sum,sum] + Project [ca_city,ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_ticket_number] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] + Project [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_ticket_number] + 
BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_hdemo_sk,ss_ticket_number] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_hdemo_sk,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_addr_sk,ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_dom,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_dom,d_year] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [s_store_sk] + Filter [s_city,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_city,s_store_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [hd_demo_sk] + Filter [hd_demo_sk,hd_dep_count,hd_vehicle_count] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [ca_address_sk,ca_city] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_city] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (6) + Filter [c_current_addr_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_customer_sk,c_first_name,c_last_name] + InputAdapter + ReusedExchange [ca_address_sk,ca_city] #5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q7.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q7.sf100/explain.txt new file 
mode 100644 index 0000000000000..fb14ae2c59517 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q7.sf100/explain.txt @@ -0,0 +1,193 @@ +== Physical Plan == +TakeOrderedAndProject (34) ++- * HashAggregate (33) + +- Exchange (32) + +- * HashAggregate (31) + +- * Project (30) + +- * BroadcastHashJoin Inner BuildRight (29) + :- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildLeft (9) + : : : :- BroadcastExchange (5) + : : : : +- * Project (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.date_dim (1) + : : : +- * Filter (8) + : : : +- * ColumnarToRow (7) + : : : +- Scan parquet default.store_sales (6) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.promotion (11) + : +- BroadcastExchange (22) + : +- * Project (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.customer_demographics (18) + +- BroadcastExchange (28) + +- * Filter (27) + +- * ColumnarToRow (26) + +- Scan parquet default.item (25) + + +(1) Scan parquet default.date_dim +Output [2]: [d_date_sk#1, d_year#2] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1998), GreaterThanOrEqual(d_date_sk,2450815), LessThanOrEqual(d_date_sk,2451179), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#1, d_year#2] + +(3) Filter [codegen id : 1] +Input [2]: [d_date_sk#1, d_year#2] +Condition : ((((isnotnull(d_year#2) AND (d_year#2 = 1998)) AND (d_date_sk#1 >= 2450815)) AND (d_date_sk#1 <= 2451179)) AND isnotnull(d_date_sk#1)) + +(4) 
Project [codegen id : 1] +Output [1]: [d_date_sk#1] +Input [2]: [d_date_sk#1, d_year#2] + +(5) BroadcastExchange +Input [1]: [d_date_sk#1] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#3] + +(6) Scan parquet default.store_sales +Output [8]: [ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_promo_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2450815), LessThanOrEqual(ss_sold_date_sk,2451179), IsNotNull(ss_cdemo_sk), IsNotNull(ss_item_sk), IsNotNull(ss_promo_sk)] +ReadSchema: struct + +(7) ColumnarToRow +Input [8]: [ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_promo_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] + +(8) Filter +Input [8]: [ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_promo_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +Condition : (((((isnotnull(ss_sold_date_sk#4) AND (ss_sold_date_sk#4 >= 2450815)) AND (ss_sold_date_sk#4 <= 2451179)) AND isnotnull(ss_cdemo_sk#6)) AND isnotnull(ss_item_sk#5)) AND isnotnull(ss_promo_sk#7)) + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#4] +Join condition: None + +(10) Project [codegen id : 5] +Output [7]: [ss_item_sk#5, ss_cdemo_sk#6, ss_promo_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +Input [9]: [d_date_sk#1, ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_promo_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] + +(11) Scan parquet default.promotion +Output [3]: [p_promo_sk#12, p_channel_email#13, p_channel_event#14] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/promotion] +PushedFilters: [Or(EqualTo(p_channel_email,N),EqualTo(p_channel_event,N)), IsNotNull(p_promo_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [p_promo_sk#12, p_channel_email#13, p_channel_event#14] + +(13) Filter [codegen id : 2] +Input [3]: [p_promo_sk#12, p_channel_email#13, p_channel_event#14] +Condition : (((p_channel_email#13 = N) OR (p_channel_event#14 = N)) AND isnotnull(p_promo_sk#12)) + +(14) Project [codegen id : 2] +Output [1]: [p_promo_sk#12] +Input [3]: [p_promo_sk#12, p_channel_email#13, p_channel_event#14] + +(15) BroadcastExchange +Input [1]: [p_promo_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_promo_sk#7] +Right keys [1]: [p_promo_sk#12] +Join condition: None + +(17) Project [codegen id : 5] +Output [6]: [ss_item_sk#5, ss_cdemo_sk#6, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +Input [8]: [ss_item_sk#5, ss_cdemo_sk#6, ss_promo_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, p_promo_sk#12] + +(18) Scan parquet default.customer_demographics +Output [4]: [cd_demo_sk#16, cd_gender#17, cd_marital_status#18, cd_education_status#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/customer_demographics] +PushedFilters: [IsNotNull(cd_education_status), IsNotNull(cd_gender), IsNotNull(cd_marital_status), EqualTo(cd_gender,F), EqualTo(cd_marital_status,W), EqualTo(cd_education_status,Primary), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [4]: [cd_demo_sk#16, cd_gender#17, cd_marital_status#18, cd_education_status#19] + +(20) Filter [codegen id : 3] +Input [4]: 
[cd_demo_sk#16, cd_gender#17, cd_marital_status#18, cd_education_status#19] +Condition : ((((((isnotnull(cd_education_status#19) AND isnotnull(cd_gender#17)) AND isnotnull(cd_marital_status#18)) AND (cd_gender#17 = F)) AND (cd_marital_status#18 = W)) AND (cd_education_status#19 = Primary)) AND isnotnull(cd_demo_sk#16)) + +(21) Project [codegen id : 3] +Output [1]: [cd_demo_sk#16] +Input [4]: [cd_demo_sk#16, cd_gender#17, cd_marital_status#18, cd_education_status#19] + +(22) BroadcastExchange +Input [1]: [cd_demo_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(23) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_cdemo_sk#6] +Right keys [1]: [cd_demo_sk#16] +Join condition: None + +(24) Project [codegen id : 5] +Output [5]: [ss_item_sk#5, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +Input [7]: [ss_item_sk#5, ss_cdemo_sk#6, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, cd_demo_sk#16] + +(25) Scan parquet default.item +Output [2]: [i_item_sk#21, i_item_id#22] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(26) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#21, i_item_id#22] + +(27) Filter [codegen id : 4] +Input [2]: [i_item_sk#21, i_item_id#22] +Condition : isnotnull(i_item_sk#21) + +(28) BroadcastExchange +Input [2]: [i_item_sk#21, i_item_id#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] + +(29) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#5] +Right keys [1]: [i_item_sk#21] +Join condition: None + +(30) Project [codegen id : 5] +Output [5]: [ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, i_item_id#22] +Input [7]: [ss_item_sk#5, ss_quantity#8, ss_list_price#9, 
ss_sales_price#10, ss_coupon_amt#11, i_item_sk#21, i_item_id#22] + +(31) HashAggregate [codegen id : 5] +Input [5]: [ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, i_item_id#22] +Keys [1]: [i_item_id#22] +Functions [4]: [partial_avg(cast(ss_quantity#8 as bigint)), partial_avg(UnscaledValue(ss_list_price#9)), partial_avg(UnscaledValue(ss_coupon_amt#11)), partial_avg(UnscaledValue(ss_sales_price#10))] +Aggregate Attributes [8]: [sum#24, count#25, sum#26, count#27, sum#28, count#29, sum#30, count#31] +Results [9]: [i_item_id#22, sum#32, count#33, sum#34, count#35, sum#36, count#37, sum#38, count#39] + +(32) Exchange +Input [9]: [i_item_id#22, sum#32, count#33, sum#34, count#35, sum#36, count#37, sum#38, count#39] +Arguments: hashpartitioning(i_item_id#22, 5), true, [id=#40] + +(33) HashAggregate [codegen id : 6] +Input [9]: [i_item_id#22, sum#32, count#33, sum#34, count#35, sum#36, count#37, sum#38, count#39] +Keys [1]: [i_item_id#22] +Functions [4]: [avg(cast(ss_quantity#8 as bigint)), avg(UnscaledValue(ss_list_price#9)), avg(UnscaledValue(ss_coupon_amt#11)), avg(UnscaledValue(ss_sales_price#10))] +Aggregate Attributes [4]: [avg(cast(ss_quantity#8 as bigint))#41, avg(UnscaledValue(ss_list_price#9))#42, avg(UnscaledValue(ss_coupon_amt#11))#43, avg(UnscaledValue(ss_sales_price#10))#44] +Results [5]: [i_item_id#22, avg(cast(ss_quantity#8 as bigint))#41 AS agg1#45, cast((avg(UnscaledValue(ss_list_price#9))#42 / 100.0) as decimal(11,6)) AS agg2#46, cast((avg(UnscaledValue(ss_coupon_amt#11))#43 / 100.0) as decimal(11,6)) AS agg3#47, cast((avg(UnscaledValue(ss_sales_price#10))#44 / 100.0) as decimal(11,6)) AS agg4#48] + +(34) TakeOrderedAndProject +Input [5]: [i_item_id#22, agg1#45, agg2#46, agg3#47, agg4#48] +Arguments: 100, [i_item_id#22 ASC NULLS FIRST], [i_item_id#22, agg1#45, agg2#46, agg3#47, agg4#48] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q7.sf100/simplified.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q7.sf100/simplified.txt new file mode 100644 index 0000000000000..8f283a7b8e59e --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q7.sf100/simplified.txt @@ -0,0 +1,50 @@ +TakeOrderedAndProject [agg1,agg2,agg3,agg4,i_item_id] + WholeStageCodegen (6) + HashAggregate [count,count,count,count,i_item_id,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_list_price)),avg(UnscaledValue(ss_sales_price)),avg(cast(ss_quantity as bigint)),count,count,count,count,sum,sum,sum,sum] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen (5) + HashAggregate [i_item_id,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] [count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] + Project [i_item_id,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] + Project [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [p_promo_sk,ss_promo_sk] + Project [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_promo_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + Filter [ss_cdemo_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_promo_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [p_promo_sk] + Filter [p_channel_email,p_channel_event,p_promo_sk] + ColumnarToRow + InputAdapter + Scan 
parquet default.promotion [p_channel_email,p_channel_event,p_promo_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [cd_demo_sk] + Filter [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_id,i_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q7/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q7/explain.txt new file mode 100644 index 0000000000000..5a22d85a44049 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q7/explain.txt @@ -0,0 +1,193 @@ +== Physical Plan == +TakeOrderedAndProject (34) ++- * HashAggregate (33) + +- Exchange (32) + +- * HashAggregate (31) + +- * Project (30) + +- * BroadcastHashJoin Inner BuildRight (29) + :- * Project (23) + : +- * BroadcastHashJoin Inner BuildRight (22) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.customer_demographics (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.date_dim (11) + : +- BroadcastExchange (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.item (18) + +- BroadcastExchange (28) + +- * Project (27) + +- * Filter (26) + +- * ColumnarToRow (25) + +- Scan parquet default.promotion (24) + + +(1) Scan 
parquet default.store_sales +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2450815), LessThanOrEqual(ss_sold_date_sk,2451179), IsNotNull(ss_cdemo_sk), IsNotNull(ss_item_sk), IsNotNull(ss_promo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] + +(3) Filter [codegen id : 5] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Condition : (((((isnotnull(ss_sold_date_sk#1) AND (ss_sold_date_sk#1 >= 2450815)) AND (ss_sold_date_sk#1 <= 2451179)) AND isnotnull(ss_cdemo_sk#3)) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_promo_sk#4)) + +(4) Scan parquet default.customer_demographics +Output [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/customer_demographics] +PushedFilters: [IsNotNull(cd_marital_status), IsNotNull(cd_education_status), IsNotNull(cd_gender), EqualTo(cd_gender,F), EqualTo(cd_marital_status,W), EqualTo(cd_education_status,Primary), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(6) Filter [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Condition : ((((((isnotnull(cd_marital_status#11) AND 
isnotnull(cd_education_status#12)) AND isnotnull(cd_gender#10)) AND (cd_gender#10 = F)) AND (cd_marital_status#11 = W)) AND (cd_education_status#12 = Primary)) AND isnotnull(cd_demo_sk#9)) + +(7) Project [codegen id : 1] +Output [1]: [cd_demo_sk#9] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(8) BroadcastExchange +Input [1]: [cd_demo_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_cdemo_sk#3] +Right keys [1]: [cd_demo_sk#9] +Join condition: None + +(10) Project [codegen id : 5] +Output [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, cd_demo_sk#9] + +(11) Scan parquet default.date_dim +Output [2]: [d_date_sk#14, d_year#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1998), LessThanOrEqual(d_date_sk,2451179), GreaterThanOrEqual(d_date_sk,2450815), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#14, d_year#15] + +(13) Filter [codegen id : 2] +Input [2]: [d_date_sk#14, d_year#15] +Condition : ((((isnotnull(d_year#15) AND (d_year#15 = 1998)) AND (d_date_sk#14 <= 2451179)) AND (d_date_sk#14 >= 2450815)) AND isnotnull(d_date_sk#14)) + +(14) Project [codegen id : 2] +Output [1]: [d_date_sk#14] +Input [2]: [d_date_sk#14, d_year#15] + +(15) BroadcastExchange +Input [1]: [d_date_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#1] +Right keys 
[1]: [d_date_sk#14] +Join condition: None + +(17) Project [codegen id : 5] +Output [6]: [ss_item_sk#2, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, d_date_sk#14] + +(18) Scan parquet default.item +Output [2]: [i_item_sk#17, i_item_id#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [i_item_sk#17, i_item_id#18] + +(20) Filter [codegen id : 3] +Input [2]: [i_item_sk#17, i_item_id#18] +Condition : isnotnull(i_item_sk#17) + +(21) BroadcastExchange +Input [2]: [i_item_sk#17, i_item_id#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] + +(22) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#17] +Join condition: None + +(23) Project [codegen id : 5] +Output [6]: [ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#18] +Input [8]: [ss_item_sk#2, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_sk#17, i_item_id#18] + +(24) Scan parquet default.promotion +Output [3]: [p_promo_sk#20, p_channel_email#21, p_channel_event#22] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/promotion] +PushedFilters: [Or(EqualTo(p_channel_email,N),EqualTo(p_channel_event,N)), IsNotNull(p_promo_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 4] +Input [3]: [p_promo_sk#20, p_channel_email#21, p_channel_event#22] + +(26) Filter [codegen id : 4] +Input [3]: [p_promo_sk#20, p_channel_email#21, p_channel_event#22] 
+Condition : (((p_channel_email#21 = N) OR (p_channel_event#22 = N)) AND isnotnull(p_promo_sk#20)) + +(27) Project [codegen id : 4] +Output [1]: [p_promo_sk#20] +Input [3]: [p_promo_sk#20, p_channel_email#21, p_channel_event#22] + +(28) BroadcastExchange +Input [1]: [p_promo_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] + +(29) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_promo_sk#4] +Right keys [1]: [p_promo_sk#20] +Join condition: None + +(30) Project [codegen id : 5] +Output [5]: [ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#18] +Input [7]: [ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#18, p_promo_sk#20] + +(31) HashAggregate [codegen id : 5] +Input [5]: [ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#18] +Keys [1]: [i_item_id#18] +Functions [4]: [partial_avg(cast(ss_quantity#5 as bigint)), partial_avg(UnscaledValue(ss_list_price#6)), partial_avg(UnscaledValue(ss_coupon_amt#8)), partial_avg(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [8]: [sum#24, count#25, sum#26, count#27, sum#28, count#29, sum#30, count#31] +Results [9]: [i_item_id#18, sum#32, count#33, sum#34, count#35, sum#36, count#37, sum#38, count#39] + +(32) Exchange +Input [9]: [i_item_id#18, sum#32, count#33, sum#34, count#35, sum#36, count#37, sum#38, count#39] +Arguments: hashpartitioning(i_item_id#18, 5), true, [id=#40] + +(33) HashAggregate [codegen id : 6] +Input [9]: [i_item_id#18, sum#32, count#33, sum#34, count#35, sum#36, count#37, sum#38, count#39] +Keys [1]: [i_item_id#18] +Functions [4]: [avg(cast(ss_quantity#5 as bigint)), avg(UnscaledValue(ss_list_price#6)), avg(UnscaledValue(ss_coupon_amt#8)), avg(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [4]: [avg(cast(ss_quantity#5 as bigint))#41, avg(UnscaledValue(ss_list_price#6))#42, avg(UnscaledValue(ss_coupon_amt#8))#43, 
avg(UnscaledValue(ss_sales_price#7))#44] +Results [5]: [i_item_id#18, avg(cast(ss_quantity#5 as bigint))#41 AS agg1#45, cast((avg(UnscaledValue(ss_list_price#6))#42 / 100.0) as decimal(11,6)) AS agg2#46, cast((avg(UnscaledValue(ss_coupon_amt#8))#43 / 100.0) as decimal(11,6)) AS agg3#47, cast((avg(UnscaledValue(ss_sales_price#7))#44 / 100.0) as decimal(11,6)) AS agg4#48] + +(34) TakeOrderedAndProject +Input [5]: [i_item_id#18, agg1#45, agg2#46, agg3#47, agg4#48] +Arguments: 100, [i_item_id#18 ASC NULLS FIRST], [i_item_id#18, agg1#45, agg2#46, agg3#47, agg4#48] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q7/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q7/simplified.txt new file mode 100644 index 0000000000000..db56467a0218d --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q7/simplified.txt @@ -0,0 +1,50 @@ +TakeOrderedAndProject [agg1,agg2,agg3,agg4,i_item_id] + WholeStageCodegen (6) + HashAggregate [count,count,count,count,i_item_id,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_list_price)),avg(UnscaledValue(ss_sales_price)),avg(cast(ss_quantity as bigint)),count,count,count,count,sum,sum,sum,sum] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen (5) + HashAggregate [i_item_id,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] [count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] + Project [i_item_id,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [p_promo_sk,ss_promo_sk] + Project [i_item_id,ss_coupon_amt,ss_list_price,ss_promo_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_promo_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project 
[ss_coupon_amt,ss_item_sk,ss_list_price,ss_promo_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] + Filter [ss_cdemo_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_promo_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [cd_demo_sk] + Filter [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_id,i_item_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [p_promo_sk] + Filter [p_channel_email,p_channel_event,p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.promotion [p_channel_email,p_channel_event,p_promo_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/explain.txt new file mode 100644 index 0000000000000..ac585d84232d0 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/explain.txt @@ -0,0 +1,203 @@ +== Physical Plan == +* Sort (36) ++- Exchange (35) + +- * Project (34) + +- * BroadcastHashJoin Inner BuildLeft (33) + :- BroadcastExchange (29) + : +- * Filter (28) + : +- * HashAggregate (27) + : +- Exchange (26) + : +- * HashAggregate (25) + : +- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * 
Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.store (11) + : +- BroadcastExchange (22) + : +- * Project (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.household_demographics (18) + +- * Filter (32) + +- * ColumnarToRow (31) + +- Scan parquet default.customer (30) + + +(1) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store_sales] +PushedFilters: [In(ss_sold_date_sk, [2451790,2451119,2451180,2451454,2450874,2450906,2450967,2451485,2451850,2451514,2451270,2451758,2451028,2451546,2450997,2450996,2451393,2451667,2451453,2451819,2450905,2451331,2451577,2451089,2451301,2451545,2451605,2451851,2451181,2451149,2451820,2451362,2451392,2451240,2450935,2451637,2451484,2451058,2451300,2451727,2451759,2450815,2451698,2451150,2451332,2451606,2451666,2451211,2450846,2450875,2450966,2450936,2451361,2451212,2451880,2451059,2451789,2451423,2451576,2450816,2451088,2451728,2451027,2451120,2451881,2451697,2450847,2451271,2451636,2451515,2451424,2451239]), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] 
+ +(3) Filter [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] +Condition : ((((ss_sold_date_sk#1 INSET (2451790,2451119,2451180,2451454,2450874,2450906,2450967,2451485,2451850,2451514,2451270,2451758,2451028,2451546,2450997,2450996,2451393,2451667,2451453,2451819,2450905,2451331,2451577,2451089,2451301,2451545,2451605,2451851,2451181,2451149,2451820,2451362,2451392,2451240,2450935,2451637,2451484,2451058,2451300,2451727,2451759,2450815,2451698,2451150,2451332,2451606,2451666,2451211,2450846,2450875,2450966,2450936,2451361,2451212,2451880,2451059,2451789,2451423,2451576,2450816,2451088,2451728,2451027,2451120,2451881,2451697,2450847,2451271,2451636,2451515,2451424,2451239) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#4)) AND isnotnull(ss_hdemo_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#6, d_year#7, d_dom#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1998,1999,2000]), In(d_date_sk, [2451790,2451119,2451180,2451454,2450874,2450906,2450967,2451485,2451850,2451514,2451270,2451758,2451028,2451546,2450997,2450996,2451393,2451667,2451453,2451819,2450905,2451331,2451577,2451089,2451301,2451545,2451605,2451851,2451181,2451149,2451820,2451362,2451392,2451240,2450935,2451637,2451484,2451058,2451300,2451727,2451759,2450815,2451698,2451150,2451332,2451606,2451666,2451211,2450846,2450875,2450966,2450936,2451361,2451212,2451880,2451059,2451789,2451423,2451576,2450816,2451088,2451728,2451027,2451120,2451881,2451697,2450847,2451271,2451636,2451515,2451424,2451239]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] + +(6) Filter 
[codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] +Condition : (((((isnotnull(d_dom#8) AND (d_dom#8 >= 1)) AND (d_dom#8 <= 2)) AND d_year#7 IN (1998,1999,2000)) AND d_date_sk#6 INSET (2451790,2451119,2451180,2451454,2450874,2450906,2450967,2451485,2451850,2451514,2451270,2451758,2451028,2451546,2450997,2450996,2451393,2451667,2451453,2451819,2450905,2451331,2451577,2451089,2451301,2451545,2451605,2451851,2451181,2451149,2451820,2451362,2451392,2451240,2450935,2451637,2451484,2451058,2451300,2451727,2451759,2450815,2451698,2451150,2451332,2451606,2451666,2451211,2450846,2450875,2450966,2450936,2451361,2451212,2451880,2451059,2451789,2451423,2451576,2450816,2451088,2451728,2451027,2451120,2451881,2451697,2450847,2451271,2451636,2451515,2451424,2451239)) AND isnotnull(d_date_sk#6)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] + +(8) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] +Input [6]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, d_date_sk#6] + +(11) Scan parquet default.store +Output [2]: [s_store_sk#10, s_county#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store] +PushedFilters: [In(s_county, [Fairfield County,Ziebach County,Bronx County,Barrow County]), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#10, s_county#11] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#10, s_county#11] +Condition : (s_county#11 IN (Fairfield 
County,Ziebach County,Bronx County,Barrow County) AND isnotnull(s_store_sk#10)) + +(14) Project [codegen id : 2] +Output [1]: [s_store_sk#10] +Input [2]: [s_store_sk#10, s_county#11] + +(15) BroadcastExchange +Input [1]: [s_store_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#10] +Join condition: None + +(17) Project [codegen id : 4] +Output [3]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_ticket_number#5] +Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, s_store_sk#10] + +(18) Scan parquet default.household_demographics +Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/household_demographics] +PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,Unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] + +(20) Filter [codegen id : 3] +Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] +Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = Unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.0)) AND isnotnull(hd_demo_sk#13)) + +(21) Project [codegen id : 3] +Output [1]: [hd_demo_sk#13] +Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] + +(22) BroadcastExchange +Input [1]: [hd_demo_sk#13] +Arguments: 
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_hdemo_sk#3] +Right keys [1]: [hd_demo_sk#13] +Join condition: None + +(24) Project [codegen id : 4] +Output [2]: [ss_customer_sk#2, ss_ticket_number#5] +Input [4]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_ticket_number#5, hd_demo_sk#13] + +(25) HashAggregate [codegen id : 4] +Input [2]: [ss_customer_sk#2, ss_ticket_number#5] +Keys [2]: [ss_ticket_number#5, ss_customer_sk#2] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#18] +Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] + +(26) Exchange +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] + +(27) HashAggregate [codegen id : 5] +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] +Keys [2]: [ss_ticket_number#5, ss_customer_sk#2] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#21] +Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count(1)#21 AS cnt#22] + +(28) Filter [codegen id : 5] +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] +Condition : ((cnt#22 >= 1) AND (cnt#22 <= 5)) + +(29) BroadcastExchange +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#23] + +(30) Scan parquet default.customer +Output [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(31) ColumnarToRow +Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] + +(32) Filter +Input [5]: 
[c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] +Condition : isnotnull(c_customer_sk#24) + +(33) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#24] +Join condition: None + +(34) Project [codegen id : 6] +Output [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] +Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] + +(35) Exchange +Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] +Arguments: rangepartitioning(cnt#22 DESC NULLS LAST, 5), true, [id=#29] + +(36) Sort [codegen id : 7] +Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] +Arguments: [cnt#22 DESC NULLS LAST], true, 0 + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/simplified.txt new file mode 100644 index 0000000000000..4f19c79039220 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/simplified.txt @@ -0,0 +1,54 @@ +WholeStageCodegen (7) + Sort [cnt] + InputAdapter + Exchange [cnt] #1 + WholeStageCodegen (6) + Project [c_first_name,c_last_name,c_preferred_cust_flag,c_salutation,cnt,ss_ticket_number] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (5) + Filter [cnt] + HashAggregate [count,ss_customer_sk,ss_ticket_number] [cnt,count,count(1)] + InputAdapter + Exchange [ss_customer_sk,ss_ticket_number] #3 + WholeStageCodegen (4) + HashAggregate [ss_customer_sk,ss_ticket_number] [count,count] + Project [ss_customer_sk,ss_ticket_number] + BroadcastHashJoin 
[hd_demo_sk,ss_hdemo_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_ticket_number] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_dom,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_dom,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Project [s_store_sk] + Filter [s_county,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_county,s_store_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Project [hd_demo_sk] + Filter [hd_buy_potential,hd_demo_sk,hd_dep_count,hd_vehicle_count] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_buy_potential,hd_demo_sk,hd_dep_count,hd_vehicle_count] + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_preferred_cust_flag,c_salutation] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/explain.txt new file mode 100644 index 0000000000000..7fb1049935f19 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/explain.txt @@ -0,0 +1,203 @@ +== Physical Plan == +* Sort (36) ++- Exchange (35) + +- * Project (34) + +- * BroadcastHashJoin Inner BuildRight (33) + :- * Filter (28) + : +- * HashAggregate (27) + : +- Exchange (26) + : +- * HashAggregate (25) + : +- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (17) + : : +- 
* BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.store (11) + : +- BroadcastExchange (22) + : +- * Project (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.household_demographics (18) + +- BroadcastExchange (32) + +- * Filter (31) + +- * ColumnarToRow (30) + +- Scan parquet default.customer (29) + + +(1) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store_sales] +PushedFilters: [In(ss_sold_date_sk, [2451790,2451119,2451180,2451454,2450874,2450906,2450967,2451485,2451850,2451514,2451270,2451758,2451028,2451546,2450997,2450996,2451393,2451667,2451453,2451819,2450905,2451331,2451577,2451089,2451301,2451545,2451605,2451851,2451181,2451149,2451820,2451362,2451392,2451240,2450935,2451637,2451484,2451058,2451300,2451727,2451759,2450815,2451698,2451150,2451332,2451606,2451666,2451211,2450846,2450875,2450966,2450936,2451361,2451212,2451880,2451059,2451789,2451423,2451576,2450816,2451088,2451728,2451027,2451120,2451881,2451697,2450847,2451271,2451636,2451515,2451424,2451239]), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] + 
+(3) Filter [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] +Condition : ((((ss_sold_date_sk#1 INSET (2451790,2451119,2451180,2451454,2450874,2450906,2450967,2451485,2451850,2451514,2451270,2451758,2451028,2451546,2450997,2450996,2451393,2451667,2451453,2451819,2450905,2451331,2451577,2451089,2451301,2451545,2451605,2451851,2451181,2451149,2451820,2451362,2451392,2451240,2450935,2451637,2451484,2451058,2451300,2451727,2451759,2450815,2451698,2451150,2451332,2451606,2451666,2451211,2450846,2450875,2450966,2450936,2451361,2451212,2451880,2451059,2451789,2451423,2451576,2450816,2451088,2451728,2451027,2451120,2451881,2451697,2450847,2451271,2451636,2451515,2451424,2451239) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#4)) AND isnotnull(ss_hdemo_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#6, d_year#7, d_dom#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1998,1999,2000]), In(d_date_sk, [2451790,2451119,2451180,2451454,2450874,2450906,2450967,2451485,2451850,2451514,2451270,2451758,2451028,2451546,2450997,2450996,2451393,2451667,2451453,2451819,2450905,2451331,2451577,2451089,2451301,2451545,2451605,2451851,2451181,2451149,2451820,2451362,2451392,2451240,2450935,2451637,2451484,2451058,2451300,2451727,2451759,2450815,2451698,2451150,2451332,2451606,2451666,2451211,2450846,2450875,2450966,2450936,2451361,2451212,2451880,2451059,2451789,2451423,2451576,2450816,2451088,2451728,2451027,2451120,2451881,2451697,2450847,2451271,2451636,2451515,2451424,2451239]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] + +(6) Filter [codegen id : 1] 
+Input [3]: [d_date_sk#6, d_year#7, d_dom#8] +Condition : (((((isnotnull(d_dom#8) AND (d_dom#8 >= 1)) AND (d_dom#8 <= 2)) AND d_year#7 IN (1998,1999,2000)) AND d_date_sk#6 INSET (2451790,2451119,2451180,2451454,2450874,2450906,2450967,2451485,2451850,2451514,2451270,2451758,2451028,2451546,2450997,2450996,2451393,2451667,2451453,2451819,2450905,2451331,2451577,2451089,2451301,2451545,2451605,2451851,2451181,2451149,2451820,2451362,2451392,2451240,2450935,2451637,2451484,2451058,2451300,2451727,2451759,2450815,2451698,2451150,2451332,2451606,2451666,2451211,2450846,2450875,2450966,2450936,2451361,2451212,2451880,2451059,2451789,2451423,2451576,2450816,2451088,2451728,2451027,2451120,2451881,2451697,2450847,2451271,2451636,2451515,2451424,2451239)) AND isnotnull(d_date_sk#6)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] + +(8) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] +Input [6]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, d_date_sk#6] + +(11) Scan parquet default.store +Output [2]: [s_store_sk#10, s_county#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store] +PushedFilters: [In(s_county, [Fairfield County,Ziebach County,Bronx County,Barrow County]), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#10, s_county#11] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#10, s_county#11] +Condition : (s_county#11 IN (Fairfield County,Ziebach County,Bronx 
County,Barrow County) AND isnotnull(s_store_sk#10)) + +(14) Project [codegen id : 2] +Output [1]: [s_store_sk#10] +Input [2]: [s_store_sk#10, s_county#11] + +(15) BroadcastExchange +Input [1]: [s_store_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#10] +Join condition: None + +(17) Project [codegen id : 4] +Output [3]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_ticket_number#5] +Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, s_store_sk#10] + +(18) Scan parquet default.household_demographics +Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/household_demographics] +PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,Unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] + +(20) Filter [codegen id : 3] +Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] +Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = Unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.0)) AND isnotnull(hd_demo_sk#13)) + +(21) Project [codegen id : 3] +Output [1]: [hd_demo_sk#13] +Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] + +(22) BroadcastExchange +Input [1]: [hd_demo_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as 
bigint)),false), [id=#17] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_hdemo_sk#3] +Right keys [1]: [hd_demo_sk#13] +Join condition: None + +(24) Project [codegen id : 4] +Output [2]: [ss_customer_sk#2, ss_ticket_number#5] +Input [4]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_ticket_number#5, hd_demo_sk#13] + +(25) HashAggregate [codegen id : 4] +Input [2]: [ss_customer_sk#2, ss_ticket_number#5] +Keys [2]: [ss_ticket_number#5, ss_customer_sk#2] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#18] +Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] + +(26) Exchange +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] + +(27) HashAggregate [codegen id : 6] +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] +Keys [2]: [ss_ticket_number#5, ss_customer_sk#2] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#21] +Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count(1)#21 AS cnt#22] + +(28) Filter [codegen id : 6] +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] +Condition : ((cnt#22 >= 1) AND (cnt#22 <= 5)) + +(29) Scan parquet default.customer +Output [5]: [c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 5] +Input [5]: [c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] + +(31) Filter [codegen id : 5] +Input [5]: [c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] +Condition : isnotnull(c_customer_sk#23) + +(32) BroadcastExchange +Input [5]: [c_customer_sk#23, c_salutation#24, c_first_name#25, 
c_last_name#26, c_preferred_cust_flag#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#28] + +(33) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#23] +Join condition: None + +(34) Project [codegen id : 6] +Output [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] +Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] + +(35) Exchange +Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] +Arguments: rangepartitioning(cnt#22 DESC NULLS LAST, 5), true, [id=#29] + +(36) Sort [codegen id : 7] +Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] +Arguments: [cnt#22 DESC NULLS LAST], true, 0 + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/simplified.txt new file mode 100644 index 0000000000000..55312b6569a21 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/simplified.txt @@ -0,0 +1,54 @@ +WholeStageCodegen (7) + Sort [cnt] + InputAdapter + Exchange [cnt] #1 + WholeStageCodegen (6) + Project [c_first_name,c_last_name,c_preferred_cust_flag,c_salutation,cnt,ss_ticket_number] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [cnt] + HashAggregate [count,ss_customer_sk,ss_ticket_number] [cnt,count,count(1)] + InputAdapter + Exchange [ss_customer_sk,ss_ticket_number] #2 + WholeStageCodegen (4) + HashAggregate [ss_customer_sk,ss_ticket_number] [count,count] + Project [ss_customer_sk,ss_ticket_number] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_ticket_number] + 
BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_dom,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_dom,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [s_store_sk] + Filter [s_county,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_county,s_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [hd_demo_sk] + Filter [hd_buy_potential,hd_demo_sk,hd_dep_count,hd_vehicle_count] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_buy_potential,hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_preferred_cust_flag,c_salutation] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q79.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q79.sf100/explain.txt new file mode 100644 index 0000000000000..cbe3432b9d2bb --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q79.sf100/explain.txt @@ -0,0 +1,208 @@ +== Physical Plan == +TakeOrderedAndProject (37) ++- * Project (36) + +- * SortMergeJoin Inner (35) + :- * Sort (29) + : +- Exchange (28) + : +- * HashAggregate (27) + : +- Exchange (26) + : +- * HashAggregate (25) + : +- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (17) + : : +- * 
BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.household_demographics (11) + : +- BroadcastExchange (22) + : +- * Project (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.store (18) + +- * Sort (34) + +- Exchange (33) + +- * Filter (32) + +- * ColumnarToRow (31) + +- Scan parquet default.customer (30) + + +(1) Scan parquet default.store_sales +Output [8]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2450819), LessThanOrEqual(ss_sold_date_sk,2451904), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [8]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] + +(3) Filter [codegen id : 4] +Input [8]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Condition : (((((isnotnull(ss_sold_date_sk#1) AND (ss_sold_date_sk#1 >= 2450819)) AND (ss_sold_date_sk#1 <= 2451904)) AND isnotnull(ss_store_sk#5)) AND isnotnull(ss_hdemo_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(4) Scan 
parquet default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_dow#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_dow), EqualTo(d_dow,1), In(d_year, [1998,1999,2000]), GreaterThanOrEqual(d_date_sk,2450819), LessThanOrEqual(d_date_sk,2451904), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] +Condition : (((((isnotnull(d_dow#11) AND (d_dow#11 = 1)) AND d_year#10 IN (1998,1999,2000)) AND (d_date_sk#9 >= 2450819)) AND (d_date_sk#9 <= 2451904)) AND isnotnull(d_date_sk#9)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#9] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] + +(8) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(10) Project [codegen id : 4] +Output [7]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Input [9]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, d_date_sk#9] + +(11) Scan parquet default.household_demographics +Output [3]: [hd_demo_sk#13, hd_dep_count#14, hd_vehicle_count#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/household_demographics] +PushedFilters: [Or(EqualTo(hd_dep_count,8),GreaterThan(hd_vehicle_count,0)), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: 
[hd_demo_sk#13, hd_dep_count#14, hd_vehicle_count#15] + +(13) Filter [codegen id : 2] +Input [3]: [hd_demo_sk#13, hd_dep_count#14, hd_vehicle_count#15] +Condition : (((hd_dep_count#14 = 8) OR (hd_vehicle_count#15 > 0)) AND isnotnull(hd_demo_sk#13)) + +(14) Project [codegen id : 2] +Output [1]: [hd_demo_sk#13] +Input [3]: [hd_demo_sk#13, hd_dep_count#14, hd_vehicle_count#15] + +(15) BroadcastExchange +Input [1]: [hd_demo_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_hdemo_sk#3] +Right keys [1]: [hd_demo_sk#13] +Join condition: None + +(17) Project [codegen id : 4] +Output [6]: [ss_customer_sk#2, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Input [8]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, hd_demo_sk#13] + +(18) Scan parquet default.store +Output [3]: [s_store_sk#17, s_number_employees#18, s_city#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_number_employees), GreaterThanOrEqual(s_number_employees,200), LessThanOrEqual(s_number_employees,295), IsNotNull(s_store_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [3]: [s_store_sk#17, s_number_employees#18, s_city#19] + +(20) Filter [codegen id : 3] +Input [3]: [s_store_sk#17, s_number_employees#18, s_city#19] +Condition : (((isnotnull(s_number_employees#18) AND (s_number_employees#18 >= 200)) AND (s_number_employees#18 <= 295)) AND isnotnull(s_store_sk#17)) + +(21) Project [codegen id : 3] +Output [2]: [s_store_sk#17, s_city#19] +Input [3]: [s_store_sk#17, s_number_employees#18, s_city#19] + +(22) BroadcastExchange +Input [2]: [s_store_sk#17, s_city#19] +Arguments: 
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#5] +Right keys [1]: [s_store_sk#17] +Join condition: None + +(24) Project [codegen id : 4] +Output [6]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, s_city#19] +Input [8]: [ss_customer_sk#2, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, s_store_sk#17, s_city#19] + +(25) HashAggregate [codegen id : 4] +Input [6]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, s_city#19] +Keys [4]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, s_city#19] +Functions [2]: [partial_sum(UnscaledValue(ss_coupon_amt#7)), partial_sum(UnscaledValue(ss_net_profit#8))] +Aggregate Attributes [2]: [sum#21, sum#22] +Results [6]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, s_city#19, sum#23, sum#24] + +(26) Exchange +Input [6]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, s_city#19, sum#23, sum#24] +Arguments: hashpartitioning(ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, s_city#19, 5), true, [id=#25] + +(27) HashAggregate [codegen id : 5] +Input [6]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, s_city#19, sum#23, sum#24] +Keys [4]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, s_city#19] +Functions [2]: [sum(UnscaledValue(ss_coupon_amt#7)), sum(UnscaledValue(ss_net_profit#8))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_coupon_amt#7))#26, sum(UnscaledValue(ss_net_profit#8))#27] +Results [5]: [ss_ticket_number#6, ss_customer_sk#2, s_city#19, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#7))#26,17,2) AS amt#28, MakeDecimal(sum(UnscaledValue(ss_net_profit#8))#27,17,2) AS profit#29] + +(28) Exchange +Input [5]: [ss_ticket_number#6, ss_customer_sk#2, s_city#19, amt#28, profit#29] +Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#30] + +(29) Sort [codegen id : 6] +Input 
[5]: [ss_ticket_number#6, ss_customer_sk#2, s_city#19, amt#28, profit#29] +Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 + +(30) Scan parquet default.customer +Output [3]: [c_customer_sk#31, c_first_name#32, c_last_name#33] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 7] +Input [3]: [c_customer_sk#31, c_first_name#32, c_last_name#33] + +(32) Filter [codegen id : 7] +Input [3]: [c_customer_sk#31, c_first_name#32, c_last_name#33] +Condition : isnotnull(c_customer_sk#31) + +(33) Exchange +Input [3]: [c_customer_sk#31, c_first_name#32, c_last_name#33] +Arguments: hashpartitioning(c_customer_sk#31, 5), true, [id=#34] + +(34) Sort [codegen id : 8] +Input [3]: [c_customer_sk#31, c_first_name#32, c_last_name#33] +Arguments: [c_customer_sk#31 ASC NULLS FIRST], false, 0 + +(35) SortMergeJoin [codegen id : 9] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#31] +Join condition: None + +(36) Project [codegen id : 9] +Output [7]: [c_last_name#33, c_first_name#32, substr(s_city#19, 1, 30) AS substr(s_city, 1, 30)#35, ss_ticket_number#6, amt#28, profit#29, s_city#19] +Input [8]: [ss_ticket_number#6, ss_customer_sk#2, s_city#19, amt#28, profit#29, c_customer_sk#31, c_first_name#32, c_last_name#33] + +(37) TakeOrderedAndProject +Input [7]: [c_last_name#33, c_first_name#32, substr(s_city, 1, 30)#35, ss_ticket_number#6, amt#28, profit#29, s_city#19] +Arguments: 100, [c_last_name#33 ASC NULLS FIRST, c_first_name#32 ASC NULLS FIRST, substr(s_city#19, 1, 30) ASC NULLS FIRST, profit#29 ASC NULLS FIRST], [c_last_name#33, c_first_name#32, substr(s_city, 1, 30)#35, ss_ticket_number#6, amt#28, profit#29] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q79.sf100/simplified.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q79.sf100/simplified.txt new file mode 100644 index 0000000000000..8ef698c9f896c --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q79.sf100/simplified.txt @@ -0,0 +1,59 @@ +TakeOrderedAndProject [amt,c_first_name,c_last_name,profit,s_city,ss_ticket_number,substr(s_city, 1, 30)] + WholeStageCodegen (9) + Project [amt,c_first_name,c_last_name,profit,s_city,ss_ticket_number] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (6) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #1 + WholeStageCodegen (5) + HashAggregate [s_city,ss_addr_sk,ss_customer_sk,ss_ticket_number,sum,sum] [amt,profit,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit))] + InputAdapter + Exchange [s_city,ss_addr_sk,ss_customer_sk,ss_ticket_number] #2 + WholeStageCodegen (4) + HashAggregate [s_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number] [sum,sum,sum,sum] + Project [s_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_dow,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_dow,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen 
(2) + Project [hd_demo_sk] + Filter [hd_demo_sk,hd_dep_count,hd_vehicle_count] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [s_city,s_store_sk] + Filter [s_number_employees,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_city,s_number_employees,s_store_sk] + InputAdapter + WholeStageCodegen (8) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #6 + WholeStageCodegen (7) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q79/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q79/explain.txt new file mode 100644 index 0000000000000..e1c2116bf8d19 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q79/explain.txt @@ -0,0 +1,193 @@ +== Physical Plan == +TakeOrderedAndProject (34) ++- * Project (33) + +- * BroadcastHashJoin Inner BuildRight (32) + :- * HashAggregate (27) + : +- Exchange (26) + : +- * HashAggregate (25) + : +- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.store (11) + : +- BroadcastExchange (22) + : +- * Project (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- 
Scan parquet default.household_demographics (18) + +- BroadcastExchange (31) + +- * Filter (30) + +- * ColumnarToRow (29) + +- Scan parquet default.customer (28) + + +(1) Scan parquet default.store_sales +Output [8]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2450819), LessThanOrEqual(ss_sold_date_sk,2451904), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [8]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] + +(3) Filter [codegen id : 4] +Input [8]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Condition : (((((isnotnull(ss_sold_date_sk#1) AND (ss_sold_date_sk#1 >= 2450819)) AND (ss_sold_date_sk#1 <= 2451904)) AND isnotnull(ss_store_sk#5)) AND isnotnull(ss_hdemo_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_dow#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_dow), EqualTo(d_dow,1), In(d_year, [1998,1999,2000]), GreaterThanOrEqual(d_date_sk,2450819), LessThanOrEqual(d_date_sk,2451904), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] +Condition : (((((isnotnull(d_dow#11) AND (d_dow#11 
= 1)) AND d_year#10 IN (1998,1999,2000)) AND (d_date_sk#9 >= 2450819)) AND (d_date_sk#9 <= 2451904)) AND isnotnull(d_date_sk#9)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#9] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] + +(8) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(10) Project [codegen id : 4] +Output [7]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Input [9]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, d_date_sk#9] + +(11) Scan parquet default.store +Output [3]: [s_store_sk#13, s_number_employees#14, s_city#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_number_employees), GreaterThanOrEqual(s_number_employees,200), LessThanOrEqual(s_number_employees,295), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [s_store_sk#13, s_number_employees#14, s_city#15] + +(13) Filter [codegen id : 2] +Input [3]: [s_store_sk#13, s_number_employees#14, s_city#15] +Condition : (((isnotnull(s_number_employees#14) AND (s_number_employees#14 >= 200)) AND (s_number_employees#14 <= 295)) AND isnotnull(s_store_sk#13)) + +(14) Project [codegen id : 2] +Output [2]: [s_store_sk#13, s_city#15] +Input [3]: [s_store_sk#13, s_number_employees#14, s_city#15] + +(15) BroadcastExchange +Input [2]: [s_store_sk#13, s_city#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: 
[ss_store_sk#5] +Right keys [1]: [s_store_sk#13] +Join condition: None + +(17) Project [codegen id : 4] +Output [7]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, s_city#15] +Input [9]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, s_store_sk#13, s_city#15] + +(18) Scan parquet default.household_demographics +Output [3]: [hd_demo_sk#17, hd_dep_count#18, hd_vehicle_count#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/household_demographics] +PushedFilters: [Or(EqualTo(hd_dep_count,8),GreaterThan(hd_vehicle_count,0)), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [3]: [hd_demo_sk#17, hd_dep_count#18, hd_vehicle_count#19] + +(20) Filter [codegen id : 3] +Input [3]: [hd_demo_sk#17, hd_dep_count#18, hd_vehicle_count#19] +Condition : (((hd_dep_count#18 = 8) OR (hd_vehicle_count#19 > 0)) AND isnotnull(hd_demo_sk#17)) + +(21) Project [codegen id : 3] +Output [1]: [hd_demo_sk#17] +Input [3]: [hd_demo_sk#17, hd_dep_count#18, hd_vehicle_count#19] + +(22) BroadcastExchange +Input [1]: [hd_demo_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_hdemo_sk#3] +Right keys [1]: [hd_demo_sk#17] +Join condition: None + +(24) Project [codegen id : 4] +Output [6]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, s_city#15] +Input [8]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, s_city#15, hd_demo_sk#17] + +(25) HashAggregate [codegen id : 4] +Input [6]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, s_city#15] +Keys [4]: [ss_ticket_number#6, 
ss_customer_sk#2, ss_addr_sk#4, s_city#15] +Functions [2]: [partial_sum(UnscaledValue(ss_coupon_amt#7)), partial_sum(UnscaledValue(ss_net_profit#8))] +Aggregate Attributes [2]: [sum#21, sum#22] +Results [6]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, s_city#15, sum#23, sum#24] + +(26) Exchange +Input [6]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, s_city#15, sum#23, sum#24] +Arguments: hashpartitioning(ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, s_city#15, 5), true, [id=#25] + +(27) HashAggregate [codegen id : 6] +Input [6]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, s_city#15, sum#23, sum#24] +Keys [4]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, s_city#15] +Functions [2]: [sum(UnscaledValue(ss_coupon_amt#7)), sum(UnscaledValue(ss_net_profit#8))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_coupon_amt#7))#26, sum(UnscaledValue(ss_net_profit#8))#27] +Results [5]: [ss_ticket_number#6, ss_customer_sk#2, s_city#15, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#7))#26,17,2) AS amt#28, MakeDecimal(sum(UnscaledValue(ss_net_profit#8))#27,17,2) AS profit#29] + +(28) Scan parquet default.customer +Output [3]: [c_customer_sk#30, c_first_name#31, c_last_name#32] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 5] +Input [3]: [c_customer_sk#30, c_first_name#31, c_last_name#32] + +(30) Filter [codegen id : 5] +Input [3]: [c_customer_sk#30, c_first_name#31, c_last_name#32] +Condition : isnotnull(c_customer_sk#30) + +(31) BroadcastExchange +Input [3]: [c_customer_sk#30, c_first_name#31, c_last_name#32] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#33] + +(32) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#30] 
+Join condition: None + +(33) Project [codegen id : 6] +Output [7]: [c_last_name#32, c_first_name#31, substr(s_city#15, 1, 30) AS substr(s_city, 1, 30)#34, ss_ticket_number#6, amt#28, profit#29, s_city#15] +Input [8]: [ss_ticket_number#6, ss_customer_sk#2, s_city#15, amt#28, profit#29, c_customer_sk#30, c_first_name#31, c_last_name#32] + +(34) TakeOrderedAndProject +Input [7]: [c_last_name#32, c_first_name#31, substr(s_city, 1, 30)#34, ss_ticket_number#6, amt#28, profit#29, s_city#15] +Arguments: 100, [c_last_name#32 ASC NULLS FIRST, c_first_name#31 ASC NULLS FIRST, substr(s_city#15, 1, 30) ASC NULLS FIRST, profit#29 ASC NULLS FIRST], [c_last_name#32, c_first_name#31, substr(s_city, 1, 30)#34, ss_ticket_number#6, amt#28, profit#29] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q79/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q79/simplified.txt new file mode 100644 index 0000000000000..53f5a7d0acc0e --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q79/simplified.txt @@ -0,0 +1,50 @@ +TakeOrderedAndProject [amt,c_first_name,c_last_name,profit,s_city,ss_ticket_number,substr(s_city, 1, 30)] + WholeStageCodegen (6) + Project [amt,c_first_name,c_last_name,profit,s_city,ss_ticket_number] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + HashAggregate [s_city,ss_addr_sk,ss_customer_sk,ss_ticket_number,sum,sum] [amt,profit,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit))] + InputAdapter + Exchange [s_city,ss_addr_sk,ss_customer_sk,ss_ticket_number] #1 + WholeStageCodegen (4) + HashAggregate [s_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number] [sum,sum,sum,sum] + Project [s_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project 
[s_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_ticket_number] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_dow,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_dow,d_year] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [s_city,s_store_sk] + Filter [s_number_employees,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_city,s_number_employees,s_store_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [hd_demo_sk] + Filter [hd_demo_sk,hd_dep_count,hd_vehicle_count] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (5) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q89.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q89.sf100/explain.txt new file mode 100644 index 0000000000000..d610aa7854bcb --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q89.sf100/explain.txt @@ -0,0 +1,175 @@ +== Physical Plan == +TakeOrderedAndProject (31) ++- * Project (30) + +- * Filter (29) + +- Window (28) + +- * Sort (27) + +- Exchange (26) + +- * HashAggregate (25) + +- Exchange 
(24) + +- * HashAggregate (23) + +- * Project (22) + +- * BroadcastHashJoin Inner BuildRight (21) + :- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.store_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.date_dim (4) + : +- BroadcastExchange (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet default.store (11) + +- BroadcastExchange (20) + +- * Filter (19) + +- * ColumnarToRow (18) + +- Scan parquet default.item (17) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_sales_price#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2451545), LessThanOrEqual(ss_sold_date_sk,2451910), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_sales_price#4] + +(3) Filter [codegen id : 4] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_sales_price#4] +Condition : ((((isnotnull(ss_sold_date_sk#1) AND (ss_sold_date_sk#1 >= 2451545)) AND (ss_sold_date_sk#1 <= 2451910)) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_store_sk#3)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#5, d_year#6, d_moy#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), LessThanOrEqual(d_date_sk,2451910), 
GreaterThanOrEqual(d_date_sk,2451545), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Condition : ((((isnotnull(d_year#6) AND (d_year#6 = 2000)) AND (d_date_sk#5 <= 2451910)) AND (d_date_sk#5 >= 2451545)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [2]: [d_date_sk#5, d_moy#7] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(8) BroadcastExchange +Input [2]: [d_date_sk#5, d_moy#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [ss_item_sk#2, ss_store_sk#3, ss_sales_price#4, d_moy#7] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_sales_price#4, d_date_sk#5, d_moy#7] + +(11) Scan parquet default.store +Output [3]: [s_store_sk#9, s_store_name#10, s_company_name#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [s_store_sk#9, s_store_name#10, s_company_name#11] + +(13) Filter [codegen id : 2] +Input [3]: [s_store_sk#9, s_store_name#10, s_company_name#11] +Condition : isnotnull(s_store_sk#9) + +(14) BroadcastExchange +Input [3]: [s_store_sk#9, s_store_name#10, s_company_name#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#9] +Join condition: None + +(16) Project [codegen id : 4] +Output [5]: [ss_item_sk#2, ss_sales_price#4, d_moy#7, s_store_name#10, 
s_company_name#11] +Input [7]: [ss_item_sk#2, ss_store_sk#3, ss_sales_price#4, d_moy#7, s_store_sk#9, s_store_name#10, s_company_name#11] + +(17) Scan parquet default.item +Output [4]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/item] +PushedFilters: [Or(And(In(i_category, [Home,Books,Electronics]),In(i_class, [wallpaper,parenting,musical])),And(In(i_category, [Shoes,Jewelry,Men]),In(i_class, [womens,birdal,pants]))), IsNotNull(i_item_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [4]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16] + +(19) Filter [codegen id : 3] +Input [4]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16] +Condition : (((i_category#16 IN (Home,Books,Electronics) AND i_class#15 IN (wallpaper,parenting,musical)) OR (i_category#16 IN (Shoes,Jewelry,Men) AND i_class#15 IN (womens,birdal,pants))) AND isnotnull(i_item_sk#13)) + +(20) BroadcastExchange +Input [4]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#13] +Join condition: None + +(22) Project [codegen id : 4] +Output [7]: [i_brand#14, i_class#15, i_category#16, ss_sales_price#4, d_moy#7, s_store_name#10, s_company_name#11] +Input [9]: [ss_item_sk#2, ss_sales_price#4, d_moy#7, s_store_name#10, s_company_name#11, i_item_sk#13, i_brand#14, i_class#15, i_category#16] + +(23) HashAggregate [codegen id : 4] +Input [7]: [i_brand#14, i_class#15, i_category#16, ss_sales_price#4, d_moy#7, s_store_name#10, s_company_name#11] +Keys [6]: [i_category#16, i_class#15, i_brand#14, s_store_name#10, s_company_name#11, d_moy#7] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#4))] 
+Aggregate Attributes [1]: [sum#18] +Results [7]: [i_category#16, i_class#15, i_brand#14, s_store_name#10, s_company_name#11, d_moy#7, sum#19] + +(24) Exchange +Input [7]: [i_category#16, i_class#15, i_brand#14, s_store_name#10, s_company_name#11, d_moy#7, sum#19] +Arguments: hashpartitioning(i_category#16, i_class#15, i_brand#14, s_store_name#10, s_company_name#11, d_moy#7, 5), true, [id=#20] + +(25) HashAggregate [codegen id : 5] +Input [7]: [i_category#16, i_class#15, i_brand#14, s_store_name#10, s_company_name#11, d_moy#7, sum#19] +Keys [6]: [i_category#16, i_class#15, i_brand#14, s_store_name#10, s_company_name#11, d_moy#7] +Functions [1]: [sum(UnscaledValue(ss_sales_price#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#4))#21] +Results [8]: [i_category#16, i_class#15, i_brand#14, s_store_name#10, s_company_name#11, d_moy#7, MakeDecimal(sum(UnscaledValue(ss_sales_price#4))#21,17,2) AS sum_sales#22, MakeDecimal(sum(UnscaledValue(ss_sales_price#4))#21,17,2) AS _w0#23] + +(26) Exchange +Input [8]: [i_category#16, i_class#15, i_brand#14, s_store_name#10, s_company_name#11, d_moy#7, sum_sales#22, _w0#23] +Arguments: hashpartitioning(i_category#16, i_brand#14, s_store_name#10, s_company_name#11, 5), true, [id=#24] + +(27) Sort [codegen id : 6] +Input [8]: [i_category#16, i_class#15, i_brand#14, s_store_name#10, s_company_name#11, d_moy#7, sum_sales#22, _w0#23] +Arguments: [i_category#16 ASC NULLS FIRST, i_brand#14 ASC NULLS FIRST, s_store_name#10 ASC NULLS FIRST, s_company_name#11 ASC NULLS FIRST], false, 0 + +(28) Window +Input [8]: [i_category#16, i_class#15, i_brand#14, s_store_name#10, s_company_name#11, d_moy#7, sum_sales#22, _w0#23] +Arguments: [avg(_w0#23) windowspecdefinition(i_category#16, i_brand#14, s_store_name#10, s_company_name#11, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#25], [i_category#16, i_brand#14, s_store_name#10, s_company_name#11] + +(29) Filter [codegen id : 7] +Input 
[9]: [i_category#16, i_class#15, i_brand#14, s_store_name#10, s_company_name#11, d_moy#7, sum_sales#22, _w0#23, avg_monthly_sales#25] +Condition : (CASE WHEN NOT (avg_monthly_sales#25 = 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#22 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6), true))) / promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(38,16), true) ELSE null END > 0.1000000000000000) + +(30) Project [codegen id : 7] +Output [8]: [i_category#16, i_class#15, i_brand#14, s_store_name#10, s_company_name#11, d_moy#7, sum_sales#22, avg_monthly_sales#25] +Input [9]: [i_category#16, i_class#15, i_brand#14, s_store_name#10, s_company_name#11, d_moy#7, sum_sales#22, _w0#23, avg_monthly_sales#25] + +(31) TakeOrderedAndProject +Input [8]: [i_category#16, i_class#15, i_brand#14, s_store_name#10, s_company_name#11, d_moy#7, sum_sales#22, avg_monthly_sales#25] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#22 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6), true) ASC NULLS FIRST, s_store_name#10 ASC NULLS FIRST], [i_category#16, i_class#15, i_brand#14, s_store_name#10, s_company_name#11, d_moy#7, sum_sales#22, avg_monthly_sales#25] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q89.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q89.sf100/simplified.txt new file mode 100644 index 0000000000000..6f7fbff7c58d8 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q89.sf100/simplified.txt @@ -0,0 +1,48 @@ +TakeOrderedAndProject [avg_monthly_sales,d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,sum_sales] + WholeStageCodegen (7) + Project [avg_monthly_sales,d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,sum_sales] + 
Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_brand,i_category,s_company_name,s_store_name] + WholeStageCodegen (6) + Sort [i_brand,i_category,s_company_name,s_store_name] + InputAdapter + Exchange [i_brand,i_category,s_company_name,s_store_name] #1 + WholeStageCodegen (5) + HashAggregate [d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,sum] [_w0,sum,sum(UnscaledValue(ss_sales_price)),sum_sales] + InputAdapter + Exchange [d_moy,i_brand,i_category,i_class,s_company_name,s_store_name] #2 + WholeStageCodegen (4) + HashAggregate [d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,ss_sales_price] [sum,sum] + Project [d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,ss_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [d_moy,s_company_name,s_store_name,ss_item_sk,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_moy,ss_item_sk,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk,d_moy] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_company_name,s_store_name,s_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [i_category,i_class,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_category,i_class,i_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q89/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q89/explain.txt new file mode 100644 index 
0000000000000..46e18398e24a2 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q89/explain.txt @@ -0,0 +1,175 @@ +== Physical Plan == +TakeOrderedAndProject (31) ++- * Project (30) + +- * Filter (29) + +- Window (28) + +- * Sort (27) + +- Exchange (26) + +- * HashAggregate (25) + +- Exchange (24) + +- * HashAggregate (23) + +- * Project (22) + +- * BroadcastHashJoin Inner BuildRight (21) + :- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (9) + : : +- * BroadcastHashJoin Inner BuildRight (8) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.item (1) + : : +- BroadcastExchange (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.store_sales (4) + : +- BroadcastExchange (14) + : +- * Project (13) + : +- * Filter (12) + : +- * ColumnarToRow (11) + : +- Scan parquet default.date_dim (10) + +- BroadcastExchange (20) + +- * Filter (19) + +- * ColumnarToRow (18) + +- Scan parquet default.store (17) + + +(1) Scan parquet default.item +Output [4]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/item] +PushedFilters: [Or(And(In(i_category, [Home,Books,Electronics]),In(i_class, [wallpaper,parenting,musical])),And(In(i_category, [Shoes,Jewelry,Men]),In(i_class, [womens,birdal,pants]))), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [4]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4] + +(3) Filter [codegen id : 4] +Input [4]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4] +Condition : (((i_category#4 IN (Home,Books,Electronics) AND i_class#3 IN (wallpaper,parenting,musical)) OR (i_category#4 IN (Shoes,Jewelry,Men) AND i_class#3 IN (womens,birdal,pants))) AND isnotnull(i_item_sk#1)) + +(4) Scan parquet default.store_sales +Output 
[4]: [ss_sold_date_sk#5, ss_item_sk#6, ss_store_sk#7, ss_sales_price#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2451545), LessThanOrEqual(ss_sold_date_sk,2451910), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [ss_sold_date_sk#5, ss_item_sk#6, ss_store_sk#7, ss_sales_price#8] + +(6) Filter [codegen id : 1] +Input [4]: [ss_sold_date_sk#5, ss_item_sk#6, ss_store_sk#7, ss_sales_price#8] +Condition : ((((isnotnull(ss_sold_date_sk#5) AND (ss_sold_date_sk#5 >= 2451545)) AND (ss_sold_date_sk#5 <= 2451910)) AND isnotnull(ss_item_sk#6)) AND isnotnull(ss_store_sk#7)) + +(7) BroadcastExchange +Input [4]: [ss_sold_date_sk#5, ss_item_sk#6, ss_store_sk#7, ss_sales_price#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#9] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [ss_item_sk#6] +Join condition: None + +(9) Project [codegen id : 4] +Output [6]: [i_brand#2, i_class#3, i_category#4, ss_sold_date_sk#5, ss_store_sk#7, ss_sales_price#8] +Input [8]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, ss_sold_date_sk#5, ss_item_sk#6, ss_store_sk#7, ss_sales_price#8] + +(10) Scan parquet default.date_dim +Output [3]: [d_date_sk#10, d_year#11, d_moy#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), GreaterThanOrEqual(d_date_sk,2451545), LessThanOrEqual(d_date_sk,2451910), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] + +(12) Filter [codegen id : 2] +Input 
[3]: [d_date_sk#10, d_year#11, d_moy#12] +Condition : ((((isnotnull(d_year#11) AND (d_year#11 = 2000)) AND (d_date_sk#10 >= 2451545)) AND (d_date_sk#10 <= 2451910)) AND isnotnull(d_date_sk#10)) + +(13) Project [codegen id : 2] +Output [2]: [d_date_sk#10, d_moy#12] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] + +(14) BroadcastExchange +Input [2]: [d_date_sk#10, d_moy#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(16) Project [codegen id : 4] +Output [6]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#7, ss_sales_price#8, d_moy#12] +Input [8]: [i_brand#2, i_class#3, i_category#4, ss_sold_date_sk#5, ss_store_sk#7, ss_sales_price#8, d_date_sk#10, d_moy#12] + +(17) Scan parquet default.store +Output [3]: [s_store_sk#14, s_store_name#15, s_company_name#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [3]: [s_store_sk#14, s_store_name#15, s_company_name#16] + +(19) Filter [codegen id : 3] +Input [3]: [s_store_sk#14, s_store_name#15, s_company_name#16] +Condition : isnotnull(s_store_sk#14) + +(20) BroadcastExchange +Input [3]: [s_store_sk#14, s_store_name#15, s_company_name#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#7] +Right keys [1]: [s_store_sk#14] +Join condition: None + +(22) Project [codegen id : 4] +Output [7]: [i_brand#2, i_class#3, i_category#4, ss_sales_price#8, d_moy#12, s_store_name#15, s_company_name#16] +Input [9]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#7, ss_sales_price#8, d_moy#12, s_store_sk#14, 
s_store_name#15, s_company_name#16] + +(23) HashAggregate [codegen id : 4] +Input [7]: [i_brand#2, i_class#3, i_category#4, ss_sales_price#8, d_moy#12, s_store_name#15, s_company_name#16] +Keys [6]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#8))] +Aggregate Attributes [1]: [sum#18] +Results [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum#19] + +(24) Exchange +Input [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum#19] +Arguments: hashpartitioning(i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, 5), true, [id=#20] + +(25) HashAggregate [codegen id : 5] +Input [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum#19] +Keys [6]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12] +Functions [1]: [sum(UnscaledValue(ss_sales_price#8))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#8))#21] +Results [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, MakeDecimal(sum(UnscaledValue(ss_sales_price#8))#21,17,2) AS sum_sales#22, MakeDecimal(sum(UnscaledValue(ss_sales_price#8))#21,17,2) AS _w0#23] + +(26) Exchange +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, _w0#23] +Arguments: hashpartitioning(i_category#4, i_brand#2, s_store_name#15, s_company_name#16, 5), true, [id=#24] + +(27) Sort [codegen id : 6] +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, _w0#23] +Arguments: [i_category#4 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, s_store_name#15 ASC NULLS FIRST, s_company_name#16 ASC NULLS FIRST], false, 0 + +(28) Window +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, 
_w0#23] +Arguments: [avg(_w0#23) windowspecdefinition(i_category#4, i_brand#2, s_store_name#15, s_company_name#16, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#25], [i_category#4, i_brand#2, s_store_name#15, s_company_name#16] + +(29) Filter [codegen id : 7] +Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, _w0#23, avg_monthly_sales#25] +Condition : (CASE WHEN NOT (avg_monthly_sales#25 = 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#22 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6), true))) / promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(38,16), true) ELSE null END > 0.1000000000000000) + +(30) Project [codegen id : 7] +Output [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, avg_monthly_sales#25] +Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, _w0#23, avg_monthly_sales#25] + +(31) TakeOrderedAndProject +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, avg_monthly_sales#25] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#22 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6), true) ASC NULLS FIRST, s_store_name#15 ASC NULLS FIRST], [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, avg_monthly_sales#25] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q89/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q89/simplified.txt new file mode 100644 index 0000000000000..3b3ad9cf61293 --- /dev/null +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q89/simplified.txt @@ -0,0 +1,48 @@ +TakeOrderedAndProject [avg_monthly_sales,d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,sum_sales] + WholeStageCodegen (7) + Project [avg_monthly_sales,d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,sum_sales] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_brand,i_category,s_company_name,s_store_name] + WholeStageCodegen (6) + Sort [i_brand,i_category,s_company_name,s_store_name] + InputAdapter + Exchange [i_brand,i_category,s_company_name,s_store_name] #1 + WholeStageCodegen (5) + HashAggregate [d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,sum] [_w0,sum,sum(UnscaledValue(ss_sales_price)),sum_sales] + InputAdapter + Exchange [d_moy,i_brand,i_category,i_class,s_company_name,s_store_name] #2 + WholeStageCodegen (4) + HashAggregate [d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,ss_sales_price] [sum,sum] + Project [d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_moy,i_brand,i_category,i_class,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand,i_category,i_class,ss_sales_price,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [i_category,i_class,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_category,i_class,i_item_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk,d_moy] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #5 
+ WholeStageCodegen (3) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_company_name,s_store_name,s_store_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q98.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q98.sf100/explain.txt new file mode 100644 index 0000000000000..62e06c90b0015 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q98.sf100/explain.txt @@ -0,0 +1,162 @@ +== Physical Plan == +* Project (29) ++- * Sort (28) + +- Exchange (27) + +- * Project (26) + +- Window (25) + +- * Sort (24) + +- Exchange (23) + +- * HashAggregate (22) + +- Exchange (21) + +- * HashAggregate (20) + +- * Project (19) + +- * SortMergeJoin Inner (18) + :- * Sort (12) + : +- Exchange (11) + : +- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.store_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.date_dim (4) + +- * Sort (17) + +- Exchange (16) + +- * Filter (15) + +- * ColumnarToRow (14) + +- Scan parquet default.item (13) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2451911), LessThanOrEqual(ss_sold_date_sk,2451941), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] + +(3) Filter [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] +Condition : (((isnotnull(ss_sold_date_sk#1) AND 
(ss_sold_date_sk#1 >= 2451911)) AND (ss_sold_date_sk#1 <= 2451941)) AND isnotnull(ss_item_sk#2)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#4, d_date#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2001-01-01), LessThanOrEqual(d_date,2001-01-31), GreaterThanOrEqual(d_date_sk,2451911), LessThanOrEqual(d_date_sk,2451941), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#4, d_date#5] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#4, d_date#5] +Condition : (((((isnotnull(d_date#5) AND (d_date#5 >= 11323)) AND (d_date#5 <= 11353)) AND (d_date_sk#4 >= 2451911)) AND (d_date_sk#4 <= 2451941)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_date#5] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(9) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(10) Project [codegen id : 2] +Output [2]: [ss_item_sk#2, ss_ext_sales_price#3] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3, d_date_sk#4] + +(11) Exchange +Input [2]: [ss_item_sk#2, ss_ext_sales_price#3] +Arguments: hashpartitioning(ss_item_sk#2, 5), true, [id=#7] + +(12) Sort [codegen id : 3] +Input [2]: [ss_item_sk#2, ss_ext_sales_price#3] +Arguments: [ss_item_sk#2 ASC NULLS FIRST], false, 0 + +(13) Scan parquet default.item +Output [6]: [i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/item] +PushedFilters: [In(i_category, [Jewelry,Sports,Books]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 4] +Input [6]: [i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] + +(15) Filter [codegen id : 4] +Input [6]: [i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Condition : (i_category#13 IN (Jewelry,Sports,Books) AND isnotnull(i_item_sk#8)) + +(16) Exchange +Input [6]: [i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Arguments: hashpartitioning(i_item_sk#8, 5), true, [id=#14] + +(17) Sort [codegen id : 5] +Input [6]: [i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Arguments: [i_item_sk#8 ASC NULLS FIRST], false, 0 + +(18) SortMergeJoin [codegen id : 6] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#8] +Join condition: None + +(19) Project [codegen id : 6] +Output [6]: [ss_ext_sales_price#3, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Input [8]: [ss_item_sk#2, ss_ext_sales_price#3, i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] + +(20) HashAggregate [codegen id : 6] +Input [6]: [ss_ext_sales_price#3, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Keys [5]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#15] +Results [6]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, sum#16] + +(21) Exchange +Input [6]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, sum#16] +Arguments: hashpartitioning(i_item_id#9, i_item_desc#10, i_category#13, i_class#12, 
i_current_price#11, 5), true, [id=#17] + +(22) HashAggregate [codegen id : 7] +Input [6]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, sum#16] +Keys [5]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#18] +Results [8]: [i_item_desc#10, i_category#13, i_class#12, i_current_price#11, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#18,17,2) AS itemrevenue#19, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#18,17,2) AS _w0#20, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#18,17,2) AS _w1#21, i_item_id#9] + +(23) Exchange +Input [8]: [i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, _w0#20, _w1#21, i_item_id#9] +Arguments: hashpartitioning(i_class#12, 5), true, [id=#22] + +(24) Sort [codegen id : 8] +Input [8]: [i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, _w0#20, _w1#21, i_item_id#9] +Arguments: [i_class#12 ASC NULLS FIRST], false, 0 + +(25) Window +Input [8]: [i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, _w0#20, _w1#21, i_item_id#9] +Arguments: [sum(_w1#21) windowspecdefinition(i_class#12, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#23], [i_class#12] + +(26) Project [codegen id : 9] +Output [7]: [i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#20) * 100.00), DecimalType(21,2), true) as decimal(27,2))) / promote_precision(_we0#23)), DecimalType(38,17), true) AS revenueratio#24, i_item_id#9] +Input [9]: [i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, _w0#20, _w1#21, i_item_id#9, _we0#23] + +(27) Exchange +Input [7]: [i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, 
revenueratio#24, i_item_id#9] +Arguments: rangepartitioning(i_category#13 ASC NULLS FIRST, i_class#12 ASC NULLS FIRST, i_item_id#9 ASC NULLS FIRST, i_item_desc#10 ASC NULLS FIRST, revenueratio#24 ASC NULLS FIRST, 5), true, [id=#25] + +(28) Sort [codegen id : 10] +Input [7]: [i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, revenueratio#24, i_item_id#9] +Arguments: [i_category#13 ASC NULLS FIRST, i_class#12 ASC NULLS FIRST, i_item_id#9 ASC NULLS FIRST, i_item_desc#10 ASC NULLS FIRST, revenueratio#24 ASC NULLS FIRST], true, 0 + +(29) Project [codegen id : 10] +Output [6]: [i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, revenueratio#24] +Input [7]: [i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, revenueratio#24, i_item_id#9] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q98.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q98.sf100/simplified.txt new file mode 100644 index 0000000000000..8e55b010434aa --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q98.sf100/simplified.txt @@ -0,0 +1,51 @@ +WholeStageCodegen (10) + Project [i_category,i_class,i_current_price,i_item_desc,itemrevenue,revenueratio] + Sort [i_category,i_class,i_item_desc,i_item_id,revenueratio] + InputAdapter + Exchange [i_category,i_class,i_item_desc,i_item_id,revenueratio] #1 + WholeStageCodegen (9) + Project [_w0,_we0,i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue] + InputAdapter + Window [_w1,i_class] + WholeStageCodegen (8) + Sort [i_class] + InputAdapter + Exchange [i_class] #2 + WholeStageCodegen (7) + HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,sum] [_w0,_w1,itemrevenue,sum,sum(UnscaledValue(ss_ext_sales_price))] + InputAdapter + Exchange [i_category,i_class,i_current_price,i_item_desc,i_item_id] #3 + WholeStageCodegen (6) + 
HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,ss_ext_sales_price] [sum,sum] + Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,ss_ext_sales_price] + SortMergeJoin [i_item_sk,ss_item_sk] + InputAdapter + WholeStageCodegen (3) + Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #4 + WholeStageCodegen (2) + Project [ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + WholeStageCodegen (5) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #6 + WholeStageCodegen (4) + Filter [i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q98/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q98/explain.txt new file mode 100644 index 0000000000000..d5d8d64149346 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q98/explain.txt @@ -0,0 +1,147 @@ +== Physical Plan == +* Project (26) ++- * Sort (25) + +- Exchange (24) + +- * Project (23) + +- Window (22) + +- * Sort (21) + +- Exchange (20) + +- * HashAggregate (19) + +- Exchange (18) + +- * HashAggregate (17) + +- * Project (16) + +- * BroadcastHashJoin Inner BuildRight (15) + :- * Project (9) + : +- * BroadcastHashJoin Inner BuildRight (8) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.store_sales (1) + : +- BroadcastExchange (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet 
default.item (4) + +- BroadcastExchange (14) + +- * Project (13) + +- * Filter (12) + +- * ColumnarToRow (11) + +- Scan parquet default.date_dim (10) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2451911), LessThanOrEqual(ss_sold_date_sk,2451941), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] + +(3) Filter [codegen id : 3] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] +Condition : (((isnotnull(ss_sold_date_sk#1) AND (ss_sold_date_sk#1 >= 2451911)) AND (ss_sold_date_sk#1 <= 2451941)) AND isnotnull(ss_item_sk#2)) + +(4) Scan parquet default.item +Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/item] +PushedFilters: [In(i_category, [Jewelry,Sports,Books]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(6) Filter [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Condition : (i_category#9 IN (Jewelry,Sports,Books) AND isnotnull(i_item_sk#4)) + +(7) BroadcastExchange +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#2] 
+Right keys [1]: [i_item_sk#4] +Join condition: None + +(9) Project [codegen id : 3] +Output [7]: [ss_sold_date_sk#1, ss_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(10) Scan parquet default.date_dim +Output [2]: [d_date_sk#11, d_date#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2001-01-01), LessThanOrEqual(d_date,2001-01-31), GreaterThanOrEqual(d_date_sk,2451911), LessThanOrEqual(d_date_sk,2451941), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#11, d_date#12] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#11, d_date#12] +Condition : (((((isnotnull(d_date#12) AND (d_date#12 >= 11323)) AND (d_date#12 <= 11353)) AND (d_date_sk#11 >= 2451911)) AND (d_date_sk#11 <= 2451941)) AND isnotnull(d_date_sk#11)) + +(13) Project [codegen id : 2] +Output [1]: [d_date_sk#11] +Input [2]: [d_date_sk#11, d_date#12] + +(14) BroadcastExchange +Input [1]: [d_date_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#11] +Join condition: None + +(16) Project [codegen id : 3] +Output [6]: [ss_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [8]: [ss_sold_date_sk#1, ss_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#11] + +(17) HashAggregate [codegen id : 3] +Input [6]: [ss_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Keys [5]: 
[i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#14] +Results [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#15] + +(18) Exchange +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#15] +Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), true, [id=#16] + +(19) HashAggregate [codegen id : 4] +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#15] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#17] +Results [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#17,17,2) AS itemrevenue#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#17,17,2) AS _w0#19, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#17,17,2) AS _w1#20, i_item_id#5] + +(20) Exchange +Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5] +Arguments: hashpartitioning(i_class#8, 5), true, [id=#21] + +(21) Sort [codegen id : 5] +Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5] +Arguments: [i_class#8 ASC NULLS FIRST], false, 0 + +(22) Window +Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5] +Arguments: [sum(_w1#20) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#22], [i_class#8] + +(23) Project [codegen id : 6] +Output [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, 
CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#19) * 100.00), DecimalType(21,2), true) as decimal(27,2))) / promote_precision(_we0#22)), DecimalType(38,17), true) AS revenueratio#23, i_item_id#5] +Input [9]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5, _we0#22] + +(24) Exchange +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, revenueratio#23, i_item_id#5] +Arguments: rangepartitioning(i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#23 ASC NULLS FIRST, 5), true, [id=#24] + +(25) Sort [codegen id : 7] +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, revenueratio#23, i_item_id#5] +Arguments: [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#23 ASC NULLS FIRST], true, 0 + +(26) Project [codegen id : 7] +Output [6]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, revenueratio#23] +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, revenueratio#23, i_item_id#5] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q98/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q98/simplified.txt new file mode 100644 index 0000000000000..6f39176d9a9e1 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q98/simplified.txt @@ -0,0 +1,42 @@ +WholeStageCodegen (7) + Project [i_category,i_class,i_current_price,i_item_desc,itemrevenue,revenueratio] + Sort [i_category,i_class,i_item_desc,i_item_id,revenueratio] + InputAdapter + Exchange [i_category,i_class,i_item_desc,i_item_id,revenueratio] #1 + WholeStageCodegen (6) + Project 
[_w0,_we0,i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue] + InputAdapter + Window [_w1,i_class] + WholeStageCodegen (5) + Sort [i_class] + InputAdapter + Exchange [i_class] #2 + WholeStageCodegen (4) + HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,sum] [_w0,_w1,itemrevenue,sum,sum(UnscaledValue(ss_ext_sales_price))] + InputAdapter + Exchange [i_category,i_class,i_current_price,i_item_desc,i_item_id] #3 + WholeStageCodegen (3) + HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,ss_ext_sales_price] [sum,sum] + Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,ss_ext_sales_price,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Filter [i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/ss_max.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/ss_max.sf100/explain.txt new file mode 100644 index 0000000000000..aba9a769a0b6b --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/ss_max.sf100/explain.txt @@ -0,0 +1,56 @@ +== Physical Plan == +* HashAggregate (8) ++- Exchange (7) + +- * HashAggregate (6) + +- * HashAggregate (5) + +- Exchange (4) + +- * HashAggregate (3) + +- * 
ColumnarToRow (2) + +- Scan parquet default.store_sales (1) + + +(1) Scan parquet default.store_sales +Output [9]: [ss_sold_date_sk#1, ss_sold_time_sk#2, ss_item_sk#3, ss_customer_sk#4, ss_cdemo_sk#5, ss_hdemo_sk#6, ss_addr_sk#7, ss_store_sk#8, ss_promo_sk#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilityWithStatsSuite/store_sales] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [9]: [ss_sold_date_sk#1, ss_sold_time_sk#2, ss_item_sk#3, ss_customer_sk#4, ss_cdemo_sk#5, ss_hdemo_sk#6, ss_addr_sk#7, ss_store_sk#8, ss_promo_sk#9] + +(3) HashAggregate [codegen id : 1] +Input [9]: [ss_sold_date_sk#1, ss_sold_time_sk#2, ss_item_sk#3, ss_customer_sk#4, ss_cdemo_sk#5, ss_hdemo_sk#6, ss_addr_sk#7, ss_store_sk#8, ss_promo_sk#9] +Keys [1]: [ss_sold_date_sk#1] +Functions [11]: [partial_count(1), partial_count(ss_sold_date_sk#1), partial_max(ss_sold_date_sk#1), partial_max(ss_sold_time_sk#2), partial_max(ss_item_sk#3), partial_max(ss_customer_sk#4), partial_max(ss_cdemo_sk#5), partial_max(ss_hdemo_sk#6), partial_max(ss_addr_sk#7), partial_max(ss_store_sk#8), partial_max(ss_promo_sk#9)] +Aggregate Attributes [11]: [count(1)#10, count(ss_sold_date_sk#1)#11, max(ss_sold_date_sk#1)#12, max(ss_sold_time_sk#2)#13, max(ss_item_sk#3)#14, max(ss_customer_sk#4)#15, max(ss_cdemo_sk#5)#16, max(ss_hdemo_sk#6)#17, max(ss_addr_sk#7)#18, max(ss_store_sk#8)#19, max(ss_promo_sk#9)#20] +Results [12]: [ss_sold_date_sk#1, count#21, count#22, max#23, max#24, max#25, max#26, max#27, max#28, max#29, max#30, max#31] + +(4) Exchange +Input [12]: [ss_sold_date_sk#1, count#21, count#22, max#23, max#24, max#25, max#26, max#27, max#28, max#29, max#30, max#31] +Arguments: hashpartitioning(ss_sold_date_sk#1, 5), true, [id=#32] + +(5) HashAggregate [codegen id : 2] +Input [12]: [ss_sold_date_sk#1, count#21, count#22, max#23, max#24, max#25, max#26, max#27, max#28, max#29, max#30, max#31] 
+Keys [1]: [ss_sold_date_sk#1] +Functions [11]: [merge_count(1), merge_count(ss_sold_date_sk#1), merge_max(ss_sold_date_sk#1), merge_max(ss_sold_time_sk#2), merge_max(ss_item_sk#3), merge_max(ss_customer_sk#4), merge_max(ss_cdemo_sk#5), merge_max(ss_hdemo_sk#6), merge_max(ss_addr_sk#7), merge_max(ss_store_sk#8), merge_max(ss_promo_sk#9)] +Aggregate Attributes [11]: [count(1)#10, count(ss_sold_date_sk#1)#11, max(ss_sold_date_sk#1)#12, max(ss_sold_time_sk#2)#13, max(ss_item_sk#3)#14, max(ss_customer_sk#4)#15, max(ss_cdemo_sk#5)#16, max(ss_hdemo_sk#6)#17, max(ss_addr_sk#7)#18, max(ss_store_sk#8)#19, max(ss_promo_sk#9)#20] +Results [12]: [ss_sold_date_sk#1, count#21, count#22, max#23, max#24, max#25, max#26, max#27, max#28, max#29, max#30, max#31] + +(6) HashAggregate [codegen id : 2] +Input [12]: [ss_sold_date_sk#1, count#21, count#22, max#23, max#24, max#25, max#26, max#27, max#28, max#29, max#30, max#31] +Keys: [] +Functions [12]: [merge_count(1), merge_count(ss_sold_date_sk#1), merge_max(ss_sold_date_sk#1), merge_max(ss_sold_time_sk#2), merge_max(ss_item_sk#3), merge_max(ss_customer_sk#4), merge_max(ss_cdemo_sk#5), merge_max(ss_hdemo_sk#6), merge_max(ss_addr_sk#7), merge_max(ss_store_sk#8), merge_max(ss_promo_sk#9), partial_count(distinct ss_sold_date_sk#1)] +Aggregate Attributes [12]: [count(1)#10, count(ss_sold_date_sk#1)#11, max(ss_sold_date_sk#1)#12, max(ss_sold_time_sk#2)#13, max(ss_item_sk#3)#14, max(ss_customer_sk#4)#15, max(ss_cdemo_sk#5)#16, max(ss_hdemo_sk#6)#17, max(ss_addr_sk#7)#18, max(ss_store_sk#8)#19, max(ss_promo_sk#9)#20, count(ss_sold_date_sk#1)#33] +Results [12]: [count#21, count#22, max#23, max#24, max#25, max#26, max#27, max#28, max#29, max#30, max#31, count#34] + +(7) Exchange +Input [12]: [count#21, count#22, max#23, max#24, max#25, max#26, max#27, max#28, max#29, max#30, max#31, count#34] +Arguments: SinglePartition, true, [id=#35] + +(8) HashAggregate [codegen id : 3] +Input [12]: [count#21, count#22, max#23, max#24, max#25, max#26, 
max#27, max#28, max#29, max#30, max#31, count#34] +Keys: [] +Functions [12]: [count(1), count(ss_sold_date_sk#1), max(ss_sold_date_sk#1), max(ss_sold_time_sk#2), max(ss_item_sk#3), max(ss_customer_sk#4), max(ss_cdemo_sk#5), max(ss_hdemo_sk#6), max(ss_addr_sk#7), max(ss_store_sk#8), max(ss_promo_sk#9), count(distinct ss_sold_date_sk#1)] +Aggregate Attributes [12]: [count(1)#10, count(ss_sold_date_sk#1)#11, max(ss_sold_date_sk#1)#12, max(ss_sold_time_sk#2)#13, max(ss_item_sk#3)#14, max(ss_customer_sk#4)#15, max(ss_cdemo_sk#5)#16, max(ss_hdemo_sk#6)#17, max(ss_addr_sk#7)#18, max(ss_store_sk#8)#19, max(ss_promo_sk#9)#20, count(ss_sold_date_sk#1)#33] +Results [12]: [count(1)#10 AS total#36, count(ss_sold_date_sk#1)#11 AS not_null_total#37, count(ss_sold_date_sk#1)#33 AS unique_days#38, max(ss_sold_date_sk#1)#12 AS max_ss_sold_date_sk#39, max(ss_sold_time_sk#2)#13 AS max_ss_sold_time_sk#40, max(ss_item_sk#3)#14 AS max_ss_item_sk#41, max(ss_customer_sk#4)#15 AS max_ss_customer_sk#42, max(ss_cdemo_sk#5)#16 AS max_ss_cdemo_sk#43, max(ss_hdemo_sk#6)#17 AS max_ss_hdemo_sk#44, max(ss_addr_sk#7)#18 AS max_ss_addr_sk#45, max(ss_store_sk#8)#19 AS max_ss_store_sk#46, max(ss_promo_sk#9)#20 AS max_ss_promo_sk#47] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/ss_max.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/ss_max.sf100/simplified.txt new file mode 100644 index 0000000000000..31622e85a4a03 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/ss_max.sf100/simplified.txt @@ -0,0 +1,14 @@ +WholeStageCodegen (3) + HashAggregate [count,count,count,max,max,max,max,max,max,max,max,max] 
[count,count,count,count(1),count(ss_sold_date_sk),count(ss_sold_date_sk),max,max,max,max,max,max,max,max,max,max(ss_addr_sk),max(ss_cdemo_sk),max(ss_customer_sk),max(ss_hdemo_sk),max(ss_item_sk),max(ss_promo_sk),max(ss_sold_date_sk),max(ss_sold_time_sk),max(ss_store_sk),max_ss_addr_sk,max_ss_cdemo_sk,max_ss_customer_sk,max_ss_hdemo_sk,max_ss_item_sk,max_ss_promo_sk,max_ss_sold_date_sk,max_ss_sold_time_sk,max_ss_store_sk,not_null_total,total,unique_days] + InputAdapter + Exchange #1 + WholeStageCodegen (2) + HashAggregate [ss_sold_date_sk] [count,count,count,count,count,count,count(1),count(ss_sold_date_sk),count(ss_sold_date_sk),max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max(ss_addr_sk),max(ss_cdemo_sk),max(ss_customer_sk),max(ss_hdemo_sk),max(ss_item_sk),max(ss_promo_sk),max(ss_sold_date_sk),max(ss_sold_time_sk),max(ss_store_sk)] + HashAggregate [ss_sold_date_sk] [count,count,count,count,count(1),count(ss_sold_date_sk),max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max(ss_addr_sk),max(ss_cdemo_sk),max(ss_customer_sk),max(ss_hdemo_sk),max(ss_item_sk),max(ss_promo_sk),max(ss_sold_date_sk),max(ss_sold_time_sk),max(ss_store_sk)] + InputAdapter + Exchange [ss_sold_date_sk] #2 + WholeStageCodegen (1) + HashAggregate [ss_addr_sk,ss_cdemo_sk,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_sold_time_sk,ss_store_sk] [count,count,count,count,count(1),count(ss_sold_date_sk),max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max(ss_addr_sk),max(ss_cdemo_sk),max(ss_customer_sk),max(ss_hdemo_sk),max(ss_item_sk),max(ss_promo_sk),max(ss_sold_date_sk),max(ss_sold_time_sk),max(ss_store_sk)] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_cdemo_sk,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_sold_time_sk,ss_store_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/ss_max/explain.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/ss_max/explain.txt new file mode 100644 index 0000000000000..f763b06dd842b --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/ss_max/explain.txt @@ -0,0 +1,56 @@ +== Physical Plan == +* HashAggregate (8) ++- Exchange (7) + +- * HashAggregate (6) + +- * HashAggregate (5) + +- Exchange (4) + +- * HashAggregate (3) + +- * ColumnarToRow (2) + +- Scan parquet default.store_sales (1) + + +(1) Scan parquet default.store_sales +Output [9]: [ss_sold_date_sk#1, ss_sold_time_sk#2, ss_item_sk#3, ss_customer_sk#4, ss_cdemo_sk#5, ss_hdemo_sk#6, ss_addr_sk#7, ss_store_sk#8, ss_promo_sk#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSModifiedPlanStabilitySuite/store_sales] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [9]: [ss_sold_date_sk#1, ss_sold_time_sk#2, ss_item_sk#3, ss_customer_sk#4, ss_cdemo_sk#5, ss_hdemo_sk#6, ss_addr_sk#7, ss_store_sk#8, ss_promo_sk#9] + +(3) HashAggregate [codegen id : 1] +Input [9]: [ss_sold_date_sk#1, ss_sold_time_sk#2, ss_item_sk#3, ss_customer_sk#4, ss_cdemo_sk#5, ss_hdemo_sk#6, ss_addr_sk#7, ss_store_sk#8, ss_promo_sk#9] +Keys [1]: [ss_sold_date_sk#1] +Functions [11]: [partial_count(1), partial_count(ss_sold_date_sk#1), partial_max(ss_sold_date_sk#1), partial_max(ss_sold_time_sk#2), partial_max(ss_item_sk#3), partial_max(ss_customer_sk#4), partial_max(ss_cdemo_sk#5), partial_max(ss_hdemo_sk#6), partial_max(ss_addr_sk#7), partial_max(ss_store_sk#8), partial_max(ss_promo_sk#9)] +Aggregate Attributes [11]: [count(1)#10, count(ss_sold_date_sk#1)#11, max(ss_sold_date_sk#1)#12, max(ss_sold_time_sk#2)#13, max(ss_item_sk#3)#14, max(ss_customer_sk#4)#15, max(ss_cdemo_sk#5)#16, max(ss_hdemo_sk#6)#17, max(ss_addr_sk#7)#18, max(ss_store_sk#8)#19, max(ss_promo_sk#9)#20] +Results [12]: [ss_sold_date_sk#1, count#21, count#22, max#23, 
max#24, max#25, max#26, max#27, max#28, max#29, max#30, max#31] + +(4) Exchange +Input [12]: [ss_sold_date_sk#1, count#21, count#22, max#23, max#24, max#25, max#26, max#27, max#28, max#29, max#30, max#31] +Arguments: hashpartitioning(ss_sold_date_sk#1, 5), true, [id=#32] + +(5) HashAggregate [codegen id : 2] +Input [12]: [ss_sold_date_sk#1, count#21, count#22, max#23, max#24, max#25, max#26, max#27, max#28, max#29, max#30, max#31] +Keys [1]: [ss_sold_date_sk#1] +Functions [11]: [merge_count(1), merge_count(ss_sold_date_sk#1), merge_max(ss_sold_date_sk#1), merge_max(ss_sold_time_sk#2), merge_max(ss_item_sk#3), merge_max(ss_customer_sk#4), merge_max(ss_cdemo_sk#5), merge_max(ss_hdemo_sk#6), merge_max(ss_addr_sk#7), merge_max(ss_store_sk#8), merge_max(ss_promo_sk#9)] +Aggregate Attributes [11]: [count(1)#10, count(ss_sold_date_sk#1)#11, max(ss_sold_date_sk#1)#12, max(ss_sold_time_sk#2)#13, max(ss_item_sk#3)#14, max(ss_customer_sk#4)#15, max(ss_cdemo_sk#5)#16, max(ss_hdemo_sk#6)#17, max(ss_addr_sk#7)#18, max(ss_store_sk#8)#19, max(ss_promo_sk#9)#20] +Results [12]: [ss_sold_date_sk#1, count#21, count#22, max#23, max#24, max#25, max#26, max#27, max#28, max#29, max#30, max#31] + +(6) HashAggregate [codegen id : 2] +Input [12]: [ss_sold_date_sk#1, count#21, count#22, max#23, max#24, max#25, max#26, max#27, max#28, max#29, max#30, max#31] +Keys: [] +Functions [12]: [merge_count(1), merge_count(ss_sold_date_sk#1), merge_max(ss_sold_date_sk#1), merge_max(ss_sold_time_sk#2), merge_max(ss_item_sk#3), merge_max(ss_customer_sk#4), merge_max(ss_cdemo_sk#5), merge_max(ss_hdemo_sk#6), merge_max(ss_addr_sk#7), merge_max(ss_store_sk#8), merge_max(ss_promo_sk#9), partial_count(distinct ss_sold_date_sk#1)] +Aggregate Attributes [12]: [count(1)#10, count(ss_sold_date_sk#1)#11, max(ss_sold_date_sk#1)#12, max(ss_sold_time_sk#2)#13, max(ss_item_sk#3)#14, max(ss_customer_sk#4)#15, max(ss_cdemo_sk#5)#16, max(ss_hdemo_sk#6)#17, max(ss_addr_sk#7)#18, max(ss_store_sk#8)#19, 
max(ss_promo_sk#9)#20, count(ss_sold_date_sk#1)#33] +Results [12]: [count#21, count#22, max#23, max#24, max#25, max#26, max#27, max#28, max#29, max#30, max#31, count#34] + +(7) Exchange +Input [12]: [count#21, count#22, max#23, max#24, max#25, max#26, max#27, max#28, max#29, max#30, max#31, count#34] +Arguments: SinglePartition, true, [id=#35] + +(8) HashAggregate [codegen id : 3] +Input [12]: [count#21, count#22, max#23, max#24, max#25, max#26, max#27, max#28, max#29, max#30, max#31, count#34] +Keys: [] +Functions [12]: [count(1), count(ss_sold_date_sk#1), max(ss_sold_date_sk#1), max(ss_sold_time_sk#2), max(ss_item_sk#3), max(ss_customer_sk#4), max(ss_cdemo_sk#5), max(ss_hdemo_sk#6), max(ss_addr_sk#7), max(ss_store_sk#8), max(ss_promo_sk#9), count(distinct ss_sold_date_sk#1)] +Aggregate Attributes [12]: [count(1)#10, count(ss_sold_date_sk#1)#11, max(ss_sold_date_sk#1)#12, max(ss_sold_time_sk#2)#13, max(ss_item_sk#3)#14, max(ss_customer_sk#4)#15, max(ss_cdemo_sk#5)#16, max(ss_hdemo_sk#6)#17, max(ss_addr_sk#7)#18, max(ss_store_sk#8)#19, max(ss_promo_sk#9)#20, count(ss_sold_date_sk#1)#33] +Results [12]: [count(1)#10 AS total#36, count(ss_sold_date_sk#1)#11 AS not_null_total#37, count(ss_sold_date_sk#1)#33 AS unique_days#38, max(ss_sold_date_sk#1)#12 AS max_ss_sold_date_sk#39, max(ss_sold_time_sk#2)#13 AS max_ss_sold_time_sk#40, max(ss_item_sk#3)#14 AS max_ss_item_sk#41, max(ss_customer_sk#4)#15 AS max_ss_customer_sk#42, max(ss_cdemo_sk#5)#16 AS max_ss_cdemo_sk#43, max(ss_hdemo_sk#6)#17 AS max_ss_hdemo_sk#44, max(ss_addr_sk#7)#18 AS max_ss_addr_sk#45, max(ss_store_sk#8)#19 AS max_ss_store_sk#46, max(ss_promo_sk#9)#20 AS max_ss_promo_sk#47] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/ss_max/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/ss_max/simplified.txt new file mode 100644 index 0000000000000..31622e85a4a03 --- /dev/null +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/ss_max/simplified.txt @@ -0,0 +1,14 @@ +WholeStageCodegen (3) + HashAggregate [count,count,count,max,max,max,max,max,max,max,max,max] [count,count,count,count(1),count(ss_sold_date_sk),count(ss_sold_date_sk),max,max,max,max,max,max,max,max,max,max(ss_addr_sk),max(ss_cdemo_sk),max(ss_customer_sk),max(ss_hdemo_sk),max(ss_item_sk),max(ss_promo_sk),max(ss_sold_date_sk),max(ss_sold_time_sk),max(ss_store_sk),max_ss_addr_sk,max_ss_cdemo_sk,max_ss_customer_sk,max_ss_hdemo_sk,max_ss_item_sk,max_ss_promo_sk,max_ss_sold_date_sk,max_ss_sold_time_sk,max_ss_store_sk,not_null_total,total,unique_days] + InputAdapter + Exchange #1 + WholeStageCodegen (2) + HashAggregate [ss_sold_date_sk] [count,count,count,count,count,count,count(1),count(ss_sold_date_sk),count(ss_sold_date_sk),max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max(ss_addr_sk),max(ss_cdemo_sk),max(ss_customer_sk),max(ss_hdemo_sk),max(ss_item_sk),max(ss_promo_sk),max(ss_sold_date_sk),max(ss_sold_time_sk),max(ss_store_sk)] + HashAggregate [ss_sold_date_sk] [count,count,count,count,count(1),count(ss_sold_date_sk),max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max(ss_addr_sk),max(ss_cdemo_sk),max(ss_customer_sk),max(ss_hdemo_sk),max(ss_item_sk),max(ss_promo_sk),max(ss_sold_date_sk),max(ss_sold_time_sk),max(ss_store_sk)] + InputAdapter + Exchange [ss_sold_date_sk] #2 + WholeStageCodegen (1) + HashAggregate [ss_addr_sk,ss_cdemo_sk,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_sold_time_sk,ss_store_sk] [count,count,count,count,count(1),count(ss_sold_date_sk),max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max(ss_addr_sk),max(ss_cdemo_sk),max(ss_customer_sk),max(ss_hdemo_sk),max(ss_item_sk),max(ss_promo_sk),max(ss_sold_date_sk),max(ss_sold_time_sk),max(ss_store_sk)] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales 
[ss_addr_sk,ss_cdemo_sk,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_sold_time_sk,ss_store_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1.sf100/explain.txt new file mode 100644 index 0000000000000..5caf95bee5481 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1.sf100/explain.txt @@ -0,0 +1,270 @@ +== Physical Plan == +TakeOrderedAndProject (47) ++- * Project (46) + +- * SortMergeJoin Inner (45) + :- * Sort (39) + : +- Exchange (38) + : +- * Project (37) + : +- * BroadcastHashJoin Inner BuildRight (36) + : :- * Project (30) + : : +- * BroadcastHashJoin Inner BuildRight (29) + : : :- * Filter (14) + : : : +- * HashAggregate (13) + : : : +- Exchange (12) + : : : +- * HashAggregate (11) + : : : +- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_returns (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (28) + : : +- * Filter (27) + : : +- * HashAggregate (26) + : : +- Exchange (25) + : : +- * HashAggregate (24) + : : +- * HashAggregate (23) + : : +- Exchange (22) + : : +- * HashAggregate (21) + : : +- * Project (20) + : : +- * BroadcastHashJoin Inner BuildRight (19) + : : :- * Filter (17) + : : : +- * ColumnarToRow (16) + : : : +- Scan parquet default.store_returns (15) + : : +- ReusedExchange (18) + : +- BroadcastExchange (35) + : +- * Project (34) + : +- * Filter (33) + : +- * ColumnarToRow (32) + : +- Scan parquet default.store (31) + +- * Sort (44) + +- Exchange (43) + +- * Filter (42) + +- * ColumnarToRow (41) + +- Scan parquet default.customer (40) + + +(1) Scan parquet default.store_returns +Output [4]: 
[sr_returned_date_sk#1, sr_customer_sk#2, sr_store_sk#3, sr_return_amt#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_returns] +PushedFilters: [IsNotNull(sr_returned_date_sk), IsNotNull(sr_store_sk), IsNotNull(sr_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [4]: [sr_returned_date_sk#1, sr_customer_sk#2, sr_store_sk#3, sr_return_amt#4] + +(3) Filter [codegen id : 2] +Input [4]: [sr_returned_date_sk#1, sr_customer_sk#2, sr_store_sk#3, sr_return_amt#4] +Condition : ((isnotnull(sr_returned_date_sk#1) AND isnotnull(sr_store_sk#3)) AND isnotnull(sr_customer_sk#2)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#5, d_year#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#5, d_year#6] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#5, d_year#6] +Condition : ((isnotnull(d_year#6) AND (d_year#6 = 2000)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [2]: [d_date_sk#5, d_year#6] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [sr_returned_date_sk#1] +Right keys [1]: [cast(d_date_sk#5 as bigint)] +Join condition: None + +(10) Project [codegen id : 2] +Output [3]: [sr_customer_sk#2, sr_store_sk#3, sr_return_amt#4] +Input [5]: [sr_returned_date_sk#1, sr_customer_sk#2, sr_store_sk#3, sr_return_amt#4, d_date_sk#5] + +(11) HashAggregate [codegen id : 2] +Input [3]: [sr_customer_sk#2, sr_store_sk#3, sr_return_amt#4] 
+Keys [2]: [sr_customer_sk#2, sr_store_sk#3] +Functions [1]: [partial_sum(UnscaledValue(sr_return_amt#4))] +Aggregate Attributes [1]: [sum#8] +Results [3]: [sr_customer_sk#2, sr_store_sk#3, sum#9] + +(12) Exchange +Input [3]: [sr_customer_sk#2, sr_store_sk#3, sum#9] +Arguments: hashpartitioning(sr_customer_sk#2, sr_store_sk#3, 5), true, [id=#10] + +(13) HashAggregate [codegen id : 8] +Input [3]: [sr_customer_sk#2, sr_store_sk#3, sum#9] +Keys [2]: [sr_customer_sk#2, sr_store_sk#3] +Functions [1]: [sum(UnscaledValue(sr_return_amt#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(sr_return_amt#4))#11] +Results [3]: [sr_customer_sk#2 AS ctr_customer_sk#12, sr_store_sk#3 AS ctr_store_sk#13, MakeDecimal(sum(UnscaledValue(sr_return_amt#4))#11,17,2) AS ctr_total_return#14] + +(14) Filter [codegen id : 8] +Input [3]: [ctr_customer_sk#12, ctr_store_sk#13, ctr_total_return#14] +Condition : isnotnull(ctr_total_return#14) + +(15) Scan parquet default.store_returns +Output [4]: [sr_returned_date_sk#1, sr_customer_sk#2, sr_store_sk#3, sr_return_amt#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_returns] +PushedFilters: [IsNotNull(sr_returned_date_sk), IsNotNull(sr_store_sk)] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 4] +Input [4]: [sr_returned_date_sk#1, sr_customer_sk#2, sr_store_sk#3, sr_return_amt#4] + +(17) Filter [codegen id : 4] +Input [4]: [sr_returned_date_sk#1, sr_customer_sk#2, sr_store_sk#3, sr_return_amt#4] +Condition : (isnotnull(sr_returned_date_sk#1) AND isnotnull(sr_store_sk#3)) + +(18) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(19) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [sr_returned_date_sk#1] +Right keys [1]: [cast(d_date_sk#5 as bigint)] +Join condition: None + +(20) Project [codegen id : 4] +Output [3]: [sr_customer_sk#2, sr_store_sk#3, sr_return_amt#4] +Input [5]: 
[sr_returned_date_sk#1, sr_customer_sk#2, sr_store_sk#3, sr_return_amt#4, d_date_sk#5] + +(21) HashAggregate [codegen id : 4] +Input [3]: [sr_customer_sk#2, sr_store_sk#3, sr_return_amt#4] +Keys [2]: [sr_customer_sk#2, sr_store_sk#3] +Functions [1]: [partial_sum(UnscaledValue(sr_return_amt#4))] +Aggregate Attributes [1]: [sum#15] +Results [3]: [sr_customer_sk#2, sr_store_sk#3, sum#16] + +(22) Exchange +Input [3]: [sr_customer_sk#2, sr_store_sk#3, sum#16] +Arguments: hashpartitioning(sr_customer_sk#2, sr_store_sk#3, 5), true, [id=#17] + +(23) HashAggregate [codegen id : 5] +Input [3]: [sr_customer_sk#2, sr_store_sk#3, sum#16] +Keys [2]: [sr_customer_sk#2, sr_store_sk#3] +Functions [1]: [sum(UnscaledValue(sr_return_amt#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(sr_return_amt#4))#18] +Results [2]: [sr_store_sk#3 AS ctr_store_sk#13, MakeDecimal(sum(UnscaledValue(sr_return_amt#4))#18,17,2) AS ctr_total_return#14] + +(24) HashAggregate [codegen id : 5] +Input [2]: [ctr_store_sk#13, ctr_total_return#14] +Keys [1]: [ctr_store_sk#13] +Functions [1]: [partial_avg(ctr_total_return#14)] +Aggregate Attributes [2]: [sum#19, count#20] +Results [3]: [ctr_store_sk#13, sum#21, count#22] + +(25) Exchange +Input [3]: [ctr_store_sk#13, sum#21, count#22] +Arguments: hashpartitioning(ctr_store_sk#13, 5), true, [id=#23] + +(26) HashAggregate [codegen id : 6] +Input [3]: [ctr_store_sk#13, sum#21, count#22] +Keys [1]: [ctr_store_sk#13] +Functions [1]: [avg(ctr_total_return#14)] +Aggregate Attributes [1]: [avg(ctr_total_return#14)#24] +Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#14)#24) * 1.200000), DecimalType(24,7), true) AS (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#25, ctr_store_sk#13 AS ctr_store_sk#13#26] + +(27) Filter [codegen id : 6] +Input [2]: [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#25, ctr_store_sk#13#26] +Condition : isnotnull((CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * 
CAST(1.2 AS DECIMAL(21,6)))#25) + +(28) BroadcastExchange +Input [2]: [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#25, ctr_store_sk#13#26] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, true]),false), [id=#27] + +(29) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ctr_store_sk#13] +Right keys [1]: [ctr_store_sk#13#26] +Join condition: (cast(ctr_total_return#14 as decimal(24,7)) > (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#25) + +(30) Project [codegen id : 8] +Output [2]: [ctr_customer_sk#12, ctr_store_sk#13] +Input [5]: [ctr_customer_sk#12, ctr_store_sk#13, ctr_total_return#14, (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#25, ctr_store_sk#13#26] + +(31) Scan parquet default.store +Output [2]: [s_store_sk#28, s_state#29] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 7] +Input [2]: [s_store_sk#28, s_state#29] + +(33) Filter [codegen id : 7] +Input [2]: [s_store_sk#28, s_state#29] +Condition : ((isnotnull(s_state#29) AND (s_state#29 = TN)) AND isnotnull(s_store_sk#28)) + +(34) Project [codegen id : 7] +Output [1]: [s_store_sk#28] +Input [2]: [s_store_sk#28, s_state#29] + +(35) BroadcastExchange +Input [1]: [s_store_sk#28] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#30] + +(36) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ctr_store_sk#13] +Right keys [1]: [cast(s_store_sk#28 as bigint)] +Join condition: None + +(37) Project [codegen id : 8] +Output [1]: [ctr_customer_sk#12] +Input [3]: [ctr_customer_sk#12, ctr_store_sk#13, s_store_sk#28] + +(38) Exchange +Input [1]: [ctr_customer_sk#12] +Arguments: hashpartitioning(ctr_customer_sk#12, 5), 
true, [id=#31] + +(39) Sort [codegen id : 9] +Input [1]: [ctr_customer_sk#12] +Arguments: [ctr_customer_sk#12 ASC NULLS FIRST], false, 0 + +(40) Scan parquet default.customer +Output [2]: [c_customer_sk#32, c_customer_id#33] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 10] +Input [2]: [c_customer_sk#32, c_customer_id#33] + +(42) Filter [codegen id : 10] +Input [2]: [c_customer_sk#32, c_customer_id#33] +Condition : isnotnull(c_customer_sk#32) + +(43) Exchange +Input [2]: [c_customer_sk#32, c_customer_id#33] +Arguments: hashpartitioning(cast(c_customer_sk#32 as bigint), 5), true, [id=#34] + +(44) Sort [codegen id : 11] +Input [2]: [c_customer_sk#32, c_customer_id#33] +Arguments: [cast(c_customer_sk#32 as bigint) ASC NULLS FIRST], false, 0 + +(45) SortMergeJoin [codegen id : 12] +Left keys [1]: [ctr_customer_sk#12] +Right keys [1]: [cast(c_customer_sk#32 as bigint)] +Join condition: None + +(46) Project [codegen id : 12] +Output [1]: [c_customer_id#33] +Input [3]: [ctr_customer_sk#12, c_customer_sk#32, c_customer_id#33] + +(47) TakeOrderedAndProject +Input [1]: [c_customer_id#33] +Arguments: 100, [c_customer_id#33 ASC NULLS FIRST], [c_customer_id#33] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1.sf100/simplified.txt new file mode 100644 index 0000000000000..40992f74d8a06 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1.sf100/simplified.txt @@ -0,0 +1,74 @@ +TakeOrderedAndProject [c_customer_id] + WholeStageCodegen (12) + Project [c_customer_id] + SortMergeJoin [c_customer_sk,ctr_customer_sk] + InputAdapter + WholeStageCodegen (9) + Sort [ctr_customer_sk] + 
InputAdapter + Exchange [ctr_customer_sk] #1 + WholeStageCodegen (8) + Project [ctr_customer_sk] + BroadcastHashJoin [ctr_store_sk,s_store_sk] + Project [ctr_customer_sk,ctr_store_sk] + BroadcastHashJoin [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),ctr_store_sk,ctr_store_skL,ctr_total_return] + Filter [ctr_total_return] + HashAggregate [sr_customer_sk,sr_store_sk,sum] [ctr_customer_sk,ctr_store_sk,ctr_total_return,sum,sum(UnscaledValue(sr_return_amt))] + InputAdapter + Exchange [sr_customer_sk,sr_store_sk] #2 + WholeStageCodegen (2) + HashAggregate [sr_customer_sk,sr_return_amt,sr_store_sk] [sum,sum] + Project [sr_customer_sk,sr_return_amt,sr_store_sk] + BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + Filter [sr_customer_sk,sr_returned_date_sk,sr_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_customer_sk,sr_return_amt,sr_returned_date_sk,sr_store_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (6) + Filter [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] + HashAggregate [count,ctr_store_sk,sum] [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),avg(ctr_total_return),count,ctr_store_skL,sum] + InputAdapter + Exchange [ctr_store_sk] #5 + WholeStageCodegen (5) + HashAggregate [ctr_store_sk,ctr_total_return] [count,count,sum,sum] + HashAggregate [sr_customer_sk,sr_store_sk,sum] [ctr_store_sk,ctr_total_return,sum,sum(UnscaledValue(sr_return_amt))] + InputAdapter + Exchange [sr_customer_sk,sr_store_sk] #6 + WholeStageCodegen (4) + HashAggregate [sr_customer_sk,sr_return_amt,sr_store_sk] [sum,sum] + Project [sr_customer_sk,sr_return_amt,sr_store_sk] + BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + Filter [sr_returned_date_sk,sr_store_sk] + 
ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_customer_sk,sr_return_amt,sr_returned_date_sk,sr_store_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (7) + Project [s_store_sk] + Filter [s_state,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_state,s_store_sk] + InputAdapter + WholeStageCodegen (11) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #8 + WholeStageCodegen (10) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_id,c_customer_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/explain.txt new file mode 100644 index 0000000000000..756c752dd7040 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/explain.txt @@ -0,0 +1,255 @@ +== Physical Plan == +TakeOrderedAndProject (44) ++- * Project (43) + +- * BroadcastHashJoin Inner BuildRight (42) + :- * Project (37) + : +- * BroadcastHashJoin Inner BuildRight (36) + : :- * Project (30) + : : +- * BroadcastHashJoin Inner BuildRight (29) + : : :- * Filter (14) + : : : +- * HashAggregate (13) + : : : +- Exchange (12) + : : : +- * HashAggregate (11) + : : : +- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_returns (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (28) + : : +- * Filter (27) + : : +- * HashAggregate (26) + : : +- Exchange (25) + : : +- * HashAggregate (24) + : : +- * HashAggregate (23) + : : +- Exchange (22) + : : +- * HashAggregate (21) + : : +- * Project (20) + : : +- * BroadcastHashJoin Inner BuildRight (19) + : : 
:- * Filter (17) + : : : +- * ColumnarToRow (16) + : : : +- Scan parquet default.store_returns (15) + : : +- ReusedExchange (18) + : +- BroadcastExchange (35) + : +- * Project (34) + : +- * Filter (33) + : +- * ColumnarToRow (32) + : +- Scan parquet default.store (31) + +- BroadcastExchange (41) + +- * Filter (40) + +- * ColumnarToRow (39) + +- Scan parquet default.customer (38) + + +(1) Scan parquet default.store_returns +Output [4]: [sr_returned_date_sk#1, sr_customer_sk#2, sr_store_sk#3, sr_return_amt#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_returns] +PushedFilters: [IsNotNull(sr_returned_date_sk), IsNotNull(sr_store_sk), IsNotNull(sr_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [4]: [sr_returned_date_sk#1, sr_customer_sk#2, sr_store_sk#3, sr_return_amt#4] + +(3) Filter [codegen id : 2] +Input [4]: [sr_returned_date_sk#1, sr_customer_sk#2, sr_store_sk#3, sr_return_amt#4] +Condition : ((isnotnull(sr_returned_date_sk#1) AND isnotnull(sr_store_sk#3)) AND isnotnull(sr_customer_sk#2)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#5, d_year#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#5, d_year#6] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#5, d_year#6] +Condition : ((isnotnull(d_year#6) AND (d_year#6 = 2000)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [2]: [d_date_sk#5, d_year#6] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin 
[codegen id : 2] +Left keys [1]: [sr_returned_date_sk#1] +Right keys [1]: [cast(d_date_sk#5 as bigint)] +Join condition: None + +(10) Project [codegen id : 2] +Output [3]: [sr_customer_sk#2, sr_store_sk#3, sr_return_amt#4] +Input [5]: [sr_returned_date_sk#1, sr_customer_sk#2, sr_store_sk#3, sr_return_amt#4, d_date_sk#5] + +(11) HashAggregate [codegen id : 2] +Input [3]: [sr_customer_sk#2, sr_store_sk#3, sr_return_amt#4] +Keys [2]: [sr_customer_sk#2, sr_store_sk#3] +Functions [1]: [partial_sum(UnscaledValue(sr_return_amt#4))] +Aggregate Attributes [1]: [sum#8] +Results [3]: [sr_customer_sk#2, sr_store_sk#3, sum#9] + +(12) Exchange +Input [3]: [sr_customer_sk#2, sr_store_sk#3, sum#9] +Arguments: hashpartitioning(sr_customer_sk#2, sr_store_sk#3, 5), true, [id=#10] + +(13) HashAggregate [codegen id : 9] +Input [3]: [sr_customer_sk#2, sr_store_sk#3, sum#9] +Keys [2]: [sr_customer_sk#2, sr_store_sk#3] +Functions [1]: [sum(UnscaledValue(sr_return_amt#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(sr_return_amt#4))#11] +Results [3]: [sr_customer_sk#2 AS ctr_customer_sk#12, sr_store_sk#3 AS ctr_store_sk#13, MakeDecimal(sum(UnscaledValue(sr_return_amt#4))#11,17,2) AS ctr_total_return#14] + +(14) Filter [codegen id : 9] +Input [3]: [ctr_customer_sk#12, ctr_store_sk#13, ctr_total_return#14] +Condition : isnotnull(ctr_total_return#14) + +(15) Scan parquet default.store_returns +Output [4]: [sr_returned_date_sk#1, sr_customer_sk#2, sr_store_sk#3, sr_return_amt#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_returns] +PushedFilters: [IsNotNull(sr_returned_date_sk), IsNotNull(sr_store_sk)] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 4] +Input [4]: [sr_returned_date_sk#1, sr_customer_sk#2, sr_store_sk#3, sr_return_amt#4] + +(17) Filter [codegen id : 4] +Input [4]: [sr_returned_date_sk#1, sr_customer_sk#2, sr_store_sk#3, sr_return_amt#4] +Condition : 
(isnotnull(sr_returned_date_sk#1) AND isnotnull(sr_store_sk#3)) + +(18) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(19) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [sr_returned_date_sk#1] +Right keys [1]: [cast(d_date_sk#5 as bigint)] +Join condition: None + +(20) Project [codegen id : 4] +Output [3]: [sr_customer_sk#2, sr_store_sk#3, sr_return_amt#4] +Input [5]: [sr_returned_date_sk#1, sr_customer_sk#2, sr_store_sk#3, sr_return_amt#4, d_date_sk#5] + +(21) HashAggregate [codegen id : 4] +Input [3]: [sr_customer_sk#2, sr_store_sk#3, sr_return_amt#4] +Keys [2]: [sr_customer_sk#2, sr_store_sk#3] +Functions [1]: [partial_sum(UnscaledValue(sr_return_amt#4))] +Aggregate Attributes [1]: [sum#15] +Results [3]: [sr_customer_sk#2, sr_store_sk#3, sum#16] + +(22) Exchange +Input [3]: [sr_customer_sk#2, sr_store_sk#3, sum#16] +Arguments: hashpartitioning(sr_customer_sk#2, sr_store_sk#3, 5), true, [id=#17] + +(23) HashAggregate [codegen id : 5] +Input [3]: [sr_customer_sk#2, sr_store_sk#3, sum#16] +Keys [2]: [sr_customer_sk#2, sr_store_sk#3] +Functions [1]: [sum(UnscaledValue(sr_return_amt#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(sr_return_amt#4))#18] +Results [2]: [sr_store_sk#3 AS ctr_store_sk#13, MakeDecimal(sum(UnscaledValue(sr_return_amt#4))#18,17,2) AS ctr_total_return#14] + +(24) HashAggregate [codegen id : 5] +Input [2]: [ctr_store_sk#13, ctr_total_return#14] +Keys [1]: [ctr_store_sk#13] +Functions [1]: [partial_avg(ctr_total_return#14)] +Aggregate Attributes [2]: [sum#19, count#20] +Results [3]: [ctr_store_sk#13, sum#21, count#22] + +(25) Exchange +Input [3]: [ctr_store_sk#13, sum#21, count#22] +Arguments: hashpartitioning(ctr_store_sk#13, 5), true, [id=#23] + +(26) HashAggregate [codegen id : 6] +Input [3]: [ctr_store_sk#13, sum#21, count#22] +Keys [1]: [ctr_store_sk#13] +Functions [1]: [avg(ctr_total_return#14)] +Aggregate Attributes [1]: [avg(ctr_total_return#14)#24] +Results [2]: 
[CheckOverflow((promote_precision(avg(ctr_total_return#14)#24) * 1.200000), DecimalType(24,7), true) AS (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#25, ctr_store_sk#13 AS ctr_store_sk#13#26] + +(27) Filter [codegen id : 6] +Input [2]: [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#25, ctr_store_sk#13#26] +Condition : isnotnull((CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#25) + +(28) BroadcastExchange +Input [2]: [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#25, ctr_store_sk#13#26] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, true]),false), [id=#27] + +(29) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ctr_store_sk#13] +Right keys [1]: [ctr_store_sk#13#26] +Join condition: (cast(ctr_total_return#14 as decimal(24,7)) > (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#25) + +(30) Project [codegen id : 9] +Output [2]: [ctr_customer_sk#12, ctr_store_sk#13] +Input [5]: [ctr_customer_sk#12, ctr_store_sk#13, ctr_total_return#14, (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#25, ctr_store_sk#13#26] + +(31) Scan parquet default.store +Output [2]: [s_store_sk#28, s_state#29] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 7] +Input [2]: [s_store_sk#28, s_state#29] + +(33) Filter [codegen id : 7] +Input [2]: [s_store_sk#28, s_state#29] +Condition : ((isnotnull(s_state#29) AND (s_state#29 = TN)) AND isnotnull(s_store_sk#28)) + +(34) Project [codegen id : 7] +Output [1]: [s_store_sk#28] +Input [2]: [s_store_sk#28, s_state#29] + +(35) BroadcastExchange +Input [1]: [s_store_sk#28] +Arguments: 
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#30] + +(36) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ctr_store_sk#13] +Right keys [1]: [cast(s_store_sk#28 as bigint)] +Join condition: None + +(37) Project [codegen id : 9] +Output [1]: [ctr_customer_sk#12] +Input [3]: [ctr_customer_sk#12, ctr_store_sk#13, s_store_sk#28] + +(38) Scan parquet default.customer +Output [2]: [c_customer_sk#31, c_customer_id#32] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(39) ColumnarToRow [codegen id : 8] +Input [2]: [c_customer_sk#31, c_customer_id#32] + +(40) Filter [codegen id : 8] +Input [2]: [c_customer_sk#31, c_customer_id#32] +Condition : isnotnull(c_customer_sk#31) + +(41) BroadcastExchange +Input [2]: [c_customer_sk#31, c_customer_id#32] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#33] + +(42) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ctr_customer_sk#12] +Right keys [1]: [cast(c_customer_sk#31 as bigint)] +Join condition: None + +(43) Project [codegen id : 9] +Output [1]: [c_customer_id#32] +Input [3]: [ctr_customer_sk#12, c_customer_sk#31, c_customer_id#32] + +(44) TakeOrderedAndProject +Input [1]: [c_customer_id#32] +Arguments: 100, [c_customer_id#32 ASC NULLS FIRST], [c_customer_id#32] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/simplified.txt new file mode 100644 index 0000000000000..301ea242ec70b --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/simplified.txt @@ -0,0 +1,65 @@ +TakeOrderedAndProject [c_customer_id] + WholeStageCodegen (9) + Project [c_customer_id] + BroadcastHashJoin 
[c_customer_sk,ctr_customer_sk] + Project [ctr_customer_sk] + BroadcastHashJoin [ctr_store_sk,s_store_sk] + Project [ctr_customer_sk,ctr_store_sk] + BroadcastHashJoin [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),ctr_store_sk,ctr_store_skL,ctr_total_return] + Filter [ctr_total_return] + HashAggregate [sr_customer_sk,sr_store_sk,sum] [ctr_customer_sk,ctr_store_sk,ctr_total_return,sum,sum(UnscaledValue(sr_return_amt))] + InputAdapter + Exchange [sr_customer_sk,sr_store_sk] #1 + WholeStageCodegen (2) + HashAggregate [sr_customer_sk,sr_return_amt,sr_store_sk] [sum,sum] + Project [sr_customer_sk,sr_return_amt,sr_store_sk] + BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + Filter [sr_customer_sk,sr_returned_date_sk,sr_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_customer_sk,sr_return_amt,sr_returned_date_sk,sr_store_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (6) + Filter [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] + HashAggregate [count,ctr_store_sk,sum] [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),avg(ctr_total_return),count,ctr_store_skL,sum] + InputAdapter + Exchange [ctr_store_sk] #4 + WholeStageCodegen (5) + HashAggregate [ctr_store_sk,ctr_total_return] [count,count,sum,sum] + HashAggregate [sr_customer_sk,sr_store_sk,sum] [ctr_store_sk,ctr_total_return,sum,sum(UnscaledValue(sr_return_amt))] + InputAdapter + Exchange [sr_customer_sk,sr_store_sk] #5 + WholeStageCodegen (4) + HashAggregate [sr_customer_sk,sr_return_amt,sr_store_sk] [sum,sum] + Project [sr_customer_sk,sr_return_amt,sr_store_sk] + BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + Filter [sr_returned_date_sk,sr_store_sk] + ColumnarToRow + InputAdapter + Scan 
parquet default.store_returns [sr_customer_sk,sr_return_amt,sr_returned_date_sk,sr_store_sk] + InputAdapter + ReusedExchange [d_date_sk] #2 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Project [s_store_sk] + Filter [s_state,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_state,s_store_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (8) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_id,c_customer_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10.sf100/explain.txt new file mode 100644 index 0000000000000..72f9339134e87 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10.sf100/explain.txt @@ -0,0 +1,319 @@ +== Physical Plan == +TakeOrderedAndProject (58) ++- * HashAggregate (57) + +- Exchange (56) + +- * HashAggregate (55) + +- * Project (54) + +- * SortMergeJoin Inner (53) + :- * Sort (47) + : +- Exchange (46) + : +- * Project (45) + : +- * BroadcastHashJoin Inner BuildRight (44) + : :- * Project (38) + : : +- * Filter (37) + : : +- SortMergeJoin ExistenceJoin(exists#1) (36) + : : :- SortMergeJoin ExistenceJoin(exists#2) (27) + : : : :- SortMergeJoin LeftSemi (18) + : : : : :- * Sort (5) + : : : : : +- Exchange (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.customer (1) + : : : : +- * Sort (17) + : : : : +- Exchange (16) + : : : : +- * Project (15) + : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : :- * Filter (8) + : : : : : +- * ColumnarToRow (7) + : : : : : +- Scan parquet default.store_sales (6) + : : : : +- BroadcastExchange (13) + : : : : +- * Project (12) + : : : : +- * Filter (11) + : : : : +- * ColumnarToRow (10) + : : : : +- Scan parquet default.date_dim (9) + : : : +- * Sort (26) + : : : +- Exchange (25) + 
: : : +- * Project (24) + : : : +- * BroadcastHashJoin Inner BuildRight (23) + : : : :- * Filter (21) + : : : : +- * ColumnarToRow (20) + : : : : +- Scan parquet default.web_sales (19) + : : : +- ReusedExchange (22) + : : +- * Sort (35) + : : +- Exchange (34) + : : +- * Project (33) + : : +- * BroadcastHashJoin Inner BuildRight (32) + : : :- * Filter (30) + : : : +- * ColumnarToRow (29) + : : : +- Scan parquet default.catalog_sales (28) + : : +- ReusedExchange (31) + : +- BroadcastExchange (43) + : +- * Project (42) + : +- * Filter (41) + : +- * ColumnarToRow (40) + : +- Scan parquet default.customer_address (39) + +- * Sort (52) + +- Exchange (51) + +- * Filter (50) + +- * ColumnarToRow (49) + +- Scan parquet default.customer_demographics (48) + + +(1) Scan parquet default.customer +Output [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] + +(3) Filter [codegen id : 1] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Condition : (isnotnull(c_current_addr_sk#5) AND isnotnull(c_current_cdemo_sk#4)) + +(4) Exchange +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Arguments: hashpartitioning(c_customer_sk#3, 5), true, [id=#6] + +(5) Sort [codegen id : 2] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Arguments: [c_customer_sk#3 ASC NULLS FIRST], false, 0 + +(6) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#7, ss_customer_sk#8] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 4] +Input [2]: [ss_sold_date_sk#7, ss_customer_sk#8] + +(8) Filter [codegen id : 4] +Input [2]: [ss_sold_date_sk#7, ss_customer_sk#8] +Condition : isnotnull(ss_sold_date_sk#7) + +(9) Scan parquet default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_moy#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2002), GreaterThanOrEqual(d_moy,1), LessThanOrEqual(d_moy,4), IsNotNull(d_date_sk)] +ReadSchema: struct + +(10) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] + +(11) Filter [codegen id : 3] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Condition : (((((isnotnull(d_year#10) AND isnotnull(d_moy#11)) AND (d_year#10 = 2002)) AND (d_moy#11 >= 1)) AND (d_moy#11 <= 4)) AND isnotnull(d_date_sk#9)) + +(12) Project [codegen id : 3] +Output [1]: [d_date_sk#9] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] + +(13) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(15) Project [codegen id : 4] +Output [1]: [ss_customer_sk#8] +Input [3]: [ss_sold_date_sk#7, ss_customer_sk#8, d_date_sk#9] + +(16) Exchange +Input [1]: [ss_customer_sk#8] +Arguments: hashpartitioning(ss_customer_sk#8, 5), true, [id=#13] + +(17) Sort [codegen id : 5] +Input [1]: [ss_customer_sk#8] +Arguments: [ss_customer_sk#8 ASC NULLS FIRST], false, 0 + +(18) SortMergeJoin +Left keys [1]: [c_customer_sk#3] +Right keys [1]: 
[ss_customer_sk#8] +Join condition: None + +(19) Scan parquet default.web_sales +Output [2]: [ws_sold_date_sk#14, ws_bill_customer_sk#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 7] +Input [2]: [ws_sold_date_sk#14, ws_bill_customer_sk#15] + +(21) Filter [codegen id : 7] +Input [2]: [ws_sold_date_sk#14, ws_bill_customer_sk#15] +Condition : isnotnull(ws_sold_date_sk#14) + +(22) ReusedExchange [Reuses operator id: 13] +Output [1]: [d_date_sk#9] + +(23) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ws_sold_date_sk#14] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(24) Project [codegen id : 7] +Output [1]: [ws_bill_customer_sk#15] +Input [3]: [ws_sold_date_sk#14, ws_bill_customer_sk#15, d_date_sk#9] + +(25) Exchange +Input [1]: [ws_bill_customer_sk#15] +Arguments: hashpartitioning(ws_bill_customer_sk#15, 5), true, [id=#16] + +(26) Sort [codegen id : 8] +Input [1]: [ws_bill_customer_sk#15] +Arguments: [ws_bill_customer_sk#15 ASC NULLS FIRST], false, 0 + +(27) SortMergeJoin +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ws_bill_customer_sk#15] +Join condition: None + +(28) Scan parquet default.catalog_sales +Output [2]: [cs_sold_date_sk#17, cs_ship_customer_sk#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 10] +Input [2]: [cs_sold_date_sk#17, cs_ship_customer_sk#18] + +(30) Filter [codegen id : 10] +Input [2]: [cs_sold_date_sk#17, cs_ship_customer_sk#18] +Condition : isnotnull(cs_sold_date_sk#17) + +(31) ReusedExchange [Reuses operator id: 13] +Output [1]: [d_date_sk#9] + +(32) 
BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_sold_date_sk#17] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(33) Project [codegen id : 10] +Output [1]: [cs_ship_customer_sk#18] +Input [3]: [cs_sold_date_sk#17, cs_ship_customer_sk#18, d_date_sk#9] + +(34) Exchange +Input [1]: [cs_ship_customer_sk#18] +Arguments: hashpartitioning(cs_ship_customer_sk#18, 5), true, [id=#19] + +(35) Sort [codegen id : 11] +Input [1]: [cs_ship_customer_sk#18] +Arguments: [cs_ship_customer_sk#18 ASC NULLS FIRST], false, 0 + +(36) SortMergeJoin +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [cs_ship_customer_sk#18] +Join condition: None + +(37) Filter [codegen id : 13] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] +Condition : (exists#2 OR exists#1) + +(38) Project [codegen id : 13] +Output [2]: [c_current_cdemo_sk#4, c_current_addr_sk#5] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] + +(39) Scan parquet default.customer_address +Output [2]: [ca_address_sk#20, ca_county#21] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [In(ca_county, [Rush County,Toole County,Jefferson County,Dona Ana County,La Porte County]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(40) ColumnarToRow [codegen id : 12] +Input [2]: [ca_address_sk#20, ca_county#21] + +(41) Filter [codegen id : 12] +Input [2]: [ca_address_sk#20, ca_county#21] +Condition : (ca_county#21 IN (Rush County,Toole County,Jefferson County,Dona Ana County,La Porte County) AND isnotnull(ca_address_sk#20)) + +(42) Project [codegen id : 12] +Output [1]: [ca_address_sk#20] +Input [2]: [ca_address_sk#20, ca_county#21] + +(43) BroadcastExchange +Input [1]: [ca_address_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] + +(44) 
BroadcastHashJoin [codegen id : 13] +Left keys [1]: [c_current_addr_sk#5] +Right keys [1]: [ca_address_sk#20] +Join condition: None + +(45) Project [codegen id : 13] +Output [1]: [c_current_cdemo_sk#4] +Input [3]: [c_current_cdemo_sk#4, c_current_addr_sk#5, ca_address_sk#20] + +(46) Exchange +Input [1]: [c_current_cdemo_sk#4] +Arguments: hashpartitioning(c_current_cdemo_sk#4, 5), true, [id=#23] + +(47) Sort [codegen id : 14] +Input [1]: [c_current_cdemo_sk#4] +Arguments: [c_current_cdemo_sk#4 ASC NULLS FIRST], false, 0 + +(48) Scan parquet default.customer_demographics +Output [9]: [cd_demo_sk#24, cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(49) ColumnarToRow [codegen id : 15] +Input [9]: [cd_demo_sk#24, cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32] + +(50) Filter [codegen id : 15] +Input [9]: [cd_demo_sk#24, cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32] +Condition : isnotnull(cd_demo_sk#24) + +(51) Exchange +Input [9]: [cd_demo_sk#24, cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32] +Arguments: hashpartitioning(cd_demo_sk#24, 5), true, [id=#33] + +(52) Sort [codegen id : 16] +Input [9]: [cd_demo_sk#24, cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, 
cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32] +Arguments: [cd_demo_sk#24 ASC NULLS FIRST], false, 0 + +(53) SortMergeJoin [codegen id : 17] +Left keys [1]: [c_current_cdemo_sk#4] +Right keys [1]: [cd_demo_sk#24] +Join condition: None + +(54) Project [codegen id : 17] +Output [8]: [cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32] +Input [10]: [c_current_cdemo_sk#4, cd_demo_sk#24, cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32] + +(55) HashAggregate [codegen id : 17] +Input [8]: [cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32] +Keys [8]: [cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#34] +Results [9]: [cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32, count#35] + +(56) Exchange +Input [9]: [cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32, count#35] +Arguments: hashpartitioning(cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32, 5), true, [id=#36] + +(57) HashAggregate [codegen id : 18] +Input [9]: [cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, 
cd_dep_employed_count#31, cd_dep_college_count#32, count#35] +Keys [8]: [cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#37] +Results [14]: [cd_gender#25, cd_marital_status#26, cd_education_status#27, count(1)#37 AS cnt1#38, cd_purchase_estimate#28, count(1)#37 AS cnt2#39, cd_credit_rating#29, count(1)#37 AS cnt3#40, cd_dep_count#30, count(1)#37 AS cnt4#41, cd_dep_employed_count#31, count(1)#37 AS cnt5#42, cd_dep_college_count#32, count(1)#37 AS cnt6#43] + +(58) TakeOrderedAndProject +Input [14]: [cd_gender#25, cd_marital_status#26, cd_education_status#27, cnt1#38, cd_purchase_estimate#28, cnt2#39, cd_credit_rating#29, cnt3#40, cd_dep_count#30, cnt4#41, cd_dep_employed_count#31, cnt5#42, cd_dep_college_count#32, cnt6#43] +Arguments: 100, [cd_gender#25 ASC NULLS FIRST, cd_marital_status#26 ASC NULLS FIRST, cd_education_status#27 ASC NULLS FIRST, cd_purchase_estimate#28 ASC NULLS FIRST, cd_credit_rating#29 ASC NULLS FIRST, cd_dep_count#30 ASC NULLS FIRST, cd_dep_employed_count#31 ASC NULLS FIRST, cd_dep_college_count#32 ASC NULLS FIRST], [cd_gender#25, cd_marital_status#26, cd_education_status#27, cnt1#38, cd_purchase_estimate#28, cnt2#39, cd_credit_rating#29, cnt3#40, cd_dep_count#30, cnt4#41, cd_dep_employed_count#31, cnt5#42, cd_dep_college_count#32, cnt6#43] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10.sf100/simplified.txt new file mode 100644 index 0000000000000..32dfbc7173529 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10.sf100/simplified.txt @@ -0,0 +1,95 @@ +TakeOrderedAndProject 
[cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate,cnt1,cnt2,cnt3,cnt4,cnt5,cnt6] + WholeStageCodegen (18) + HashAggregate [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate,count] [cnt1,cnt2,cnt3,cnt4,cnt5,cnt6,count,count(1)] + InputAdapter + Exchange [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] #1 + WholeStageCodegen (17) + HashAggregate [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] [count,count] + Project [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] + SortMergeJoin [c_current_cdemo_sk,cd_demo_sk] + InputAdapter + WholeStageCodegen (14) + Sort [c_current_cdemo_sk] + InputAdapter + Exchange [c_current_cdemo_sk] #2 + WholeStageCodegen (13) + Project [c_current_cdemo_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_addr_sk,c_current_cdemo_sk] + Filter [exists,exists] + InputAdapter + SortMergeJoin [c_customer_sk,cs_ship_customer_sk] + SortMergeJoin [c_customer_sk,ws_bill_customer_sk] + SortMergeJoin [c_customer_sk,ss_customer_sk] + WholeStageCodegen (2) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #3 + WholeStageCodegen (1) + Filter [c_current_addr_sk,c_current_cdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + WholeStageCodegen (5) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #4 + WholeStageCodegen (4) + Project [ss_customer_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet 
default.store_sales [ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + WholeStageCodegen (8) + Sort [ws_bill_customer_sk] + InputAdapter + Exchange [ws_bill_customer_sk] #6 + WholeStageCodegen (7) + Project [ws_bill_customer_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 + WholeStageCodegen (11) + Sort [cs_ship_customer_sk] + InputAdapter + Exchange [cs_ship_customer_sk] #7 + WholeStageCodegen (10) + Project [cs_ship_customer_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (12) + Project [ca_address_sk] + Filter [ca_address_sk,ca_county] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_county] + InputAdapter + WholeStageCodegen (16) + Sort [cd_demo_sk] + InputAdapter + Exchange [cd_demo_sk] #9 + WholeStageCodegen (15) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_credit_rating,cd_demo_sk,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/explain.txt new file mode 100644 index 0000000000000..f9e871077f684 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/explain.txt @@ -0,0 +1,279 @@ 
+== Physical Plan == +TakeOrderedAndProject (50) ++- * HashAggregate (49) + +- Exchange (48) + +- * HashAggregate (47) + +- * Project (46) + +- * BroadcastHashJoin Inner BuildRight (45) + :- * Project (40) + : +- * BroadcastHashJoin Inner BuildRight (39) + : :- * Project (33) + : : +- * Filter (32) + : : +- * BroadcastHashJoin ExistenceJoin(exists#1) BuildRight (31) + : : :- * BroadcastHashJoin ExistenceJoin(exists#2) BuildRight (23) + : : : :- * BroadcastHashJoin LeftSemi BuildRight (15) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.customer (1) + : : : : +- BroadcastExchange (14) + : : : : +- * Project (13) + : : : : +- * BroadcastHashJoin Inner BuildRight (12) + : : : : :- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.store_sales (4) + : : : : +- BroadcastExchange (11) + : : : : +- * Project (10) + : : : : +- * Filter (9) + : : : : +- * ColumnarToRow (8) + : : : : +- Scan parquet default.date_dim (7) + : : : +- BroadcastExchange (22) + : : : +- * Project (21) + : : : +- * BroadcastHashJoin Inner BuildRight (20) + : : : :- * Filter (18) + : : : : +- * ColumnarToRow (17) + : : : : +- Scan parquet default.web_sales (16) + : : : +- ReusedExchange (19) + : : +- BroadcastExchange (30) + : : +- * Project (29) + : : +- * BroadcastHashJoin Inner BuildRight (28) + : : :- * Filter (26) + : : : +- * ColumnarToRow (25) + : : : +- Scan parquet default.catalog_sales (24) + : : +- ReusedExchange (27) + : +- BroadcastExchange (38) + : +- * Project (37) + : +- * Filter (36) + : +- * ColumnarToRow (35) + : +- Scan parquet default.customer_address (34) + +- BroadcastExchange (44) + +- * Filter (43) + +- * ColumnarToRow (42) + +- Scan parquet default.customer_demographics (41) + + +(1) Scan parquet default.customer +Output [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 9] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] + +(3) Filter [codegen id : 9] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Condition : (isnotnull(c_current_addr_sk#5) AND isnotnull(c_current_cdemo_sk#4)) + +(4) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#6, ss_customer_sk#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 2] +Input [2]: [ss_sold_date_sk#6, ss_customer_sk#7] + +(6) Filter [codegen id : 2] +Input [2]: [ss_sold_date_sk#6, ss_customer_sk#7] +Condition : isnotnull(ss_sold_date_sk#6) + +(7) Scan parquet default.date_dim +Output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_year,2002), GreaterThanOrEqual(d_moy,1), LessThanOrEqual(d_moy,4), IsNotNull(d_date_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] + +(9) Filter [codegen id : 1] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Condition : (((((isnotnull(d_moy#10) AND isnotnull(d_year#9)) AND (d_year#9 = 2002)) AND (d_moy#10 >= 1)) AND (d_moy#10 <= 4)) AND isnotnull(d_date_sk#8)) + +(10) Project [codegen id : 1] +Output [1]: [d_date_sk#8] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] + +(11) BroadcastExchange +Input [1]: [d_date_sk#8] +Arguments: 
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] + +(12) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#6] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(13) Project [codegen id : 2] +Output [1]: [ss_customer_sk#7] +Input [3]: [ss_sold_date_sk#6, ss_customer_sk#7, d_date_sk#8] + +(14) BroadcastExchange +Input [1]: [ss_customer_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(15) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ss_customer_sk#7] +Join condition: None + +(16) Scan parquet default.web_sales +Output [2]: [ws_sold_date_sk#13, ws_bill_customer_sk#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 4] +Input [2]: [ws_sold_date_sk#13, ws_bill_customer_sk#14] + +(18) Filter [codegen id : 4] +Input [2]: [ws_sold_date_sk#13, ws_bill_customer_sk#14] +Condition : isnotnull(ws_sold_date_sk#13) + +(19) ReusedExchange [Reuses operator id: 11] +Output [1]: [d_date_sk#8] + +(20) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_sold_date_sk#13] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(21) Project [codegen id : 4] +Output [1]: [ws_bill_customer_sk#14] +Input [3]: [ws_sold_date_sk#13, ws_bill_customer_sk#14, d_date_sk#8] + +(22) BroadcastExchange +Input [1]: [ws_bill_customer_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] + +(23) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ws_bill_customer_sk#14] +Join condition: None + +(24) Scan parquet default.catalog_sales +Output [2]: [cs_sold_date_sk#16, cs_ship_customer_sk#17] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 6] +Input [2]: [cs_sold_date_sk#16, cs_ship_customer_sk#17] + +(26) Filter [codegen id : 6] +Input [2]: [cs_sold_date_sk#16, cs_ship_customer_sk#17] +Condition : isnotnull(cs_sold_date_sk#16) + +(27) ReusedExchange [Reuses operator id: 11] +Output [1]: [d_date_sk#8] + +(28) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_sold_date_sk#16] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(29) Project [codegen id : 6] +Output [1]: [cs_ship_customer_sk#17] +Input [3]: [cs_sold_date_sk#16, cs_ship_customer_sk#17, d_date_sk#8] + +(30) BroadcastExchange +Input [1]: [cs_ship_customer_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] + +(31) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [cs_ship_customer_sk#17] +Join condition: None + +(32) Filter [codegen id : 9] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] +Condition : (exists#2 OR exists#1) + +(33) Project [codegen id : 9] +Output [2]: [c_current_cdemo_sk#4, c_current_addr_sk#5] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] + +(34) Scan parquet default.customer_address +Output [2]: [ca_address_sk#19, ca_county#20] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_address] +PushedFilters: [In(ca_county, [Rush County,Toole County,Jefferson County,Dona Ana County,La Porte County]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 7] +Input [2]: [ca_address_sk#19, ca_county#20] + +(36) Filter [codegen id : 7] +Input [2]: [ca_address_sk#19, ca_county#20] 
+Condition : (ca_county#20 IN (Rush County,Toole County,Jefferson County,Dona Ana County,La Porte County) AND isnotnull(ca_address_sk#19)) + +(37) Project [codegen id : 7] +Output [1]: [ca_address_sk#19] +Input [2]: [ca_address_sk#19, ca_county#20] + +(38) BroadcastExchange +Input [1]: [ca_address_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] + +(39) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_addr_sk#5] +Right keys [1]: [ca_address_sk#19] +Join condition: None + +(40) Project [codegen id : 9] +Output [1]: [c_current_cdemo_sk#4] +Input [3]: [c_current_cdemo_sk#4, c_current_addr_sk#5, ca_address_sk#19] + +(41) Scan parquet default.customer_demographics +Output [9]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(42) ColumnarToRow [codegen id : 8] +Input [9]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] + +(43) Filter [codegen id : 8] +Input [9]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Condition : isnotnull(cd_demo_sk#22) + +(44) BroadcastExchange +Input [9]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as 
bigint)),false), [id=#31] + +(45) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_cdemo_sk#4] +Right keys [1]: [cd_demo_sk#22] +Join condition: None + +(46) Project [codegen id : 9] +Output [8]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Input [10]: [c_current_cdemo_sk#4, cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] + +(47) HashAggregate [codegen id : 9] +Input [8]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Keys [8]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#32] +Results [9]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, count#33] + +(48) Exchange +Input [9]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, count#33] +Arguments: hashpartitioning(cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, 5), true, [id=#34] + +(49) HashAggregate [codegen id : 10] +Input [9]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, count#33] +Keys [8]: [cd_gender#23, 
cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#35] +Results [14]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, count(1)#35 AS cnt1#36, cd_purchase_estimate#26, count(1)#35 AS cnt2#37, cd_credit_rating#27, count(1)#35 AS cnt3#38, cd_dep_count#28, count(1)#35 AS cnt4#39, cd_dep_employed_count#29, count(1)#35 AS cnt5#40, cd_dep_college_count#30, count(1)#35 AS cnt6#41] + +(50) TakeOrderedAndProject +Input [14]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cnt1#36, cd_purchase_estimate#26, cnt2#37, cd_credit_rating#27, cnt3#38, cd_dep_count#28, cnt4#39, cd_dep_employed_count#29, cnt5#40, cd_dep_college_count#30, cnt6#41] +Arguments: 100, [cd_gender#23 ASC NULLS FIRST, cd_marital_status#24 ASC NULLS FIRST, cd_education_status#25 ASC NULLS FIRST, cd_purchase_estimate#26 ASC NULLS FIRST, cd_credit_rating#27 ASC NULLS FIRST, cd_dep_count#28 ASC NULLS FIRST, cd_dep_employed_count#29 ASC NULLS FIRST, cd_dep_college_count#30 ASC NULLS FIRST], [cd_gender#23, cd_marital_status#24, cd_education_status#25, cnt1#36, cd_purchase_estimate#26, cnt2#37, cd_credit_rating#27, cnt3#38, cd_dep_count#28, cnt4#39, cd_dep_employed_count#29, cnt5#40, cd_dep_college_count#30, cnt6#41] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/simplified.txt new file mode 100644 index 0000000000000..19781433465a3 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/simplified.txt @@ -0,0 +1,74 @@ +TakeOrderedAndProject [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate,cnt1,cnt2,cnt3,cnt4,cnt5,cnt6] + WholeStageCodegen (10) + HashAggregate 
[cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate,count] [cnt1,cnt2,cnt3,cnt4,cnt5,cnt6,count,count(1)] + InputAdapter + Exchange [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] #1 + WholeStageCodegen (9) + HashAggregate [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] [count,count] + Project [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_addr_sk,c_current_cdemo_sk] + Filter [exists,exists] + BroadcastHashJoin [c_customer_sk,cs_ship_customer_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_current_addr_sk,c_current_cdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (2) + Project [ss_customer_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (4) + Project [ws_bill_customer_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] 
+ InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (6) + Project [cs_ship_customer_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Project [ca_address_sk] + Filter [ca_address_sk,ca_county] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_county] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (8) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_credit_rating,cd_demo_sk,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11.sf100/explain.txt new file mode 100644 index 0000000000000..e9e79ad7e413d --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11.sf100/explain.txt @@ -0,0 +1,482 @@ +== Physical Plan == +TakeOrderedAndProject (87) ++- * Project (86) + +- * SortMergeJoin Inner (85) + :- * Project (67) + : +- * SortMergeJoin Inner (66) + : :- * Project (46) + : : +- * SortMergeJoin Inner (45) + : : :- * Sort (24) + : : : +- Exchange (23) + : : : +- * Filter (22) + : : : +- * HashAggregate (21) + : : : +- Exchange (20) + : : : +- * HashAggregate (19) + : : : +- * Project (18) + : : : +- * SortMergeJoin Inner (17) + : : : :- * Sort (11) + : : : : +- Exchange (10) + : : : : +- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- BroadcastExchange 
(7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.date_dim (4) + : : : +- * Sort (16) + : : : +- Exchange (15) + : : : +- * Filter (14) + : : : +- * ColumnarToRow (13) + : : : +- Scan parquet default.customer (12) + : : +- * Sort (44) + : : +- Exchange (43) + : : +- * HashAggregate (42) + : : +- Exchange (41) + : : +- * HashAggregate (40) + : : +- * Project (39) + : : +- * SortMergeJoin Inner (38) + : : :- * Sort (35) + : : : +- Exchange (34) + : : : +- * Project (33) + : : : +- * BroadcastHashJoin Inner BuildRight (32) + : : : :- * Filter (27) + : : : : +- * ColumnarToRow (26) + : : : : +- Scan parquet default.store_sales (25) + : : : +- BroadcastExchange (31) + : : : +- * Filter (30) + : : : +- * ColumnarToRow (29) + : : : +- Scan parquet default.date_dim (28) + : : +- * Sort (37) + : : +- ReusedExchange (36) + : +- * Sort (65) + : +- Exchange (64) + : +- * Project (63) + : +- * Filter (62) + : +- * HashAggregate (61) + : +- Exchange (60) + : +- * HashAggregate (59) + : +- * Project (58) + : +- * SortMergeJoin Inner (57) + : :- * Sort (54) + : : +- Exchange (53) + : : +- * Project (52) + : : +- * BroadcastHashJoin Inner BuildRight (51) + : : :- * Filter (49) + : : : +- * ColumnarToRow (48) + : : : +- Scan parquet default.web_sales (47) + : : +- ReusedExchange (50) + : +- * Sort (56) + : +- ReusedExchange (55) + +- * Sort (84) + +- Exchange (83) + +- * HashAggregate (82) + +- Exchange (81) + +- * HashAggregate (80) + +- * Project (79) + +- * SortMergeJoin Inner (78) + :- * Sort (75) + : +- Exchange (74) + : +- * Project (73) + : +- * BroadcastHashJoin Inner BuildRight (72) + : :- * Filter (70) + : : +- * ColumnarToRow (69) + : : +- Scan parquet default.web_sales (68) + : +- ReusedExchange (71) + +- * Sort (77) + +- ReusedExchange (76) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_list_price#4] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [4]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_list_price#4] + +(3) Filter [codegen id : 2] +Input [4]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_list_price#4] +Condition : (isnotnull(ss_customer_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#5, d_year#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#5, d_year#6] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#5, d_year#6] +Condition : ((isnotnull(d_year#6) AND (d_year#6 = 2001)) AND isnotnull(d_date_sk#5)) + +(7) BroadcastExchange +Input [2]: [d_date_sk#5, d_year#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] + +(8) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(9) Project [codegen id : 2] +Output [4]: [ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_list_price#4, d_year#6] +Input [6]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_list_price#4, d_date_sk#5, d_year#6] + +(10) Exchange +Input [4]: [ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_list_price#4, d_year#6] +Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#8] + +(11) Sort [codegen id : 3] +Input [4]: [ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_list_price#4, d_year#6] +Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 
0 + +(12) Scan parquet default.customer +Output [8]: [c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(13) ColumnarToRow [codegen id : 4] +Input [8]: [c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] + +(14) Filter [codegen id : 4] +Input [8]: [c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] +Condition : (isnotnull(c_customer_sk#9) AND isnotnull(c_customer_id#10)) + +(15) Exchange +Input [8]: [c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] +Arguments: hashpartitioning(c_customer_sk#9, 5), true, [id=#17] + +(16) Sort [codegen id : 5] +Input [8]: [c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] +Arguments: [c_customer_sk#9 ASC NULLS FIRST], false, 0 + +(17) SortMergeJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#9] +Join condition: None + +(18) Project [codegen id : 6] +Output [10]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, ss_ext_discount_amt#3, ss_ext_list_price#4, d_year#6] +Input [12]: [ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_list_price#4, d_year#6, c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, 
c_email_address#16] + +(19) HashAggregate [codegen id : 6] +Input [10]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, ss_ext_discount_amt#3, ss_ext_list_price#4, d_year#6] +Keys [8]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#6, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#4 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#3 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum#18] +Results [9]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#6, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, sum#19] + +(20) Exchange +Input [9]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#6, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, sum#19] +Arguments: hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, d_year#6, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, 5), true, [id=#20] + +(21) HashAggregate [codegen id : 7] +Input [9]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#6, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, sum#19] +Keys [8]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#6, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#4 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#3 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#4 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#3 as decimal(8,2)))), DecimalType(8,2), true)))#21] +Results [2]: [c_customer_id#10 AS 
customer_id#22, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#4 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#3 as decimal(8,2)))), DecimalType(8,2), true)))#21,18,2) AS year_total#23] + +(22) Filter [codegen id : 7] +Input [2]: [customer_id#22, year_total#23] +Condition : (isnotnull(year_total#23) AND (year_total#23 > 0.00)) + +(23) Exchange +Input [2]: [customer_id#22, year_total#23] +Arguments: hashpartitioning(customer_id#22, 5), true, [id=#24] + +(24) Sort [codegen id : 8] +Input [2]: [customer_id#22, year_total#23] +Arguments: [customer_id#22 ASC NULLS FIRST], false, 0 + +(25) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_list_price#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(26) ColumnarToRow [codegen id : 10] +Input [4]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_list_price#4] + +(27) Filter [codegen id : 10] +Input [4]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_list_price#4] +Condition : (isnotnull(ss_customer_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(28) Scan parquet default.date_dim +Output [2]: [d_date_sk#5, d_year#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 9] +Input [2]: [d_date_sk#5, d_year#6] + +(30) Filter [codegen id : 9] +Input [2]: [d_date_sk#5, d_year#6] +Condition : ((isnotnull(d_year#6) AND (d_year#6 = 2002)) AND isnotnull(d_date_sk#5)) + +(31) BroadcastExchange +Input 
[2]: [d_date_sk#5, d_year#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#25] + +(32) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(33) Project [codegen id : 10] +Output [4]: [ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_list_price#4, d_year#6] +Input [6]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_list_price#4, d_date_sk#5, d_year#6] + +(34) Exchange +Input [4]: [ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_list_price#4, d_year#6] +Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#26] + +(35) Sort [codegen id : 11] +Input [4]: [ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_list_price#4, d_year#6] +Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 + +(36) ReusedExchange [Reuses operator id: 15] +Output [8]: [c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] + +(37) Sort [codegen id : 13] +Input [8]: [c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] +Arguments: [c_customer_sk#9 ASC NULLS FIRST], false, 0 + +(38) SortMergeJoin [codegen id : 14] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#9] +Join condition: None + +(39) Project [codegen id : 14] +Output [10]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, ss_ext_discount_amt#3, ss_ext_list_price#4, d_year#6] +Input [12]: [ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_list_price#4, d_year#6, c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] + +(40) HashAggregate [codegen id : 14] +Input [10]: [c_customer_id#10, c_first_name#11, c_last_name#12, 
c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, ss_ext_discount_amt#3, ss_ext_list_price#4, d_year#6] +Keys [8]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#6, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#4 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#3 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum#27] +Results [9]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#6, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, sum#28] + +(41) Exchange +Input [9]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#6, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, sum#28] +Arguments: hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, d_year#6, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, 5), true, [id=#29] + +(42) HashAggregate [codegen id : 15] +Input [9]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#6, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, sum#28] +Keys [8]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#6, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#4 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#3 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#4 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#3 as decimal(8,2)))), DecimalType(8,2), true)))#30] +Results [3]: [c_customer_id#10 AS customer_id#31, c_preferred_cust_flag#13 AS customer_preferred_cust_flag#32, 
MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#4 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#3 as decimal(8,2)))), DecimalType(8,2), true)))#30,18,2) AS year_total#33] + +(43) Exchange +Input [3]: [customer_id#31, customer_preferred_cust_flag#32, year_total#33] +Arguments: hashpartitioning(customer_id#31, 5), true, [id=#34] + +(44) Sort [codegen id : 16] +Input [3]: [customer_id#31, customer_preferred_cust_flag#32, year_total#33] +Arguments: [customer_id#31 ASC NULLS FIRST], false, 0 + +(45) SortMergeJoin [codegen id : 17] +Left keys [1]: [customer_id#22] +Right keys [1]: [customer_id#31] +Join condition: None + +(46) Project [codegen id : 17] +Output [4]: [customer_id#22, year_total#23, customer_preferred_cust_flag#32, year_total#33] +Input [5]: [customer_id#22, year_total#23, customer_id#31, customer_preferred_cust_flag#32, year_total#33] + +(47) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#35, ws_bill_customer_sk#36, ws_ext_discount_amt#37, ws_ext_list_price#38] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 19] +Input [4]: [ws_sold_date_sk#35, ws_bill_customer_sk#36, ws_ext_discount_amt#37, ws_ext_list_price#38] + +(49) Filter [codegen id : 19] +Input [4]: [ws_sold_date_sk#35, ws_bill_customer_sk#36, ws_ext_discount_amt#37, ws_ext_list_price#38] +Condition : (isnotnull(ws_bill_customer_sk#36) AND isnotnull(ws_sold_date_sk#35)) + +(50) ReusedExchange [Reuses operator id: 7] +Output [2]: [d_date_sk#5, d_year#6] + +(51) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [ws_sold_date_sk#35] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(52) Project [codegen id : 19] +Output [4]: [ws_bill_customer_sk#36, 
ws_ext_discount_amt#37, ws_ext_list_price#38, d_year#6] +Input [6]: [ws_sold_date_sk#35, ws_bill_customer_sk#36, ws_ext_discount_amt#37, ws_ext_list_price#38, d_date_sk#5, d_year#6] + +(53) Exchange +Input [4]: [ws_bill_customer_sk#36, ws_ext_discount_amt#37, ws_ext_list_price#38, d_year#6] +Arguments: hashpartitioning(ws_bill_customer_sk#36, 5), true, [id=#39] + +(54) Sort [codegen id : 20] +Input [4]: [ws_bill_customer_sk#36, ws_ext_discount_amt#37, ws_ext_list_price#38, d_year#6] +Arguments: [ws_bill_customer_sk#36 ASC NULLS FIRST], false, 0 + +(55) ReusedExchange [Reuses operator id: 15] +Output [8]: [c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] + +(56) Sort [codegen id : 22] +Input [8]: [c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] +Arguments: [c_customer_sk#9 ASC NULLS FIRST], false, 0 + +(57) SortMergeJoin [codegen id : 23] +Left keys [1]: [ws_bill_customer_sk#36] +Right keys [1]: [c_customer_sk#9] +Join condition: None + +(58) Project [codegen id : 23] +Output [10]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, ws_ext_discount_amt#37, ws_ext_list_price#38, d_year#6] +Input [12]: [ws_bill_customer_sk#36, ws_ext_discount_amt#37, ws_ext_list_price#38, d_year#6, c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] + +(59) HashAggregate [codegen id : 23] +Input [10]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, ws_ext_discount_amt#37, ws_ext_list_price#38, d_year#6] +Keys [8]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, 
d_year#6] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#38 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#37 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum#40] +Results [9]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, d_year#6, sum#41] + +(60) Exchange +Input [9]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, d_year#6, sum#41] +Arguments: hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, d_year#6, 5), true, [id=#42] + +(61) HashAggregate [codegen id : 24] +Input [9]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, d_year#6, sum#41] +Keys [8]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, d_year#6] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#38 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#37 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#38 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#37 as decimal(8,2)))), DecimalType(8,2), true)))#43] +Results [2]: [c_customer_id#10 AS customer_id#44, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#38 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#37 as decimal(8,2)))), DecimalType(8,2), true)))#43,18,2) AS year_total#45] + +(62) Filter [codegen id : 24] +Input [2]: [customer_id#44, year_total#45] +Condition : (isnotnull(year_total#45) AND (year_total#45 > 0.00)) + +(63) 
Project [codegen id : 24] +Output [2]: [customer_id#44 AS customer_id#46, year_total#45 AS year_total#47] +Input [2]: [customer_id#44, year_total#45] + +(64) Exchange +Input [2]: [customer_id#46, year_total#47] +Arguments: hashpartitioning(customer_id#46, 5), true, [id=#48] + +(65) Sort [codegen id : 25] +Input [2]: [customer_id#46, year_total#47] +Arguments: [customer_id#46 ASC NULLS FIRST], false, 0 + +(66) SortMergeJoin [codegen id : 26] +Left keys [1]: [customer_id#22] +Right keys [1]: [customer_id#46] +Join condition: None + +(67) Project [codegen id : 26] +Output [5]: [customer_id#22, year_total#23, customer_preferred_cust_flag#32, year_total#33, year_total#47] +Input [6]: [customer_id#22, year_total#23, customer_preferred_cust_flag#32, year_total#33, customer_id#46, year_total#47] + +(68) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#35, ws_bill_customer_sk#36, ws_ext_discount_amt#37, ws_ext_list_price#38] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(69) ColumnarToRow [codegen id : 28] +Input [4]: [ws_sold_date_sk#35, ws_bill_customer_sk#36, ws_ext_discount_amt#37, ws_ext_list_price#38] + +(70) Filter [codegen id : 28] +Input [4]: [ws_sold_date_sk#35, ws_bill_customer_sk#36, ws_ext_discount_amt#37, ws_ext_list_price#38] +Condition : (isnotnull(ws_bill_customer_sk#36) AND isnotnull(ws_sold_date_sk#35)) + +(71) ReusedExchange [Reuses operator id: 31] +Output [2]: [d_date_sk#5, d_year#6] + +(72) BroadcastHashJoin [codegen id : 28] +Left keys [1]: [ws_sold_date_sk#35] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(73) Project [codegen id : 28] +Output [4]: [ws_bill_customer_sk#36, ws_ext_discount_amt#37, ws_ext_list_price#38, d_year#6] +Input [6]: [ws_sold_date_sk#35, ws_bill_customer_sk#36, 
ws_ext_discount_amt#37, ws_ext_list_price#38, d_date_sk#5, d_year#6] + +(74) Exchange +Input [4]: [ws_bill_customer_sk#36, ws_ext_discount_amt#37, ws_ext_list_price#38, d_year#6] +Arguments: hashpartitioning(ws_bill_customer_sk#36, 5), true, [id=#49] + +(75) Sort [codegen id : 29] +Input [4]: [ws_bill_customer_sk#36, ws_ext_discount_amt#37, ws_ext_list_price#38, d_year#6] +Arguments: [ws_bill_customer_sk#36 ASC NULLS FIRST], false, 0 + +(76) ReusedExchange [Reuses operator id: 15] +Output [8]: [c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] + +(77) Sort [codegen id : 31] +Input [8]: [c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] +Arguments: [c_customer_sk#9 ASC NULLS FIRST], false, 0 + +(78) SortMergeJoin [codegen id : 32] +Left keys [1]: [ws_bill_customer_sk#36] +Right keys [1]: [c_customer_sk#9] +Join condition: None + +(79) Project [codegen id : 32] +Output [10]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, ws_ext_discount_amt#37, ws_ext_list_price#38, d_year#6] +Input [12]: [ws_bill_customer_sk#36, ws_ext_discount_amt#37, ws_ext_list_price#38, d_year#6, c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] + +(80) HashAggregate [codegen id : 32] +Input [10]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, ws_ext_discount_amt#37, ws_ext_list_price#38, d_year#6] +Keys [8]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, d_year#6] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#38 as 
decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#37 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum#50] +Results [9]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, d_year#6, sum#51] + +(81) Exchange +Input [9]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, d_year#6, sum#51] +Arguments: hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, d_year#6, 5), true, [id=#52] + +(82) HashAggregate [codegen id : 33] +Input [9]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, d_year#6, sum#51] +Keys [8]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, d_year#6] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#38 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#37 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#38 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#37 as decimal(8,2)))), DecimalType(8,2), true)))#53] +Results [2]: [c_customer_id#10 AS customer_id#54, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#38 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#37 as decimal(8,2)))), DecimalType(8,2), true)))#53,18,2) AS year_total#55] + +(83) Exchange +Input [2]: [customer_id#54, year_total#55] +Arguments: hashpartitioning(customer_id#54, 5), true, [id=#56] + +(84) Sort [codegen id : 34] +Input [2]: [customer_id#54, year_total#55] +Arguments: [customer_id#54 ASC NULLS FIRST], false, 0 + +(85) 
SortMergeJoin [codegen id : 35] +Left keys [1]: [customer_id#22] +Right keys [1]: [customer_id#54] +Join condition: (CASE WHEN (year_total#47 > 0.00) THEN CheckOverflow((promote_precision(year_total#55) / promote_precision(year_total#47)), DecimalType(38,20), true) ELSE null END > CASE WHEN (year_total#23 > 0.00) THEN CheckOverflow((promote_precision(year_total#33) / promote_precision(year_total#23)), DecimalType(38,20), true) ELSE null END) + +(86) Project [codegen id : 35] +Output [1]: [customer_preferred_cust_flag#32] +Input [7]: [customer_id#22, year_total#23, customer_preferred_cust_flag#32, year_total#33, year_total#47, customer_id#54, year_total#55] + +(87) TakeOrderedAndProject +Input [1]: [customer_preferred_cust_flag#32] +Arguments: 100, [customer_preferred_cust_flag#32 ASC NULLS FIRST], [customer_preferred_cust_flag#32] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11.sf100/simplified.txt new file mode 100644 index 0000000000000..281afc27939f3 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11.sf100/simplified.txt @@ -0,0 +1,158 @@ +TakeOrderedAndProject [customer_preferred_cust_flag] + WholeStageCodegen (35) + Project [customer_preferred_cust_flag] + SortMergeJoin [customer_id,customer_id,year_total,year_total,year_total,year_total] + InputAdapter + WholeStageCodegen (26) + Project [customer_id,customer_preferred_cust_flag,year_total,year_total,year_total] + SortMergeJoin [customer_id,customer_id] + InputAdapter + WholeStageCodegen (17) + Project [customer_id,customer_preferred_cust_flag,year_total,year_total] + SortMergeJoin [customer_id,customer_id] + InputAdapter + WholeStageCodegen (8) + Sort [customer_id] + InputAdapter + Exchange [customer_id] #1 + WholeStageCodegen (7) + Filter [year_total] + HashAggregate 
[c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum] [customer_id,sum,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2), true))),year_total] + InputAdapter + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #2 + WholeStageCodegen (6) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price] [sum,sum] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (3) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #3 + WholeStageCodegen (2) + Project [d_year,ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + WholeStageCodegen (5) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #5 + WholeStageCodegen (4) + Filter [c_customer_id,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] + InputAdapter + WholeStageCodegen (16) + Sort [customer_id] + InputAdapter + Exchange [customer_id] #6 + WholeStageCodegen (15) + HashAggregate 
[c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum] [customer_id,customer_preferred_cust_flag,sum,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2), true))),year_total] + InputAdapter + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #7 + WholeStageCodegen (14) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price] [sum,sum] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (11) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #8 + WholeStageCodegen (10) + Project [d_year,ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (9) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + WholeStageCodegen (13) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] #5 + InputAdapter + WholeStageCodegen (25) + Sort [customer_id] + InputAdapter + Exchange [customer_id] #10 + WholeStageCodegen (24) + Project [customer_id,year_total] + Filter [year_total] + HashAggregate 
[c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum] [customer_id,sum,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2), true))),year_total] + InputAdapter + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #11 + WholeStageCodegen (23) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ws_ext_discount_amt,ws_ext_list_price] [sum,sum] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ws_ext_discount_amt,ws_ext_list_price] + SortMergeJoin [c_customer_sk,ws_bill_customer_sk] + InputAdapter + WholeStageCodegen (20) + Sort [ws_bill_customer_sk] + InputAdapter + Exchange [ws_bill_customer_sk] #12 + WholeStageCodegen (19) + Project [d_year,ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_bill_customer_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #4 + InputAdapter + WholeStageCodegen (22) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] #5 + InputAdapter + WholeStageCodegen (34) + Sort [customer_id] + InputAdapter + Exchange [customer_id] #13 + WholeStageCodegen (33) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum] [customer_id,sum,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - 
promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2), true))),year_total] + InputAdapter + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #14 + WholeStageCodegen (32) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ws_ext_discount_amt,ws_ext_list_price] [sum,sum] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ws_ext_discount_amt,ws_ext_list_price] + SortMergeJoin [c_customer_sk,ws_bill_customer_sk] + InputAdapter + WholeStageCodegen (29) + Sort [ws_bill_customer_sk] + InputAdapter + Exchange [ws_bill_customer_sk] #15 + WholeStageCodegen (28) + Project [d_year,ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_bill_customer_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #9 + InputAdapter + WholeStageCodegen (31) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] #5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/explain.txt new file mode 100644 index 0000000000000..c8404017689c0 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/explain.txt @@ -0,0 +1,415 @@ +== Physical Plan == +TakeOrderedAndProject (73) ++- * Project (72) + +- * BroadcastHashJoin Inner BuildRight (71) + :- * Project (57) + : +- * BroadcastHashJoin Inner BuildRight (56) + : :- * Project (37) + : : +- * BroadcastHashJoin Inner BuildRight (36) + 
: : :- * Filter (19) + : : : +- * HashAggregate (18) + : : : +- Exchange (17) + : : : +- * HashAggregate (16) + : : : +- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.customer (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.store_sales (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.date_dim (10) + : : +- BroadcastExchange (35) + : : +- * HashAggregate (34) + : : +- Exchange (33) + : : +- * HashAggregate (32) + : : +- * Project (31) + : : +- * BroadcastHashJoin Inner BuildRight (30) + : : :- * Project (25) + : : : +- * BroadcastHashJoin Inner BuildRight (24) + : : : :- * Filter (22) + : : : : +- * ColumnarToRow (21) + : : : : +- Scan parquet default.customer (20) + : : : +- ReusedExchange (23) + : : +- BroadcastExchange (29) + : : +- * Filter (28) + : : +- * ColumnarToRow (27) + : : +- Scan parquet default.date_dim (26) + : +- BroadcastExchange (55) + : +- * Project (54) + : +- * Filter (53) + : +- * HashAggregate (52) + : +- Exchange (51) + : +- * HashAggregate (50) + : +- * Project (49) + : +- * BroadcastHashJoin Inner BuildRight (48) + : :- * Project (46) + : : +- * BroadcastHashJoin Inner BuildRight (45) + : : :- * Filter (40) + : : : +- * ColumnarToRow (39) + : : : +- Scan parquet default.customer (38) + : : +- BroadcastExchange (44) + : : +- * Filter (43) + : : +- * ColumnarToRow (42) + : : +- Scan parquet default.web_sales (41) + : +- ReusedExchange (47) + +- BroadcastExchange (70) + +- * HashAggregate (69) + +- Exchange (68) + +- * HashAggregate (67) + +- * Project (66) + +- * BroadcastHashJoin Inner BuildRight (65) + :- * Project (63) + : +- * BroadcastHashJoin Inner BuildRight (62) + : :- * Filter 
(60) + : : +- * ColumnarToRow (59) + : : +- Scan parquet default.customer (58) + : +- ReusedExchange (61) + +- ReusedExchange (64) + + +(1) Scan parquet default.customer +Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] + +(3) Filter [codegen id : 3] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(4) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_list_price#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_list_price#12] + +(6) Filter [codegen id : 1] +Input [4]: [ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_list_price#12] +Condition : (isnotnull(ss_customer_sk#10) AND isnotnull(ss_sold_date_sk#9)) + +(7) BroadcastExchange +Input [4]: [ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_list_price#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#13] + +(8) 
BroadcastHashJoin [codegen id : 3] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#10] +Join condition: None + +(9) Project [codegen id : 3] +Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_ext_discount_amt#11, ss_ext_list_price#12] +Input [12]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_list_price#12] + +(10) Scan parquet default.date_dim +Output [2]: [d_date_sk#14, d_year#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#14, d_year#15] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#14, d_year#15] +Condition : ((isnotnull(d_year#15) AND (d_year#15 = 2001)) AND isnotnull(d_date_sk#14)) + +(13) BroadcastExchange +Input [2]: [d_date_sk#14, d_year#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#16] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#9] +Right keys [1]: [d_date_sk#14] +Join condition: None + +(15) Project [codegen id : 3] +Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#11, ss_ext_list_price#12, d_year#15] +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_ext_discount_amt#11, ss_ext_list_price#12, d_date_sk#14, d_year#15] + +(16) HashAggregate [codegen id : 3] +Input [10]: [c_customer_id#2, 
c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#11, ss_ext_list_price#12, d_year#15] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#12 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum#17] +Results [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#18] + +(17) Exchange +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#18] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, 5), true, [id=#19] + +(18) HashAggregate [codegen id : 16] +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#18] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#12 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#12 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(8,2)))), DecimalType(8,2), true)))#20] +Results [2]: [c_customer_id#2 AS customer_id#21, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#12 as decimal(8,2))) - 
promote_precision(cast(ss_ext_discount_amt#11 as decimal(8,2)))), DecimalType(8,2), true)))#20,18,2) AS year_total#22] + +(19) Filter [codegen id : 16] +Input [2]: [customer_id#21, year_total#22] +Condition : (isnotnull(year_total#22) AND (year_total#22 > 0.00)) + +(20) Scan parquet default.customer +Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] + +(22) Filter [codegen id : 6] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(23) ReusedExchange [Reuses operator id: 7] +Output [4]: [ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_list_price#12] + +(24) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#10] +Join condition: None + +(25) Project [codegen id : 6] +Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_ext_discount_amt#11, ss_ext_list_price#12] +Input [12]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_list_price#12] + +(26) Scan parquet default.date_dim +Output [2]: [d_date_sk#14, d_year#15] +Batched: 
true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#14, d_year#15] + +(28) Filter [codegen id : 5] +Input [2]: [d_date_sk#14, d_year#15] +Condition : ((isnotnull(d_year#15) AND (d_year#15 = 2002)) AND isnotnull(d_date_sk#14)) + +(29) BroadcastExchange +Input [2]: [d_date_sk#14, d_year#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] + +(30) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#9] +Right keys [1]: [d_date_sk#14] +Join condition: None + +(31) Project [codegen id : 6] +Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#11, ss_ext_list_price#12, d_year#15] +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_ext_discount_amt#11, ss_ext_list_price#12, d_date_sk#14, d_year#15] + +(32) HashAggregate [codegen id : 6] +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#11, ss_ext_list_price#12, d_year#15] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#12 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum#24] +Results [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, 
c_email_address#8, sum#25] + +(33) Exchange +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#25] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, 5), true, [id=#26] + +(34) HashAggregate [codegen id : 7] +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#25] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#12 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#12 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(8,2)))), DecimalType(8,2), true)))#27] +Results [3]: [c_customer_id#2 AS customer_id#28, c_preferred_cust_flag#5 AS customer_preferred_cust_flag#29, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#12 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(8,2)))), DecimalType(8,2), true)))#27,18,2) AS year_total#30] + +(35) BroadcastExchange +Input [3]: [customer_id#28, customer_preferred_cust_flag#29, year_total#30] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#31] + +(36) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#21] +Right keys [1]: [customer_id#28] +Join condition: None + +(37) Project [codegen id : 16] +Output [4]: [customer_id#21, year_total#22, customer_preferred_cust_flag#29, year_total#30] +Input [5]: [customer_id#21, year_total#22, customer_id#28, 
customer_preferred_cust_flag#29, year_total#30] + +(38) Scan parquet default.customer +Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(39) ColumnarToRow [codegen id : 10] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] + +(40) Filter [codegen id : 10] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(41) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#32, ws_bill_customer_sk#33, ws_ext_discount_amt#34, ws_ext_list_price#35] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(42) ColumnarToRow [codegen id : 8] +Input [4]: [ws_sold_date_sk#32, ws_bill_customer_sk#33, ws_ext_discount_amt#34, ws_ext_list_price#35] + +(43) Filter [codegen id : 8] +Input [4]: [ws_sold_date_sk#32, ws_bill_customer_sk#33, ws_ext_discount_amt#34, ws_ext_list_price#35] +Condition : (isnotnull(ws_bill_customer_sk#33) AND isnotnull(ws_sold_date_sk#32)) + +(44) BroadcastExchange +Input [4]: [ws_sold_date_sk#32, ws_bill_customer_sk#33, ws_ext_discount_amt#34, ws_ext_list_price#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#36] + +(45) BroadcastHashJoin [codegen id : 10] +Left keys 
[1]: [c_customer_sk#1] +Right keys [1]: [ws_bill_customer_sk#33] +Join condition: None + +(46) Project [codegen id : 10] +Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#32, ws_ext_discount_amt#34, ws_ext_list_price#35] +Input [12]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#32, ws_bill_customer_sk#33, ws_ext_discount_amt#34, ws_ext_list_price#35] + +(47) ReusedExchange [Reuses operator id: 13] +Output [2]: [d_date_sk#14, d_year#15] + +(48) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ws_sold_date_sk#32] +Right keys [1]: [d_date_sk#14] +Join condition: None + +(49) Project [codegen id : 10] +Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_ext_discount_amt#34, ws_ext_list_price#35, d_year#15] +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#32, ws_ext_discount_amt#34, ws_ext_list_price#35, d_date_sk#14, d_year#15] + +(50) HashAggregate [codegen id : 10] +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_ext_discount_amt#34, ws_ext_list_price#35, d_year#15] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#35 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#34 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum#37] +Results [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, 
c_email_address#8, d_year#15, sum#38] + +(51) Exchange +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15, sum#38] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15, 5), true, [id=#39] + +(52) HashAggregate [codegen id : 11] +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15, sum#38] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#35 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#34 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#35 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#34 as decimal(8,2)))), DecimalType(8,2), true)))#40] +Results [2]: [c_customer_id#2 AS customer_id#41, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#35 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#34 as decimal(8,2)))), DecimalType(8,2), true)))#40,18,2) AS year_total#42] + +(53) Filter [codegen id : 11] +Input [2]: [customer_id#41, year_total#42] +Condition : (isnotnull(year_total#42) AND (year_total#42 > 0.00)) + +(54) Project [codegen id : 11] +Output [2]: [customer_id#41 AS customer_id#43, year_total#42 AS year_total#44] +Input [2]: [customer_id#41, year_total#42] + +(55) BroadcastExchange +Input [2]: [customer_id#43, year_total#44] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#45] + +(56) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#21] +Right keys [1]: 
[customer_id#43] +Join condition: None + +(57) Project [codegen id : 16] +Output [5]: [customer_id#21, year_total#22, customer_preferred_cust_flag#29, year_total#30, year_total#44] +Input [6]: [customer_id#21, year_total#22, customer_preferred_cust_flag#29, year_total#30, customer_id#43, year_total#44] + +(58) Scan parquet default.customer +Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(59) ColumnarToRow [codegen id : 14] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] + +(60) Filter [codegen id : 14] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(61) ReusedExchange [Reuses operator id: 44] +Output [4]: [ws_sold_date_sk#32, ws_bill_customer_sk#33, ws_ext_discount_amt#34, ws_ext_list_price#35] + +(62) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ws_bill_customer_sk#33] +Join condition: None + +(63) Project [codegen id : 14] +Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#32, ws_ext_discount_amt#34, ws_ext_list_price#35] +Input [12]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#32, ws_bill_customer_sk#33, ws_ext_discount_amt#34, ws_ext_list_price#35] + +(64) ReusedExchange 
[Reuses operator id: 29] +Output [2]: [d_date_sk#14, d_year#15] + +(65) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ws_sold_date_sk#32] +Right keys [1]: [d_date_sk#14] +Join condition: None + +(66) Project [codegen id : 14] +Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_ext_discount_amt#34, ws_ext_list_price#35, d_year#15] +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#32, ws_ext_discount_amt#34, ws_ext_list_price#35, d_date_sk#14, d_year#15] + +(67) HashAggregate [codegen id : 14] +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_ext_discount_amt#34, ws_ext_list_price#35, d_year#15] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#35 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#34 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum#46] +Results [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15, sum#47] + +(68) Exchange +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15, sum#47] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15, 5), true, [id=#48] + +(69) HashAggregate [codegen id : 15] +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15, sum#47] +Keys [8]: 
[c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#35 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#34 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#35 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#34 as decimal(8,2)))), DecimalType(8,2), true)))#49] +Results [2]: [c_customer_id#2 AS customer_id#50, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#35 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#34 as decimal(8,2)))), DecimalType(8,2), true)))#49,18,2) AS year_total#51] + +(70) BroadcastExchange +Input [2]: [customer_id#50, year_total#51] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#52] + +(71) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#21] +Right keys [1]: [customer_id#50] +Join condition: (CASE WHEN (year_total#44 > 0.00) THEN CheckOverflow((promote_precision(year_total#51) / promote_precision(year_total#44)), DecimalType(38,20), true) ELSE null END > CASE WHEN (year_total#22 > 0.00) THEN CheckOverflow((promote_precision(year_total#30) / promote_precision(year_total#22)), DecimalType(38,20), true) ELSE null END) + +(72) Project [codegen id : 16] +Output [1]: [customer_preferred_cust_flag#29] +Input [7]: [customer_id#21, year_total#22, customer_preferred_cust_flag#29, year_total#30, year_total#44, customer_id#50, year_total#51] + +(73) TakeOrderedAndProject +Input [1]: [customer_preferred_cust_flag#29] +Arguments: 100, [customer_preferred_cust_flag#29 ASC NULLS FIRST], [customer_preferred_cust_flag#29] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/simplified.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/simplified.txt new file mode 100644 index 0000000000000..6cb07efba16af --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/simplified.txt @@ -0,0 +1,108 @@ +TakeOrderedAndProject [customer_preferred_cust_flag] + WholeStageCodegen (16) + Project [customer_preferred_cust_flag] + BroadcastHashJoin [customer_id,customer_id,year_total,year_total,year_total,year_total] + Project [customer_id,customer_preferred_cust_flag,year_total,year_total,year_total] + BroadcastHashJoin [customer_id,customer_id] + Project [customer_id,customer_preferred_cust_flag,year_total,year_total] + BroadcastHashJoin [customer_id,customer_id] + Filter [year_total] + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum] [customer_id,sum,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2), true))),year_total] + InputAdapter + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #1 + WholeStageCodegen (3) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price] [sum,sum] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_customer_id,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer 
[c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (7) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum] [customer_id,customer_preferred_cust_flag,sum,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2), true))),year_total] + InputAdapter + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #5 + WholeStageCodegen (6) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price] [sum,sum] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_customer_id,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] + InputAdapter + ReusedExchange 
[ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] #2 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (11) + Project [customer_id,year_total] + Filter [year_total] + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum] [customer_id,sum,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2), true))),year_total] + InputAdapter + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #8 + WholeStageCodegen (10) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ws_ext_discount_amt,ws_ext_list_price] [sum,sum] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ws_ext_discount_amt,ws_ext_list_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Filter [c_customer_id,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (8) + Filter [ws_bill_customer_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #3 + 
InputAdapter + BroadcastExchange #10 + WholeStageCodegen (15) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum] [customer_id,sum,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2), true))),year_total] + InputAdapter + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #11 + WholeStageCodegen (14) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ws_ext_discount_amt,ws_ext_list_price] [sum,sum] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ws_ext_discount_amt,ws_ext_list_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Filter [c_customer_id,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] + InputAdapter + ReusedExchange [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] #9 + InputAdapter + ReusedExchange [d_date_sk,d_year] #6 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12.sf100/explain.txt new file mode 100644 index 0000000000000..d62e42ea554fe --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12.sf100/explain.txt @@ -0,0 +1,152 @@ +== Physical Plan == +TakeOrderedAndProject (27) ++- * 
Project (26) + +- Window (25) + +- * Sort (24) + +- Exchange (23) + +- * HashAggregate (22) + +- Exchange (21) + +- * HashAggregate (20) + +- * Project (19) + +- * SortMergeJoin Inner (18) + :- * Sort (12) + : +- Exchange (11) + : +- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.web_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.date_dim (4) + +- * Sort (17) + +- Exchange (16) + +- * Filter (15) + +- * ColumnarToRow (14) + +- Scan parquet default.item (13) + + +(1) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_sales_price#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_sales_price#3] + +(3) Filter [codegen id : 2] +Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_sales_price#3] +Condition : (isnotnull(ws_item_sk#2) AND isnotnull(ws_sold_date_sk#1)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#4, d_date#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#4, d_date#5] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#4, d_date#5] +Condition : (((isnotnull(d_date#5) AND (d_date#5 >= 10644)) AND (d_date#5 <= 10674)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: 
[d_date_sk#4] +Input [2]: [d_date_sk#4, d_date#5] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(9) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ws_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(10) Project [codegen id : 2] +Output [2]: [ws_item_sk#2, ws_ext_sales_price#3] +Input [4]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_sales_price#3, d_date_sk#4] + +(11) Exchange +Input [2]: [ws_item_sk#2, ws_ext_sales_price#3] +Arguments: hashpartitioning(ws_item_sk#2, 5), true, [id=#7] + +(12) Sort [codegen id : 3] +Input [2]: [ws_item_sk#2, ws_ext_sales_price#3] +Arguments: [ws_item_sk#2 ASC NULLS FIRST], false, 0 + +(13) Scan parquet default.item +Output [6]: [i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [In(i_category, [Sports,Books,Home]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 4] +Input [6]: [i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] + +(15) Filter [codegen id : 4] +Input [6]: [i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Condition : (i_category#13 IN (Sports,Books,Home) AND isnotnull(i_item_sk#8)) + +(16) Exchange +Input [6]: [i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Arguments: hashpartitioning(i_item_sk#8, 5), true, [id=#14] + +(17) Sort [codegen id : 5] +Input [6]: [i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Arguments: [i_item_sk#8 ASC NULLS FIRST], false, 0 + +(18) SortMergeJoin [codegen id : 6] +Left keys [1]: [ws_item_sk#2] +Right keys [1]: [i_item_sk#8] +Join condition: None 
+ +(19) Project [codegen id : 6] +Output [6]: [ws_ext_sales_price#3, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Input [8]: [ws_item_sk#2, ws_ext_sales_price#3, i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] + +(20) HashAggregate [codegen id : 6] +Input [6]: [ws_ext_sales_price#3, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Keys [5]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#15] +Results [6]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, sum#16] + +(21) Exchange +Input [6]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, sum#16] +Arguments: hashpartitioning(i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, 5), true, [id=#17] + +(22) HashAggregate [codegen id : 7] +Input [6]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, sum#16] +Keys [5]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#3))#18] +Results [8]: [i_item_desc#10, i_category#13, i_class#12, i_current_price#11, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#3))#18,17,2) AS itemrevenue#19, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#3))#18,17,2) AS _w0#20, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#3))#18,17,2) AS _w1#21, i_item_id#9] + +(23) Exchange +Input [8]: [i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, _w0#20, _w1#21, i_item_id#9] +Arguments: hashpartitioning(i_class#12, 5), true, [id=#22] + +(24) Sort [codegen id : 8] +Input [8]: [i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, _w0#20, _w1#21, i_item_id#9] +Arguments: 
[i_class#12 ASC NULLS FIRST], false, 0 + +(25) Window +Input [8]: [i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, _w0#20, _w1#21, i_item_id#9] +Arguments: [sum(_w1#21) windowspecdefinition(i_class#12, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#23], [i_class#12] + +(26) Project [codegen id : 9] +Output [7]: [i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#20) * 100.00), DecimalType(21,2), true) as decimal(27,2))) / promote_precision(_we0#23)), DecimalType(38,17), true) AS revenueratio#24, i_item_id#9] +Input [9]: [i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, _w0#20, _w1#21, i_item_id#9, _we0#23] + +(27) TakeOrderedAndProject +Input [7]: [i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, revenueratio#24, i_item_id#9] +Arguments: 100, [i_category#13 ASC NULLS FIRST, i_class#12 ASC NULLS FIRST, i_item_id#9 ASC NULLS FIRST, i_item_desc#10 ASC NULLS FIRST, revenueratio#24 ASC NULLS FIRST], [i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, revenueratio#24] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12.sf100/simplified.txt new file mode 100644 index 0000000000000..cf472842f1431 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12.sf100/simplified.txt @@ -0,0 +1,47 @@ +TakeOrderedAndProject [i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue,revenueratio] + WholeStageCodegen (9) + Project [_w0,_we0,i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue] + InputAdapter + Window [_w1,i_class] + WholeStageCodegen (8) + Sort [i_class] + InputAdapter + Exchange [i_class] #1 + WholeStageCodegen (7) + 
HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,sum] [_w0,_w1,itemrevenue,sum,sum(UnscaledValue(ws_ext_sales_price))] + InputAdapter + Exchange [i_category,i_class,i_current_price,i_item_desc,i_item_id] #2 + WholeStageCodegen (6) + HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,ws_ext_sales_price] [sum,sum] + Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,ws_ext_sales_price] + SortMergeJoin [i_item_sk,ws_item_sk] + InputAdapter + WholeStageCodegen (3) + Sort [ws_item_sk] + InputAdapter + Exchange [ws_item_sk] #3 + WholeStageCodegen (2) + Project [ws_ext_sales_price,ws_item_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + WholeStageCodegen (5) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #5 + WholeStageCodegen (4) + Filter [i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/explain.txt new file mode 100644 index 0000000000000..63dabf5c122e6 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/explain.txt @@ -0,0 +1,137 @@ +== Physical Plan == +TakeOrderedAndProject (24) ++- * Project (23) + +- Window (22) + +- * Sort (21) + +- Exchange (20) + +- * HashAggregate (19) + +- Exchange (18) + +- * HashAggregate (17) + +- * Project (16) + +- * BroadcastHashJoin Inner BuildRight (15) + :- * Project (9) + : +- * BroadcastHashJoin 
Inner BuildRight (8) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.web_sales (1) + : +- BroadcastExchange (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.item (4) + +- BroadcastExchange (14) + +- * Project (13) + +- * Filter (12) + +- * ColumnarToRow (11) + +- Scan parquet default.date_dim (10) + + +(1) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_sales_price#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_sales_price#3] + +(3) Filter [codegen id : 3] +Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_sales_price#3] +Condition : (isnotnull(ws_item_sk#2) AND isnotnull(ws_sold_date_sk#1)) + +(4) Scan parquet default.item +Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [In(i_category, [Sports,Books,Home]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(6) Filter [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Condition : (i_category#9 IN (Sports,Books,Home) AND isnotnull(i_item_sk#4)) + +(7) BroadcastExchange +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] + +(8) BroadcastHashJoin [codegen id : 
3] +Left keys [1]: [ws_item_sk#2] +Right keys [1]: [i_item_sk#4] +Join condition: None + +(9) Project [codegen id : 3] +Output [7]: [ws_sold_date_sk#1, ws_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [9]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_sales_price#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(10) Scan parquet default.date_dim +Output [2]: [d_date_sk#11, d_date#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#11, d_date#12] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#11, d_date#12] +Condition : (((isnotnull(d_date#12) AND (d_date#12 >= 10644)) AND (d_date#12 <= 10674)) AND isnotnull(d_date_sk#11)) + +(13) Project [codegen id : 2] +Output [1]: [d_date_sk#11] +Input [2]: [d_date_sk#11, d_date#12] + +(14) BroadcastExchange +Input [1]: [d_date_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_sold_date_sk#1] +Right keys [1]: [d_date_sk#11] +Join condition: None + +(16) Project [codegen id : 3] +Output [6]: [ws_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [8]: [ws_sold_date_sk#1, ws_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#11] + +(17) HashAggregate [codegen id : 3] +Input [6]: [ws_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: 
[partial_sum(UnscaledValue(ws_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#14] +Results [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#15] + +(18) Exchange +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#15] +Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), true, [id=#16] + +(19) HashAggregate [codegen id : 4] +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#15] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#3))#17] +Results [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#3))#17,17,2) AS itemrevenue#18, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#3))#17,17,2) AS _w0#19, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#3))#17,17,2) AS _w1#20, i_item_id#5] + +(20) Exchange +Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5] +Arguments: hashpartitioning(i_class#8, 5), true, [id=#21] + +(21) Sort [codegen id : 5] +Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5] +Arguments: [i_class#8 ASC NULLS FIRST], false, 0 + +(22) Window +Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5] +Arguments: [sum(_w1#20) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#22], [i_class#8] + +(23) Project [codegen id : 6] +Output [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#19) * 100.00), DecimalType(21,2), true) as 
decimal(27,2))) / promote_precision(_we0#22)), DecimalType(38,17), true) AS revenueratio#23, i_item_id#5] +Input [9]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5, _we0#22] + +(24) TakeOrderedAndProject +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, revenueratio#23, i_item_id#5] +Arguments: 100, [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#23 ASC NULLS FIRST], [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, revenueratio#23] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/simplified.txt new file mode 100644 index 0000000000000..620baa8d07fa8 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/simplified.txt @@ -0,0 +1,38 @@ +TakeOrderedAndProject [i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue,revenueratio] + WholeStageCodegen (6) + Project [_w0,_we0,i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue] + InputAdapter + Window [_w1,i_class] + WholeStageCodegen (5) + Sort [i_class] + InputAdapter + Exchange [i_class] #1 + WholeStageCodegen (4) + HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,sum] [_w0,_w1,itemrevenue,sum,sum(UnscaledValue(ws_ext_sales_price))] + InputAdapter + Exchange [i_category,i_class,i_current_price,i_item_desc,i_item_id] #2 + WholeStageCodegen (3) + HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,ws_ext_sales_price] [sum,sum] + Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,ws_ext_sales_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,ws_ext_sales_price,ws_sold_date_sk] + BroadcastHashJoin 
[i_item_sk,ws_item_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/explain.txt new file mode 100644 index 0000000000000..e7cb5071d561f --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/explain.txt @@ -0,0 +1,216 @@ +== Physical Plan == +* HashAggregate (38) ++- Exchange (37) + +- * HashAggregate (36) + +- * Project (35) + +- * BroadcastHashJoin Inner BuildRight (34) + :- * Project (28) + : +- * BroadcastHashJoin Inner BuildRight (27) + : :- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildLeft (8) + : : : : :- BroadcastExchange (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.customer_demographics (1) + : : : : +- * Filter (7) + : : : : +- * ColumnarToRow (6) + : : : : +- Scan parquet default.store_sales (5) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.household_demographics (10) + : : +- BroadcastExchange (20) + : : +- * Project (19) + : : +- * Filter (18) + : : +- * ColumnarToRow (17) + : : +- Scan parquet default.date_dim (16) + : 
+- BroadcastExchange (26) + : +- * Filter (25) + : +- * ColumnarToRow (24) + : +- Scan parquet default.store (23) + +- BroadcastExchange (33) + +- * Project (32) + +- * Filter (31) + +- * ColumnarToRow (30) + +- Scan parquet default.customer_address (29) + + +(1) Scan parquet default.customer_demographics +Output [3]: [cd_demo_sk#1, cd_marital_status#2, cd_education_status#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk), Or(Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Advanced Degree)),And(EqualTo(cd_marital_status,S),EqualTo(cd_education_status,College))),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,2 yr Degree)))] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [3]: [cd_demo_sk#1, cd_marital_status#2, cd_education_status#3] + +(3) Filter [codegen id : 1] +Input [3]: [cd_demo_sk#1, cd_marital_status#2, cd_education_status#3] +Condition : (isnotnull(cd_demo_sk#1) AND ((((cd_marital_status#2 = M) AND (cd_education_status#3 = Advanced Degree)) OR ((cd_marital_status#2 = S) AND (cd_education_status#3 = College))) OR ((cd_marital_status#2 = W) AND (cd_education_status#3 = 2 yr Degree)))) + +(4) BroadcastExchange +Input [3]: [cd_demo_sk#1, cd_marital_status#2, cd_education_status#3] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#4] + +(5) Scan parquet default.store_sales +Output [10]: [ss_sold_date_sk#5, ss_cdemo_sk#6, ss_hdemo_sk#7, ss_addr_sk#8, ss_store_sk#9, ss_quantity#10, ss_sales_price#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_net_profit#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_store_sk), 
IsNotNull(ss_addr_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_hdemo_sk), Or(Or(And(GreaterThanOrEqual(ss_net_profit,100.00),LessThanOrEqual(ss_net_profit,200.00)),And(GreaterThanOrEqual(ss_net_profit,150.00),LessThanOrEqual(ss_net_profit,300.00))),And(GreaterThanOrEqual(ss_net_profit,50.00),LessThanOrEqual(ss_net_profit,250.00))), Or(Or(And(GreaterThanOrEqual(ss_sales_price,100.00),LessThanOrEqual(ss_sales_price,150.00)),And(GreaterThanOrEqual(ss_sales_price,50.00),LessThanOrEqual(ss_sales_price,100.00))),And(GreaterThanOrEqual(ss_sales_price,150.00),LessThanOrEqual(ss_sales_price,200.00)))] +ReadSchema: struct + +(6) ColumnarToRow +Input [10]: [ss_sold_date_sk#5, ss_cdemo_sk#6, ss_hdemo_sk#7, ss_addr_sk#8, ss_store_sk#9, ss_quantity#10, ss_sales_price#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_net_profit#14] + +(7) Filter +Input [10]: [ss_sold_date_sk#5, ss_cdemo_sk#6, ss_hdemo_sk#7, ss_addr_sk#8, ss_store_sk#9, ss_quantity#10, ss_sales_price#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_net_profit#14] +Condition : ((((((isnotnull(ss_store_sk#9) AND isnotnull(ss_addr_sk#8)) AND isnotnull(ss_sold_date_sk#5)) AND isnotnull(ss_cdemo_sk#6)) AND isnotnull(ss_hdemo_sk#7)) AND ((((ss_net_profit#14 >= 100.00) AND (ss_net_profit#14 <= 200.00)) OR ((ss_net_profit#14 >= 150.00) AND (ss_net_profit#14 <= 300.00))) OR ((ss_net_profit#14 >= 50.00) AND (ss_net_profit#14 <= 250.00)))) AND ((((ss_sales_price#11 >= 100.00) AND (ss_sales_price#11 <= 150.00)) OR ((ss_sales_price#11 >= 50.00) AND (ss_sales_price#11 <= 100.00))) OR ((ss_sales_price#11 >= 150.00) AND (ss_sales_price#11 <= 200.00)))) + +(8) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cd_demo_sk#1] +Right keys [1]: [ss_cdemo_sk#6] +Join condition: ((((((cd_marital_status#2 = M) AND (cd_education_status#3 = Advanced Degree)) AND (ss_sales_price#11 >= 100.00)) AND (ss_sales_price#11 <= 150.00)) OR ((((cd_marital_status#2 = S) AND (cd_education_status#3 = College)) AND 
(ss_sales_price#11 >= 50.00)) AND (ss_sales_price#11 <= 100.00))) OR ((((cd_marital_status#2 = W) AND (cd_education_status#3 = 2 yr Degree)) AND (ss_sales_price#11 >= 150.00)) AND (ss_sales_price#11 <= 200.00))) + +(9) Project [codegen id : 6] +Output [11]: [cd_marital_status#2, cd_education_status#3, ss_sold_date_sk#5, ss_hdemo_sk#7, ss_addr_sk#8, ss_store_sk#9, ss_quantity#10, ss_sales_price#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_net_profit#14] +Input [13]: [cd_demo_sk#1, cd_marital_status#2, cd_education_status#3, ss_sold_date_sk#5, ss_cdemo_sk#6, ss_hdemo_sk#7, ss_addr_sk#8, ss_store_sk#9, ss_quantity#10, ss_sales_price#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_net_profit#14] + +(10) Scan parquet default.household_demographics +Output [2]: [hd_demo_sk#15, hd_dep_count#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/household_demographics] +PushedFilters: [IsNotNull(hd_demo_sk), Or(Or(EqualTo(hd_dep_count,3),EqualTo(hd_dep_count,1)),EqualTo(hd_dep_count,1))] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [hd_demo_sk#15, hd_dep_count#16] + +(12) Filter [codegen id : 2] +Input [2]: [hd_demo_sk#15, hd_dep_count#16] +Condition : (isnotnull(hd_demo_sk#15) AND (((hd_dep_count#16 = 3) OR (hd_dep_count#16 = 1)) OR (hd_dep_count#16 = 1))) + +(13) BroadcastExchange +Input [2]: [hd_demo_sk#15, hd_dep_count#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] + +(14) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_hdemo_sk#7] +Right keys [1]: [hd_demo_sk#15] +Join condition: (((((((cd_marital_status#2 = M) AND (cd_education_status#3 = Advanced Degree)) AND (ss_sales_price#11 >= 100.00)) AND (ss_sales_price#11 <= 150.00)) AND (hd_dep_count#16 = 3)) OR (((((cd_marital_status#2 = S) AND (cd_education_status#3 = College)) AND (ss_sales_price#11 >= 
50.00)) AND (ss_sales_price#11 <= 100.00)) AND (hd_dep_count#16 = 1))) OR (((((cd_marital_status#2 = W) AND (cd_education_status#3 = 2 yr Degree)) AND (ss_sales_price#11 >= 150.00)) AND (ss_sales_price#11 <= 200.00)) AND (hd_dep_count#16 = 1))) + +(15) Project [codegen id : 6] +Output [7]: [ss_sold_date_sk#5, ss_addr_sk#8, ss_store_sk#9, ss_quantity#10, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_net_profit#14] +Input [13]: [cd_marital_status#2, cd_education_status#3, ss_sold_date_sk#5, ss_hdemo_sk#7, ss_addr_sk#8, ss_store_sk#9, ss_quantity#10, ss_sales_price#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_net_profit#14, hd_demo_sk#15, hd_dep_count#16] + +(16) Scan parquet default.date_dim +Output [2]: [d_date_sk#18, d_year#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#18, d_year#19] + +(18) Filter [codegen id : 3] +Input [2]: [d_date_sk#18, d_year#19] +Condition : ((isnotnull(d_year#19) AND (d_year#19 = 2001)) AND isnotnull(d_date_sk#18)) + +(19) Project [codegen id : 3] +Output [1]: [d_date_sk#18] +Input [2]: [d_date_sk#18, d_year#19] + +(20) BroadcastExchange +Input [1]: [d_date_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(21) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#18] +Join condition: None + +(22) Project [codegen id : 6] +Output [6]: [ss_addr_sk#8, ss_store_sk#9, ss_quantity#10, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_net_profit#14] +Input [8]: [ss_sold_date_sk#5, ss_addr_sk#8, ss_store_sk#9, ss_quantity#10, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_net_profit#14, d_date_sk#18] + +(23) Scan parquet 
default.store +Output [1]: [s_store_sk#21] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [1]: [s_store_sk#21] + +(25) Filter [codegen id : 4] +Input [1]: [s_store_sk#21] +Condition : isnotnull(s_store_sk#21) + +(26) BroadcastExchange +Input [1]: [s_store_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] + +(27) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_store_sk#9] +Right keys [1]: [s_store_sk#21] +Join condition: None + +(28) Project [codegen id : 6] +Output [5]: [ss_addr_sk#8, ss_quantity#10, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_net_profit#14] +Input [7]: [ss_addr_sk#8, ss_store_sk#9, ss_quantity#10, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_net_profit#14, s_store_sk#21] + +(29) Scan parquet default.customer_address +Output [3]: [ca_address_sk#23, ca_state#24, ca_country#25] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk), Or(Or(In(ca_state, [TX,OH]),In(ca_state, [OR,NM,KY])),In(ca_state, [VA,TX,MS]))] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 5] +Input [3]: [ca_address_sk#23, ca_state#24, ca_country#25] + +(31) Filter [codegen id : 5] +Input [3]: [ca_address_sk#23, ca_state#24, ca_country#25] +Condition : (((isnotnull(ca_country#25) AND (ca_country#25 = United States)) AND isnotnull(ca_address_sk#23)) AND ((ca_state#24 IN (TX,OH) OR ca_state#24 IN (OR,NM,KY)) OR ca_state#24 IN (VA,TX,MS))) + +(32) Project [codegen id : 5] +Output [2]: [ca_address_sk#23, ca_state#24] +Input [3]: [ca_address_sk#23, 
ca_state#24, ca_country#25] + +(33) BroadcastExchange +Input [2]: [ca_address_sk#23, ca_state#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] + +(34) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_addr_sk#8] +Right keys [1]: [ca_address_sk#23] +Join condition: ((((ca_state#24 IN (TX,OH) AND (ss_net_profit#14 >= 100.00)) AND (ss_net_profit#14 <= 200.00)) OR ((ca_state#24 IN (OR,NM,KY) AND (ss_net_profit#14 >= 150.00)) AND (ss_net_profit#14 <= 300.00))) OR ((ca_state#24 IN (VA,TX,MS) AND (ss_net_profit#14 >= 50.00)) AND (ss_net_profit#14 <= 250.00))) + +(35) Project [codegen id : 6] +Output [3]: [ss_quantity#10, ss_ext_sales_price#12, ss_ext_wholesale_cost#13] +Input [7]: [ss_addr_sk#8, ss_quantity#10, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_net_profit#14, ca_address_sk#23, ca_state#24] + +(36) HashAggregate [codegen id : 6] +Input [3]: [ss_quantity#10, ss_ext_sales_price#12, ss_ext_wholesale_cost#13] +Keys: [] +Functions [4]: [partial_avg(cast(ss_quantity#10 as bigint)), partial_avg(UnscaledValue(ss_ext_sales_price#12)), partial_avg(UnscaledValue(ss_ext_wholesale_cost#13)), partial_sum(UnscaledValue(ss_ext_wholesale_cost#13))] +Aggregate Attributes [7]: [sum#27, count#28, sum#29, count#30, sum#31, count#32, sum#33] +Results [7]: [sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40] + +(37) Exchange +Input [7]: [sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40] +Arguments: SinglePartition, true, [id=#41] + +(38) HashAggregate [codegen id : 7] +Input [7]: [sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40] +Keys: [] +Functions [4]: [avg(cast(ss_quantity#10 as bigint)), avg(UnscaledValue(ss_ext_sales_price#12)), avg(UnscaledValue(ss_ext_wholesale_cost#13)), sum(UnscaledValue(ss_ext_wholesale_cost#13))] +Aggregate Attributes [4]: [avg(cast(ss_quantity#10 as bigint))#42, avg(UnscaledValue(ss_ext_sales_price#12))#43, avg(UnscaledValue(ss_ext_wholesale_cost#13))#44, 
sum(UnscaledValue(ss_ext_wholesale_cost#13))#45] +Results [4]: [avg(cast(ss_quantity#10 as bigint))#42 AS avg(ss_quantity)#46, cast((avg(UnscaledValue(ss_ext_sales_price#12))#43 / 100.0) as decimal(11,6)) AS avg(ss_ext_sales_price)#47, cast((avg(UnscaledValue(ss_ext_wholesale_cost#13))#44 / 100.0) as decimal(11,6)) AS avg(ss_ext_wholesale_cost)#48, MakeDecimal(sum(UnscaledValue(ss_ext_wholesale_cost#13))#45,17,2) AS sum(ss_ext_wholesale_cost)#49] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/simplified.txt new file mode 100644 index 0000000000000..5a0627040acd4 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/simplified.txt @@ -0,0 +1,57 @@ +WholeStageCodegen (7) + HashAggregate [count,count,count,sum,sum,sum,sum] [avg(UnscaledValue(ss_ext_sales_price)),avg(UnscaledValue(ss_ext_wholesale_cost)),avg(cast(ss_quantity as bigint)),avg(ss_ext_sales_price),avg(ss_ext_wholesale_cost),avg(ss_quantity),count,count,count,sum,sum,sum,sum,sum(UnscaledValue(ss_ext_wholesale_cost)),sum(ss_ext_wholesale_cost)] + InputAdapter + Exchange #1 + WholeStageCodegen (6) + HashAggregate [ss_ext_sales_price,ss_ext_wholesale_cost,ss_quantity] [count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] + Project [ss_ext_sales_price,ss_ext_wholesale_cost,ss_quantity] + BroadcastHashJoin [ca_address_sk,ca_state,ss_addr_sk,ss_net_profit] + Project [ss_addr_sk,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit,ss_quantity] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_addr_sk,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit,ss_quantity,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_addr_sk,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit,ss_quantity,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin 
[cd_education_status,cd_marital_status,hd_demo_sk,hd_dep_count,ss_hdemo_sk,ss_sales_price] + Project [cd_education_status,cd_marital_status,ss_addr_sk,ss_ext_sales_price,ss_ext_wholesale_cost,ss_hdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [cd_demo_sk,cd_education_status,cd_marital_status,ss_cdemo_sk,ss_sales_price] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [cd_demo_sk,cd_education_status,cd_marital_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_marital_status] + Filter [ss_addr_sk,ss_cdemo_sk,ss_hdemo_sk,ss_net_profit,ss_sales_price,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_cdemo_sk,ss_ext_sales_price,ss_ext_wholesale_cost,ss_hdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [hd_demo_sk,hd_dep_count] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Project [ca_address_sk,ca_state] + Filter [ca_address_sk,ca_country,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_country,ca_state] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/explain.txt new file mode 100644 index 0000000000000..010e711f30bb0 --- /dev/null +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/explain.txt @@ -0,0 +1,216 @@ +== Physical Plan == +* HashAggregate (38) ++- Exchange (37) + +- * HashAggregate (36) + +- * Project (35) + +- * BroadcastHashJoin Inner BuildRight (34) + :- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (23) + : : +- * BroadcastHashJoin Inner BuildRight (22) + : : :- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.store (4) + : : : +- BroadcastExchange (14) + : : : +- * Project (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.customer_address (10) + : : +- BroadcastExchange (21) + : : +- * Project (20) + : : +- * Filter (19) + : : +- * ColumnarToRow (18) + : : +- Scan parquet default.date_dim (17) + : +- BroadcastExchange (27) + : +- * Filter (26) + : +- * ColumnarToRow (25) + : +- Scan parquet default.customer_demographics (24) + +- BroadcastExchange (33) + +- * Filter (32) + +- * ColumnarToRow (31) + +- Scan parquet default.household_demographics (30) + + +(1) Scan parquet default.store_sales +Output [10]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_hdemo_sk), 
Or(Or(And(GreaterThanOrEqual(ss_net_profit,100.00),LessThanOrEqual(ss_net_profit,200.00)),And(GreaterThanOrEqual(ss_net_profit,150.00),LessThanOrEqual(ss_net_profit,300.00))),And(GreaterThanOrEqual(ss_net_profit,50.00),LessThanOrEqual(ss_net_profit,250.00))), Or(Or(And(GreaterThanOrEqual(ss_sales_price,100.00),LessThanOrEqual(ss_sales_price,150.00)),And(GreaterThanOrEqual(ss_sales_price,50.00),LessThanOrEqual(ss_sales_price,100.00))),And(GreaterThanOrEqual(ss_sales_price,150.00),LessThanOrEqual(ss_sales_price,200.00)))] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 6] +Input [10]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10] + +(3) Filter [codegen id : 6] +Input [10]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10] +Condition : ((((((isnotnull(ss_store_sk#5) AND isnotnull(ss_addr_sk#4)) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_cdemo_sk#2)) AND isnotnull(ss_hdemo_sk#3)) AND ((((ss_net_profit#10 >= 100.00) AND (ss_net_profit#10 <= 200.00)) OR ((ss_net_profit#10 >= 150.00) AND (ss_net_profit#10 <= 300.00))) OR ((ss_net_profit#10 >= 50.00) AND (ss_net_profit#10 <= 250.00)))) AND ((((ss_sales_price#7 >= 100.00) AND (ss_sales_price#7 <= 150.00)) OR ((ss_sales_price#7 >= 50.00) AND (ss_sales_price#7 <= 100.00))) OR ((ss_sales_price#7 >= 150.00) AND (ss_sales_price#7 <= 200.00)))) + +(4) Scan parquet default.store +Output [1]: [s_store_sk#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [1]: [s_store_sk#11] + +(6) Filter [codegen id : 1] +Input [1]: [s_store_sk#11] +Condition : 
isnotnull(s_store_sk#11) + +(7) BroadcastExchange +Input [1]: [s_store_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] + +(8) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_store_sk#5] +Right keys [1]: [s_store_sk#11] +Join condition: None + +(9) Project [codegen id : 6] +Output [9]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10] +Input [11]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10, s_store_sk#11] + +(10) Scan parquet default.customer_address +Output [3]: [ca_address_sk#13, ca_state#14, ca_country#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk), Or(Or(In(ca_state, [TX,OH]),In(ca_state, [OR,NM,KY])),In(ca_state, [VA,TX,MS]))] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [ca_address_sk#13, ca_state#14, ca_country#15] + +(12) Filter [codegen id : 2] +Input [3]: [ca_address_sk#13, ca_state#14, ca_country#15] +Condition : (((isnotnull(ca_country#15) AND (ca_country#15 = United States)) AND isnotnull(ca_address_sk#13)) AND ((ca_state#14 IN (TX,OH) OR ca_state#14 IN (OR,NM,KY)) OR ca_state#14 IN (VA,TX,MS))) + +(13) Project [codegen id : 2] +Output [2]: [ca_address_sk#13, ca_state#14] +Input [3]: [ca_address_sk#13, ca_state#14, ca_country#15] + +(14) BroadcastExchange +Input [2]: [ca_address_sk#13, ca_state#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(15) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_addr_sk#4] +Right keys [1]: 
[ca_address_sk#13] +Join condition: ((((ca_state#14 IN (TX,OH) AND (ss_net_profit#10 >= 100.00)) AND (ss_net_profit#10 <= 200.00)) OR ((ca_state#14 IN (OR,NM,KY) AND (ss_net_profit#10 >= 150.00)) AND (ss_net_profit#10 <= 300.00))) OR ((ca_state#14 IN (VA,TX,MS) AND (ss_net_profit#10 >= 50.00)) AND (ss_net_profit#10 <= 250.00))) + +(16) Project [codegen id : 6] +Output [7]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_hdemo_sk#3, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9] +Input [11]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10, ca_address_sk#13, ca_state#14] + +(17) Scan parquet default.date_dim +Output [2]: [d_date_sk#17, d_year#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#17, d_year#18] + +(19) Filter [codegen id : 3] +Input [2]: [d_date_sk#17, d_year#18] +Condition : ((isnotnull(d_year#18) AND (d_year#18 = 2001)) AND isnotnull(d_date_sk#17)) + +(20) Project [codegen id : 3] +Output [1]: [d_date_sk#17] +Input [2]: [d_date_sk#17, d_year#18] + +(21) BroadcastExchange +Input [1]: [d_date_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#19] + +(22) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#17] +Join condition: None + +(23) Project [codegen id : 6] +Output [6]: [ss_cdemo_sk#2, ss_hdemo_sk#3, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9] +Input [8]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_hdemo_sk#3, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, d_date_sk#17] + +(24) 
Scan parquet default.customer_demographics +Output [3]: [cd_demo_sk#20, cd_marital_status#21, cd_education_status#22] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk), Or(Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Advanced Degree)),And(EqualTo(cd_marital_status,S),EqualTo(cd_education_status,College))),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,2 yr Degree)))] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 4] +Input [3]: [cd_demo_sk#20, cd_marital_status#21, cd_education_status#22] + +(26) Filter [codegen id : 4] +Input [3]: [cd_demo_sk#20, cd_marital_status#21, cd_education_status#22] +Condition : (isnotnull(cd_demo_sk#20) AND ((((cd_marital_status#21 = M) AND (cd_education_status#22 = Advanced Degree)) OR ((cd_marital_status#21 = S) AND (cd_education_status#22 = College))) OR ((cd_marital_status#21 = W) AND (cd_education_status#22 = 2 yr Degree)))) + +(27) BroadcastExchange +Input [3]: [cd_demo_sk#20, cd_marital_status#21, cd_education_status#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] + +(28) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#20] +Join condition: ((((((cd_marital_status#21 = M) AND (cd_education_status#22 = Advanced Degree)) AND (ss_sales_price#7 >= 100.00)) AND (ss_sales_price#7 <= 150.00)) OR ((((cd_marital_status#21 = S) AND (cd_education_status#22 = College)) AND (ss_sales_price#7 >= 50.00)) AND (ss_sales_price#7 <= 100.00))) OR ((((cd_marital_status#21 = W) AND (cd_education_status#22 = 2 yr Degree)) AND (ss_sales_price#7 >= 150.00)) AND (ss_sales_price#7 <= 200.00))) + +(29) Project [codegen id : 6] +Output [7]: [ss_hdemo_sk#3, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, 
cd_marital_status#21, cd_education_status#22] +Input [9]: [ss_cdemo_sk#2, ss_hdemo_sk#3, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, cd_demo_sk#20, cd_marital_status#21, cd_education_status#22] + +(30) Scan parquet default.household_demographics +Output [2]: [hd_demo_sk#24, hd_dep_count#25] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/household_demographics] +PushedFilters: [IsNotNull(hd_demo_sk), Or(Or(EqualTo(hd_dep_count,3),EqualTo(hd_dep_count,1)),EqualTo(hd_dep_count,1))] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 5] +Input [2]: [hd_demo_sk#24, hd_dep_count#25] + +(32) Filter [codegen id : 5] +Input [2]: [hd_demo_sk#24, hd_dep_count#25] +Condition : (isnotnull(hd_demo_sk#24) AND (((hd_dep_count#25 = 3) OR (hd_dep_count#25 = 1)) OR (hd_dep_count#25 = 1))) + +(33) BroadcastExchange +Input [2]: [hd_demo_sk#24, hd_dep_count#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#26] + +(34) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_hdemo_sk#3] +Right keys [1]: [hd_demo_sk#24] +Join condition: (((((((cd_marital_status#21 = M) AND (cd_education_status#22 = Advanced Degree)) AND (ss_sales_price#7 >= 100.00)) AND (ss_sales_price#7 <= 150.00)) AND (hd_dep_count#25 = 3)) OR (((((cd_marital_status#21 = S) AND (cd_education_status#22 = College)) AND (ss_sales_price#7 >= 50.00)) AND (ss_sales_price#7 <= 100.00)) AND (hd_dep_count#25 = 1))) OR (((((cd_marital_status#21 = W) AND (cd_education_status#22 = 2 yr Degree)) AND (ss_sales_price#7 >= 150.00)) AND (ss_sales_price#7 <= 200.00)) AND (hd_dep_count#25 = 1))) + +(35) Project [codegen id : 6] +Output [3]: [ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9] +Input [9]: [ss_hdemo_sk#3, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, cd_marital_status#21, 
cd_education_status#22, hd_demo_sk#24, hd_dep_count#25] + +(36) HashAggregate [codegen id : 6] +Input [3]: [ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9] +Keys: [] +Functions [4]: [partial_avg(cast(ss_quantity#6 as bigint)), partial_avg(UnscaledValue(ss_ext_sales_price#8)), partial_avg(UnscaledValue(ss_ext_wholesale_cost#9)), partial_sum(UnscaledValue(ss_ext_wholesale_cost#9))] +Aggregate Attributes [7]: [sum#27, count#28, sum#29, count#30, sum#31, count#32, sum#33] +Results [7]: [sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40] + +(37) Exchange +Input [7]: [sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40] +Arguments: SinglePartition, true, [id=#41] + +(38) HashAggregate [codegen id : 7] +Input [7]: [sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40] +Keys: [] +Functions [4]: [avg(cast(ss_quantity#6 as bigint)), avg(UnscaledValue(ss_ext_sales_price#8)), avg(UnscaledValue(ss_ext_wholesale_cost#9)), sum(UnscaledValue(ss_ext_wholesale_cost#9))] +Aggregate Attributes [4]: [avg(cast(ss_quantity#6 as bigint))#42, avg(UnscaledValue(ss_ext_sales_price#8))#43, avg(UnscaledValue(ss_ext_wholesale_cost#9))#44, sum(UnscaledValue(ss_ext_wholesale_cost#9))#45] +Results [4]: [avg(cast(ss_quantity#6 as bigint))#42 AS avg(ss_quantity)#46, cast((avg(UnscaledValue(ss_ext_sales_price#8))#43 / 100.0) as decimal(11,6)) AS avg(ss_ext_sales_price)#47, cast((avg(UnscaledValue(ss_ext_wholesale_cost#9))#44 / 100.0) as decimal(11,6)) AS avg(ss_ext_wholesale_cost)#48, MakeDecimal(sum(UnscaledValue(ss_ext_wholesale_cost#9))#45,17,2) AS sum(ss_ext_wholesale_cost)#49] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/simplified.txt new file mode 100644 index 0000000000000..8a0a2ee6b74c7 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/simplified.txt @@ -0,0 +1,57 @@ 
+WholeStageCodegen (7) + HashAggregate [count,count,count,sum,sum,sum,sum] [avg(UnscaledValue(ss_ext_sales_price)),avg(UnscaledValue(ss_ext_wholesale_cost)),avg(cast(ss_quantity as bigint)),avg(ss_ext_sales_price),avg(ss_ext_wholesale_cost),avg(ss_quantity),count,count,count,sum,sum,sum,sum,sum(UnscaledValue(ss_ext_wholesale_cost)),sum(ss_ext_wholesale_cost)] + InputAdapter + Exchange #1 + WholeStageCodegen (6) + HashAggregate [ss_ext_sales_price,ss_ext_wholesale_cost,ss_quantity] [count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] + Project [ss_ext_sales_price,ss_ext_wholesale_cost,ss_quantity] + BroadcastHashJoin [cd_education_status,cd_marital_status,hd_demo_sk,hd_dep_count,ss_hdemo_sk,ss_sales_price] + Project [cd_education_status,cd_marital_status,ss_ext_sales_price,ss_ext_wholesale_cost,ss_hdemo_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [cd_demo_sk,cd_education_status,cd_marital_status,ss_cdemo_sk,ss_sales_price] + Project [ss_cdemo_sk,ss_ext_sales_price,ss_ext_wholesale_cost,ss_hdemo_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_cdemo_sk,ss_ext_sales_price,ss_ext_wholesale_cost,ss_hdemo_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [ca_address_sk,ca_state,ss_addr_sk,ss_net_profit] + Project [ss_addr_sk,ss_cdemo_sk,ss_ext_sales_price,ss_ext_wholesale_cost,ss_hdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Filter [ss_addr_sk,ss_cdemo_sk,ss_hdemo_sk,ss_net_profit,ss_sales_price,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_cdemo_sk,ss_ext_sales_price,ss_ext_wholesale_cost,ss_hdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk] + InputAdapter + BroadcastExchange 
#3 + WholeStageCodegen (2) + Project [ca_address_sk,ca_state] + Filter [ca_address_sk,ca_country,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_country,ca_state] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [cd_demo_sk,cd_education_status,cd_marital_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_marital_status] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [hd_demo_sk,hd_dep_count] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/explain.txt new file mode 100644 index 0000000000000..c03db659679ce --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/explain.txt @@ -0,0 +1,878 @@ +== Physical Plan == +TakeOrderedAndProject (131) ++- * HashAggregate (130) + +- Exchange (129) + +- * HashAggregate (128) + +- * Expand (127) + +- Union (126) + :- * Project (87) + : +- * Filter (86) + : +- * HashAggregate (85) + : +- Exchange (84) + : +- * HashAggregate (83) + : +- * Project (82) + : +- * BroadcastHashJoin Inner BuildRight (81) + : :- * Project (71) + : : +- * BroadcastHashJoin Inner BuildRight (70) + : : :- SortMergeJoin LeftSemi (64) + : : : :- * Sort (5) + : : : : +- Exchange (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- * Sort (63) + : : : +- Exchange (62) + : : : +- * Project (61) + : : : +- * BroadcastHashJoin Inner BuildRight (60) + : : : :- * 
Filter (8) + : : : : +- * ColumnarToRow (7) + : : : : +- Scan parquet default.item (6) + : : : +- BroadcastExchange (59) + : : : +- * HashAggregate (58) + : : : +- * HashAggregate (57) + : : : +- * HashAggregate (56) + : : : +- Exchange (55) + : : : +- * HashAggregate (54) + : : : +- SortMergeJoin LeftSemi (53) + : : : :- SortMergeJoin LeftSemi (41) + : : : : :- * Sort (26) + : : : : : +- Exchange (25) + : : : : : +- * Project (24) + : : : : : +- * BroadcastHashJoin Inner BuildRight (23) + : : : : : :- * Project (18) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (17) + : : : : : : :- * Filter (11) + : : : : : : : +- * ColumnarToRow (10) + : : : : : : : +- Scan parquet default.store_sales (9) + : : : : : : +- BroadcastExchange (16) + : : : : : : +- * Project (15) + : : : : : : +- * Filter (14) + : : : : : : +- * ColumnarToRow (13) + : : : : : : +- Scan parquet default.date_dim (12) + : : : : : +- BroadcastExchange (22) + : : : : : +- * Filter (21) + : : : : : +- * ColumnarToRow (20) + : : : : : +- Scan parquet default.item (19) + : : : : +- * Sort (40) + : : : : +- Exchange (39) + : : : : +- * Project (38) + : : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : : :- * Project (32) + : : : : : +- * BroadcastHashJoin Inner BuildRight (31) + : : : : : :- * Filter (29) + : : : : : : +- * ColumnarToRow (28) + : : : : : : +- Scan parquet default.catalog_sales (27) + : : : : : +- ReusedExchange (30) + : : : : +- BroadcastExchange (36) + : : : : +- * Filter (35) + : : : : +- * ColumnarToRow (34) + : : : : +- Scan parquet default.item (33) + : : : +- * Sort (52) + : : : +- Exchange (51) + : : : +- * Project (50) + : : : +- * BroadcastHashJoin Inner BuildRight (49) + : : : :- * Project (47) + : : : : +- * BroadcastHashJoin Inner BuildRight (46) + : : : : :- * Filter (44) + : : : : : +- * ColumnarToRow (43) + : : : : : +- Scan parquet default.web_sales (42) + : : : : +- ReusedExchange (45) + : : : +- ReusedExchange (48) + : : +- BroadcastExchange (69) + : : +- * 
Project (68) + : : +- * Filter (67) + : : +- * ColumnarToRow (66) + : : +- Scan parquet default.date_dim (65) + : +- BroadcastExchange (80) + : +- SortMergeJoin LeftSemi (79) + : :- * Sort (76) + : : +- Exchange (75) + : : +- * Filter (74) + : : +- * ColumnarToRow (73) + : : +- Scan parquet default.item (72) + : +- * Sort (78) + : +- ReusedExchange (77) + :- * Project (106) + : +- * Filter (105) + : +- * HashAggregate (104) + : +- Exchange (103) + : +- * HashAggregate (102) + : +- * Project (101) + : +- * BroadcastHashJoin Inner BuildRight (100) + : :- * Project (98) + : : +- * BroadcastHashJoin Inner BuildRight (97) + : : :- SortMergeJoin LeftSemi (95) + : : : :- * Sort (92) + : : : : +- Exchange (91) + : : : : +- * Filter (90) + : : : : +- * ColumnarToRow (89) + : : : : +- Scan parquet default.catalog_sales (88) + : : : +- * Sort (94) + : : : +- ReusedExchange (93) + : : +- ReusedExchange (96) + : +- ReusedExchange (99) + +- * Project (125) + +- * Filter (124) + +- * HashAggregate (123) + +- Exchange (122) + +- * HashAggregate (121) + +- * Project (120) + +- * BroadcastHashJoin Inner BuildRight (119) + :- * Project (117) + : +- * BroadcastHashJoin Inner BuildRight (116) + : :- SortMergeJoin LeftSemi (114) + : : :- * Sort (111) + : : : +- Exchange (110) + : : : +- * Filter (109) + : : : +- * ColumnarToRow (108) + : : : +- Scan parquet default.web_sales (107) + : : +- * Sort (113) + : : +- ReusedExchange (112) + : +- ReusedExchange (115) + +- ReusedExchange (118) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] + +(3) 
Filter [codegen id : 1] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(4) Exchange +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Arguments: hashpartitioning(ss_item_sk#2, 5), true, [id=#5] + +(5) Sort [codegen id : 2] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Arguments: [ss_item_sk#2 ASC NULLS FIRST], false, 0 + +(6) Scan parquet default.item +Output [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 17] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(8) Filter [codegen id : 17] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Condition : ((isnotnull(i_class_id#8) AND isnotnull(i_brand_id#7)) AND isnotnull(i_category_id#9)) + +(9) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#1, ss_item_sk#2] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(10) ColumnarToRow [codegen id : 5] +Input [2]: [ss_sold_date_sk#1, ss_item_sk#2] + +(11) Filter [codegen id : 5] +Input [2]: [ss_sold_date_sk#1, ss_item_sk#2] +Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(12) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_year#11] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(13) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#10, d_year#11] + +(14) Filter [codegen id : 3] +Input [2]: [d_date_sk#10, d_year#11] +Condition : (((isnotnull(d_year#11) AND (d_year#11 >= 1999)) AND (d_year#11 <= 2001)) AND isnotnull(d_date_sk#10)) + +(15) Project [codegen id : 3] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_year#11] + +(16) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(17) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(18) Project [codegen id : 5] +Output [1]: [ss_item_sk#2] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, d_date_sk#10] + +(19) Scan parquet default.item +Output [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category_id), IsNotNull(i_brand_id), IsNotNull(i_class_id)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 4] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(21) Filter [codegen id : 4] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Condition : (((isnotnull(i_item_sk#6) AND isnotnull(i_category_id#9)) AND isnotnull(i_brand_id#7)) AND isnotnull(i_class_id#8)) + +(22) BroadcastExchange +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#13] + +(23) 
BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(24) Project [codegen id : 5] +Output [3]: [i_brand_id#7 AS brand_id#14, i_class_id#8 AS class_id#15, i_category_id#9 AS category_id#16] +Input [5]: [ss_item_sk#2, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(25) Exchange +Input [3]: [brand_id#14, class_id#15, category_id#16] +Arguments: hashpartitioning(coalesce(brand_id#14, 0), isnull(brand_id#14), coalesce(class_id#15, 0), isnull(class_id#15), coalesce(category_id#16, 0), isnull(category_id#16), 5), true, [id=#17] + +(26) Sort [codegen id : 6] +Input [3]: [brand_id#14, class_id#15, category_id#16] +Arguments: [coalesce(brand_id#14, 0) ASC NULLS FIRST, isnull(brand_id#14) ASC NULLS FIRST, coalesce(class_id#15, 0) ASC NULLS FIRST, isnull(class_id#15) ASC NULLS FIRST, coalesce(category_id#16, 0) ASC NULLS FIRST, isnull(category_id#16) ASC NULLS FIRST], false, 0 + +(27) Scan parquet default.catalog_sales +Output [2]: [cs_sold_date_sk#18, cs_item_sk#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 9] +Input [2]: [cs_sold_date_sk#18, cs_item_sk#19] + +(29) Filter [codegen id : 9] +Input [2]: [cs_sold_date_sk#18, cs_item_sk#19] +Condition : (isnotnull(cs_item_sk#19) AND isnotnull(cs_sold_date_sk#18)) + +(30) ReusedExchange [Reuses operator id: 16] +Output [1]: [d_date_sk#10] + +(31) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [cs_sold_date_sk#18] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(32) Project [codegen id : 9] +Output [1]: [cs_item_sk#19] +Input [3]: [cs_sold_date_sk#18, cs_item_sk#19, d_date_sk#10] + +(33) Scan parquet default.item +Output [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, 
i_category_id#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 8] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(35) Filter [codegen id : 8] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Condition : isnotnull(i_item_sk#6) + +(36) BroadcastExchange +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#20] + +(37) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [cs_item_sk#19] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(38) Project [codegen id : 9] +Output [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Input [5]: [cs_item_sk#19, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(39) Exchange +Input [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Arguments: hashpartitioning(coalesce(i_brand_id#7, 0), isnull(i_brand_id#7), coalesce(i_class_id#8, 0), isnull(i_class_id#8), coalesce(i_category_id#9, 0), isnull(i_category_id#9), 5), true, [id=#21] + +(40) Sort [codegen id : 10] +Input [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Arguments: [coalesce(i_brand_id#7, 0) ASC NULLS FIRST, isnull(i_brand_id#7) ASC NULLS FIRST, coalesce(i_class_id#8, 0) ASC NULLS FIRST, isnull(i_class_id#8) ASC NULLS FIRST, coalesce(i_category_id#9, 0) ASC NULLS FIRST, isnull(i_category_id#9) ASC NULLS FIRST], false, 0 + +(41) SortMergeJoin +Left keys [6]: [coalesce(brand_id#14, 0), isnull(brand_id#14), coalesce(class_id#15, 0), isnull(class_id#15), coalesce(category_id#16, 0), isnull(category_id#16)] +Right keys [6]: [coalesce(i_brand_id#7, 0), isnull(i_brand_id#7), coalesce(i_class_id#8, 0), isnull(i_class_id#8), coalesce(i_category_id#9, 0), isnull(i_category_id#9)] +Join 
condition: None + +(42) Scan parquet default.web_sales +Output [2]: [ws_sold_date_sk#22, ws_item_sk#23] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(43) ColumnarToRow [codegen id : 13] +Input [2]: [ws_sold_date_sk#22, ws_item_sk#23] + +(44) Filter [codegen id : 13] +Input [2]: [ws_sold_date_sk#22, ws_item_sk#23] +Condition : (isnotnull(ws_item_sk#23) AND isnotnull(ws_sold_date_sk#22)) + +(45) ReusedExchange [Reuses operator id: 16] +Output [1]: [d_date_sk#10] + +(46) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ws_sold_date_sk#22] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(47) Project [codegen id : 13] +Output [1]: [ws_item_sk#23] +Input [3]: [ws_sold_date_sk#22, ws_item_sk#23, d_date_sk#10] + +(48) ReusedExchange [Reuses operator id: 36] +Output [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(49) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ws_item_sk#23] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(50) Project [codegen id : 13] +Output [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Input [5]: [ws_item_sk#23, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(51) Exchange +Input [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Arguments: hashpartitioning(coalesce(i_brand_id#7, 0), isnull(i_brand_id#7), coalesce(i_class_id#8, 0), isnull(i_class_id#8), coalesce(i_category_id#9, 0), isnull(i_category_id#9), 5), true, [id=#24] + +(52) Sort [codegen id : 14] +Input [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Arguments: [coalesce(i_brand_id#7, 0) ASC NULLS FIRST, isnull(i_brand_id#7) ASC NULLS FIRST, coalesce(i_class_id#8, 0) ASC NULLS FIRST, isnull(i_class_id#8) ASC NULLS FIRST, coalesce(i_category_id#9, 0) ASC NULLS FIRST, isnull(i_category_id#9) ASC NULLS 
FIRST], false, 0 + +(53) SortMergeJoin +Left keys [6]: [coalesce(brand_id#14, 0), isnull(brand_id#14), coalesce(class_id#15, 0), isnull(class_id#15), coalesce(category_id#16, 0), isnull(category_id#16)] +Right keys [6]: [coalesce(i_brand_id#7, 0), isnull(i_brand_id#7), coalesce(i_class_id#8, 0), isnull(i_class_id#8), coalesce(i_category_id#9, 0), isnull(i_category_id#9)] +Join condition: None + +(54) HashAggregate [codegen id : 15] +Input [3]: [brand_id#14, class_id#15, category_id#16] +Keys [3]: [brand_id#14, class_id#15, category_id#16] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#14, class_id#15, category_id#16] + +(55) Exchange +Input [3]: [brand_id#14, class_id#15, category_id#16] +Arguments: hashpartitioning(brand_id#14, class_id#15, category_id#16, 5), true, [id=#25] + +(56) HashAggregate [codegen id : 16] +Input [3]: [brand_id#14, class_id#15, category_id#16] +Keys [3]: [brand_id#14, class_id#15, category_id#16] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#14, class_id#15, category_id#16] + +(57) HashAggregate [codegen id : 16] +Input [3]: [brand_id#14, class_id#15, category_id#16] +Keys [3]: [brand_id#14, class_id#15, category_id#16] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#14, class_id#15, category_id#16] + +(58) HashAggregate [codegen id : 16] +Input [3]: [brand_id#14, class_id#15, category_id#16] +Keys [3]: [brand_id#14, class_id#15, category_id#16] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#14, class_id#15, category_id#16] + +(59) BroadcastExchange +Input [3]: [brand_id#14, class_id#15, category_id#16] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#26] + +(60) BroadcastHashJoin [codegen id : 17] +Left keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Right keys [3]: [brand_id#14, class_id#15, category_id#16] +Join condition: None + +(61) Project [codegen id : 17] +Output [1]: [i_item_sk#6 
AS ss_item_sk#27] +Input [7]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, brand_id#14, class_id#15, category_id#16] + +(62) Exchange +Input [1]: [ss_item_sk#27] +Arguments: hashpartitioning(ss_item_sk#27, 5), true, [id=#28] + +(63) Sort [codegen id : 18] +Input [1]: [ss_item_sk#27] +Arguments: [ss_item_sk#27 ASC NULLS FIRST], false, 0 + +(64) SortMergeJoin +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [ss_item_sk#27] +Join condition: None + +(65) Scan parquet default.date_dim +Output [3]: [d_date_sk#10, d_year#11, d_moy#29] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,11), IsNotNull(d_date_sk)] +ReadSchema: struct + +(66) ColumnarToRow [codegen id : 19] +Input [3]: [d_date_sk#10, d_year#11, d_moy#29] + +(67) Filter [codegen id : 19] +Input [3]: [d_date_sk#10, d_year#11, d_moy#29] +Condition : ((((isnotnull(d_year#11) AND isnotnull(d_moy#29)) AND (d_year#11 = 2001)) AND (d_moy#29 = 11)) AND isnotnull(d_date_sk#10)) + +(68) Project [codegen id : 19] +Output [1]: [d_date_sk#10] +Input [3]: [d_date_sk#10, d_year#11, d_moy#29] + +(69) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#30] + +(70) BroadcastHashJoin [codegen id : 38] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(71) Project [codegen id : 38] +Output [3]: [ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4, d_date_sk#10] + +(72) Scan parquet default.item +Output [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(73) ColumnarToRow [codegen id : 20] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(74) Filter [codegen id : 20] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Condition : isnotnull(i_item_sk#6) + +(75) Exchange +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Arguments: hashpartitioning(i_item_sk#6, 5), true, [id=#31] + +(76) Sort [codegen id : 21] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Arguments: [i_item_sk#6 ASC NULLS FIRST], false, 0 + +(77) ReusedExchange [Reuses operator id: 62] +Output [1]: [ss_item_sk#27] + +(78) Sort [codegen id : 37] +Input [1]: [ss_item_sk#27] +Arguments: [ss_item_sk#27 ASC NULLS FIRST], false, 0 + +(79) SortMergeJoin +Left keys [1]: [i_item_sk#6] +Right keys [1]: [ss_item_sk#27] +Join condition: None + +(80) BroadcastExchange +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#32] + +(81) BroadcastHashJoin [codegen id : 38] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(82) Project [codegen id : 38] +Output [5]: [ss_quantity#3, ss_list_price#4, i_brand_id#7, i_class_id#8, i_category_id#9] +Input [7]: [ss_item_sk#2, ss_quantity#3, ss_list_price#4, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(83) HashAggregate [codegen id : 38] +Input [5]: [ss_quantity#3, ss_list_price#4, i_brand_id#7, i_class_id#8, i_category_id#9] +Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), 
true)), partial_count(1)] +Aggregate Attributes [3]: [sum#33, isEmpty#34, count#35] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#36, isEmpty#37, count#38] + +(84) Exchange +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#36, isEmpty#37, count#38] +Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#39] + +(85) HashAggregate [codegen id : 39] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#36, isEmpty#37, count#38] +Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#40, count(1)#41] +Results [7]: [store AS channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#40 AS sales#43, count(1)#41 AS number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#40 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#45] + +(86) Filter [codegen id : 39] +Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), 
DecimalType(18,2), true))#45] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#45) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#45 as decimal(32,6)) > cast(Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) + +(87) Project [codegen id : 39] +Output [6]: [sales#43, number_sales#44, channel#42, i_brand_id#7, i_class_id#8, i_category_id#9] +Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#45] + +(88) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#18, cs_item_sk#19, cs_quantity#48, cs_list_price#49] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(89) ColumnarToRow [codegen id : 40] +Input [4]: [cs_sold_date_sk#18, cs_item_sk#19, cs_quantity#48, cs_list_price#49] + +(90) Filter [codegen id : 40] +Input [4]: [cs_sold_date_sk#18, cs_item_sk#19, cs_quantity#48, cs_list_price#49] +Condition : (isnotnull(cs_item_sk#19) AND isnotnull(cs_sold_date_sk#18)) + +(91) Exchange +Input [4]: [cs_sold_date_sk#18, cs_item_sk#19, cs_quantity#48, cs_list_price#49] +Arguments: hashpartitioning(cs_item_sk#19, 5), true, [id=#50] + +(92) Sort [codegen id : 41] +Input [4]: [cs_sold_date_sk#18, cs_item_sk#19, cs_quantity#48, cs_list_price#49] +Arguments: [cs_item_sk#19 ASC NULLS FIRST], false, 0 + +(93) 
ReusedExchange [Reuses operator id: 62] +Output [1]: [ss_item_sk#27] + +(94) Sort [codegen id : 57] +Input [1]: [ss_item_sk#27] +Arguments: [ss_item_sk#27 ASC NULLS FIRST], false, 0 + +(95) SortMergeJoin +Left keys [1]: [cs_item_sk#19] +Right keys [1]: [ss_item_sk#27] +Join condition: None + +(96) ReusedExchange [Reuses operator id: 69] +Output [1]: [d_date_sk#10] + +(97) BroadcastHashJoin [codegen id : 77] +Left keys [1]: [cs_sold_date_sk#18] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(98) Project [codegen id : 77] +Output [3]: [cs_item_sk#19, cs_quantity#48, cs_list_price#49] +Input [5]: [cs_sold_date_sk#18, cs_item_sk#19, cs_quantity#48, cs_list_price#49, d_date_sk#10] + +(99) ReusedExchange [Reuses operator id: 80] +Output [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(100) BroadcastHashJoin [codegen id : 77] +Left keys [1]: [cs_item_sk#19] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(101) Project [codegen id : 77] +Output [5]: [cs_quantity#48, cs_list_price#49, i_brand_id#7, i_class_id#8, i_category_id#9] +Input [7]: [cs_item_sk#19, cs_quantity#48, cs_list_price#49, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(102) HashAggregate [codegen id : 77] +Input [5]: [cs_quantity#48, cs_list_price#49, i_brand_id#7, i_class_id#8, i_category_id#9] +Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Aggregate Attributes [3]: [sum#51, isEmpty#52, count#53] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#54, isEmpty#55, count#56] + +(103) Exchange +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#54, isEmpty#55, count#56] +Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#57] + +(104) HashAggregate [codegen id : 
78] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#54, isEmpty#55, count#56] +Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#58, count(1)#59] +Results [7]: [catalog AS channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#58 AS sales#61, count(1)#59 AS number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#58 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#63] + +(105) Filter [codegen id : 78] +Input [7]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#63] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#63) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as 
decimal(12,2)))), DecimalType(18,2), true))#63 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) + +(106) Project [codegen id : 78] +Output [6]: [sales#61, number_sales#62, channel#60, i_brand_id#7, i_class_id#8, i_category_id#9] +Input [7]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#63] + +(107) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#22, ws_item_sk#23, ws_quantity#64, ws_list_price#65] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(108) ColumnarToRow [codegen id : 79] +Input [4]: [ws_sold_date_sk#22, ws_item_sk#23, ws_quantity#64, ws_list_price#65] + +(109) Filter [codegen id : 79] +Input [4]: [ws_sold_date_sk#22, ws_item_sk#23, ws_quantity#64, ws_list_price#65] +Condition : (isnotnull(ws_item_sk#23) AND isnotnull(ws_sold_date_sk#22)) + +(110) Exchange +Input [4]: [ws_sold_date_sk#22, ws_item_sk#23, ws_quantity#64, ws_list_price#65] +Arguments: hashpartitioning(ws_item_sk#23, 5), true, [id=#66] + +(111) Sort [codegen id : 80] +Input [4]: [ws_sold_date_sk#22, ws_item_sk#23, ws_quantity#64, ws_list_price#65] +Arguments: [ws_item_sk#23 ASC NULLS FIRST], false, 0 + +(112) ReusedExchange [Reuses operator id: 62] +Output [1]: [ss_item_sk#27] + +(113) Sort [codegen id : 96] +Input [1]: [ss_item_sk#27] +Arguments: [ss_item_sk#27 ASC NULLS FIRST], false, 0 + +(114) SortMergeJoin +Left keys [1]: [ws_item_sk#23] +Right keys [1]: [ss_item_sk#27] +Join condition: None + +(115) ReusedExchange [Reuses operator id: 69] +Output [1]: [d_date_sk#10] + +(116) 
BroadcastHashJoin [codegen id : 116] +Left keys [1]: [ws_sold_date_sk#22] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(117) Project [codegen id : 116] +Output [3]: [ws_item_sk#23, ws_quantity#64, ws_list_price#65] +Input [5]: [ws_sold_date_sk#22, ws_item_sk#23, ws_quantity#64, ws_list_price#65, d_date_sk#10] + +(118) ReusedExchange [Reuses operator id: 80] +Output [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(119) BroadcastHashJoin [codegen id : 116] +Left keys [1]: [ws_item_sk#23] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(120) Project [codegen id : 116] +Output [5]: [ws_quantity#64, ws_list_price#65, i_brand_id#7, i_class_id#8, i_category_id#9] +Input [7]: [ws_item_sk#23, ws_quantity#64, ws_list_price#65, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(121) HashAggregate [codegen id : 116] +Input [5]: [ws_quantity#64, ws_list_price#65, i_brand_id#7, i_class_id#8, i_category_id#9] +Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Aggregate Attributes [3]: [sum#67, isEmpty#68, count#69] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#70, isEmpty#71, count#72] + +(122) Exchange +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#70, isEmpty#71, count#72] +Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#73] + +(123) HashAggregate [codegen id : 117] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#70, isEmpty#71, count#72] +Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] 
+Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#74, count(1)#75] +Results [7]: [web AS channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#74 AS sales#77, count(1)#75 AS number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#74 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79] + +(124) Filter [codegen id : 117] +Input [7]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) + +(125) Project [codegen id : 117] +Output [6]: [sales#77, number_sales#78, channel#76, i_brand_id#7, i_class_id#8, i_category_id#9] +Input [7]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, 
number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79] + +(126) Union + +(127) Expand [codegen id : 118] +Input [6]: [sales#43, number_sales#44, channel#42, i_brand_id#7, i_class_id#8, i_category_id#9] +Arguments: [List(sales#43, number_sales#44, channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, 0), List(sales#43, number_sales#44, channel#42, i_brand_id#7, i_class_id#8, null, 1), List(sales#43, number_sales#44, channel#42, i_brand_id#7, null, null, 3), List(sales#43, number_sales#44, channel#42, null, null, null, 7), List(sales#43, number_sales#44, null, null, null, null, 15)], [sales#43, number_sales#44, channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, spark_grouping_id#84] + +(128) HashAggregate [codegen id : 118] +Input [7]: [sales#43, number_sales#44, channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, spark_grouping_id#84] +Keys [5]: [channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, spark_grouping_id#84] +Functions [2]: [partial_sum(sales#43), partial_sum(number_sales#44)] +Aggregate Attributes [3]: [sum#85, isEmpty#86, sum#87] +Results [8]: [channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, spark_grouping_id#84, sum#88, isEmpty#89, sum#90] + +(129) Exchange +Input [8]: [channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, spark_grouping_id#84, sum#88, isEmpty#89, sum#90] +Arguments: hashpartitioning(channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, spark_grouping_id#84, 5), true, [id=#91] + +(130) HashAggregate [codegen id : 119] +Input [8]: [channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, spark_grouping_id#84, sum#88, isEmpty#89, sum#90] +Keys [5]: [channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, spark_grouping_id#84] +Functions [2]: [sum(sales#43), sum(number_sales#44)] +Aggregate Attributes [2]: [sum(sales#43)#92, 
sum(number_sales#44)#93] +Results [6]: [channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, sum(sales#43)#92 AS sum(sales)#94, sum(number_sales#44)#93 AS sum(number_sales)#95] + +(131) TakeOrderedAndProject +Input [6]: [channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, sum(sales)#94, sum(number_sales)#95] +Arguments: 100, [channel#80 ASC NULLS FIRST, i_brand_id#81 ASC NULLS FIRST, i_class_id#82 ASC NULLS FIRST, i_category_id#83 ASC NULLS FIRST], [channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, sum(sales)#94, sum(number_sales)#95] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 86 Hosting Expression = Subquery scalar-subquery#46, [id=#47] +* HashAggregate (157) ++- Exchange (156) + +- * HashAggregate (155) + +- Union (154) + :- * Project (141) + : +- * BroadcastHashJoin Inner BuildRight (140) + : :- * Filter (134) + : : +- * ColumnarToRow (133) + : : +- Scan parquet default.store_sales (132) + : +- BroadcastExchange (139) + : +- * Project (138) + : +- * Filter (137) + : +- * ColumnarToRow (136) + : +- Scan parquet default.date_dim (135) + :- * Project (147) + : +- * BroadcastHashJoin Inner BuildRight (146) + : :- * Filter (144) + : : +- * ColumnarToRow (143) + : : +- Scan parquet default.catalog_sales (142) + : +- ReusedExchange (145) + +- * Project (153) + +- * BroadcastHashJoin Inner BuildRight (152) + :- * Filter (150) + : +- * ColumnarToRow (149) + : +- Scan parquet default.web_sales (148) + +- ReusedExchange (151) + + +(132) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(133) ColumnarToRow [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] + +(134) Filter [codegen id : 2] +Input [3]: 
[ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] +Condition : isnotnull(ss_sold_date_sk#1) + +(135) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_year#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(136) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#10, d_year#11] + +(137) Filter [codegen id : 1] +Input [2]: [d_date_sk#10, d_year#11] +Condition : (((isnotnull(d_year#11) AND (d_year#11 >= 1999)) AND (d_year#11 <= 2001)) AND isnotnull(d_date_sk#10)) + +(138) Project [codegen id : 1] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_year#11] + +(139) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#96] + +(140) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(141) Project [codegen id : 2] +Output [2]: [ss_quantity#3 AS quantity#97, ss_list_price#4 AS list_price#98] +Input [4]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, d_date_sk#10] + +(142) Scan parquet default.catalog_sales +Output [3]: [cs_sold_date_sk#18, cs_quantity#48, cs_list_price#49] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(143) ColumnarToRow [codegen id : 4] +Input [3]: [cs_sold_date_sk#18, cs_quantity#48, cs_list_price#49] + +(144) Filter [codegen id : 4] +Input [3]: [cs_sold_date_sk#18, cs_quantity#48, cs_list_price#49] +Condition : isnotnull(cs_sold_date_sk#18) + +(145) ReusedExchange [Reuses operator id: 139] +Output 
[1]: [d_date_sk#10] + +(146) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#18] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(147) Project [codegen id : 4] +Output [2]: [cs_quantity#48 AS quantity#99, cs_list_price#49 AS list_price#100] +Input [4]: [cs_sold_date_sk#18, cs_quantity#48, cs_list_price#49, d_date_sk#10] + +(148) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#22, ws_quantity#64, ws_list_price#65] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(149) ColumnarToRow [codegen id : 6] +Input [3]: [ws_sold_date_sk#22, ws_quantity#64, ws_list_price#65] + +(150) Filter [codegen id : 6] +Input [3]: [ws_sold_date_sk#22, ws_quantity#64, ws_list_price#65] +Condition : isnotnull(ws_sold_date_sk#22) + +(151) ReusedExchange [Reuses operator id: 139] +Output [1]: [d_date_sk#10] + +(152) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#22] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(153) Project [codegen id : 6] +Output [2]: [ws_quantity#64 AS quantity#101, ws_list_price#65 AS list_price#102] +Input [4]: [ws_sold_date_sk#22, ws_quantity#64, ws_list_price#65, d_date_sk#10] + +(154) Union + +(155) HashAggregate [codegen id : 7] +Input [2]: [quantity#97, list_price#98] +Keys: [] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#97 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#98 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#103, count#104] +Results [2]: [sum#105, count#106] + +(156) Exchange +Input [2]: [sum#105, count#106] +Arguments: SinglePartition, true, [id=#107] + +(157) HashAggregate [codegen id : 8] +Input [2]: [sum#105, count#106] +Keys: [] +Functions [1]: 
[avg(CheckOverflow((promote_precision(cast(cast(quantity#97 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#98 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#97 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#98 as decimal(12,2)))), DecimalType(18,2), true))#108] +Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#97 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#98 as decimal(12,2)))), DecimalType(18,2), true))#108 AS average_sales#109] + +Subquery:2 Hosting operator id = 105 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] + +Subquery:3 Hosting operator id = 124 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] + + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/simplified.txt new file mode 100644 index 0000000000000..71d4c167f6564 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/simplified.txt @@ -0,0 +1,254 @@ +TakeOrderedAndProject [channel,i_brand_id,i_category_id,i_class_id,sum(number_sales),sum(sales)] + WholeStageCodegen (119) + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,isEmpty,spark_grouping_id,sum,sum] [isEmpty,sum,sum,sum(number_salesL),sum(number_sales),sum(sales),sum(sales)] + InputAdapter + Exchange [channel,i_brand_id,i_category_id,i_class_id,spark_grouping_id] #1 + WholeStageCodegen (118) + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales,spark_grouping_id] [isEmpty,isEmpty,sum,sum,sum,sum] + Expand [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + InputAdapter + Union + WholeStageCodegen (39) + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + 
Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + Subquery #1 + WholeStageCodegen (8) + HashAggregate [count,sum] [average_sales,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),count,sum] + InputAdapter + Exchange #16 + WholeStageCodegen (7) + HashAggregate [list_price,quantity] [count,count,sum,sum] + InputAdapter + Union + WholeStageCodegen (2) + Project [ss_list_price,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_list_price,ss_quantity,ss_sold_date_sk] + InputAdapter + BroadcastExchange #17 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen (4) + Project [cs_list_price,cs_quantity] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_list_price,cs_quantity,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #17 + WholeStageCodegen (6) + Project [ws_list_price,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_list_price,ws_quantity,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #17 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as 
decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + Exchange [i_brand_id,i_category_id,i_class_id] #2 + WholeStageCodegen (38) + HashAggregate [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity] [count,count,isEmpty,isEmpty,sum,sum] + Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_item_sk,ss_list_price,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + InputAdapter + SortMergeJoin [ss_item_sk,ss_item_sk] + WholeStageCodegen (2) + Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #3 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] + WholeStageCodegen (18) + Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #4 + WholeStageCodegen (17) + Project [i_item_sk] + BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] + Filter [i_brand_id,i_category_id,i_class_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (16) + HashAggregate [brand_id,category_id,class_id] + HashAggregate [brand_id,category_id,class_id] + HashAggregate [brand_id,category_id,class_id] + InputAdapter + Exchange [brand_id,category_id,class_id] #6 + WholeStageCodegen (15) + HashAggregate [brand_id,category_id,class_id] + InputAdapter + SortMergeJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] + SortMergeJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] + WholeStageCodegen (6) + Sort [brand_id,category_id,class_id] + InputAdapter + Exchange [brand_id,category_id,class_id] #7 + WholeStageCodegen (5) + Project [i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin 
[i_item_sk,ss_item_sk] + Project [ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (4) + Filter [i_brand_id,i_category_id,i_class_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] + WholeStageCodegen (10) + Sort [i_brand_id,i_category_id,i_class_id] + InputAdapter + Exchange [i_brand_id,i_category_id,i_class_id] #10 + WholeStageCodegen (9) + Project [i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_item_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #8 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (8) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] + WholeStageCodegen (14) + Sort [i_brand_id,i_category_id,i_class_id] + InputAdapter + Exchange [i_brand_id,i_category_id,i_class_id] #12 + WholeStageCodegen (13) + Project [i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_item_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #8 + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] #11 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (19) + 
Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #14 + SortMergeJoin [i_item_sk,ss_item_sk] + WholeStageCodegen (21) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #15 + WholeStageCodegen (20) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] + WholeStageCodegen (37) + Sort [ss_item_sk] + InputAdapter + ReusedExchange [ss_item_sk] #4 + WholeStageCodegen (78) + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + Exchange [i_brand_id,i_category_id,i_class_id] #18 + WholeStageCodegen (77) + HashAggregate [cs_list_price,cs_quantity,i_brand_id,i_category_id,i_class_id] [count,count,isEmpty,isEmpty,sum,sum] + Project [cs_list_price,cs_quantity,i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_list_price,cs_quantity] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + InputAdapter + SortMergeJoin [cs_item_sk,ss_item_sk] + WholeStageCodegen (41) + Sort [cs_item_sk] + InputAdapter + Exchange [cs_item_sk] #19 + WholeStageCodegen (40) + Filter 
[cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_item_sk,cs_list_price,cs_quantity,cs_sold_date_sk] + WholeStageCodegen (57) + Sort [ss_item_sk] + InputAdapter + ReusedExchange [ss_item_sk] #4 + InputAdapter + ReusedExchange [d_date_sk] #13 + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] #14 + WholeStageCodegen (117) + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + Exchange [i_brand_id,i_category_id,i_class_id] #20 + WholeStageCodegen (116) + HashAggregate [i_brand_id,i_category_id,i_class_id,ws_list_price,ws_quantity] [count,count,isEmpty,isEmpty,sum,sum] + Project [i_brand_id,i_category_id,i_class_id,ws_list_price,ws_quantity] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_item_sk,ws_list_price,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + InputAdapter + SortMergeJoin [ss_item_sk,ws_item_sk] + WholeStageCodegen (80) + Sort [ws_item_sk] + InputAdapter + Exchange [ws_item_sk] #21 + WholeStageCodegen (79) + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_list_price,ws_quantity,ws_sold_date_sk] + WholeStageCodegen (96) + Sort [ss_item_sk] + 
InputAdapter + ReusedExchange [ss_item_sk] #4 + InputAdapter + ReusedExchange [d_date_sk] #13 + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] #14 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt new file mode 100644 index 0000000000000..c96b1c502a15f --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt @@ -0,0 +1,798 @@ +== Physical Plan == +TakeOrderedAndProject (115) ++- * HashAggregate (114) + +- Exchange (113) + +- * HashAggregate (112) + +- * Expand (111) + +- Union (110) + :- * Project (77) + : +- * Filter (76) + : +- * HashAggregate (75) + : +- Exchange (74) + : +- * HashAggregate (73) + : +- * Project (72) + : +- * BroadcastHashJoin Inner BuildRight (71) + : :- * Project (65) + : : +- * BroadcastHashJoin Inner BuildRight (64) + : : :- * BroadcastHashJoin LeftSemi BuildRight (57) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (56) + : : : +- * Project (55) + : : : +- * BroadcastHashJoin Inner BuildRight (54) + : : : :- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.item (4) + : : : +- BroadcastExchange (53) + : : : +- * HashAggregate (52) + : : : +- * HashAggregate (51) + : : : +- * HashAggregate (50) + : : : +- Exchange (49) + : : : +- * HashAggregate (48) + : : : +- * BroadcastHashJoin LeftSemi BuildRight (47) + : : : :- * BroadcastHashJoin LeftSemi BuildRight (36) + : : : : :- * Project (22) + : : : : : +- * BroadcastHashJoin Inner BuildRight (21) + : : : : : :- * Project (15) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : : :- * Filter (9) + : : : : : : : +- * ColumnarToRow (8) + : : : : : : : +- Scan parquet default.store_sales (7) + : : : : : : +- BroadcastExchange (13) + : : : : : : +- * 
Filter (12) + : : : : : : +- * ColumnarToRow (11) + : : : : : : +- Scan parquet default.item (10) + : : : : : +- BroadcastExchange (20) + : : : : : +- * Project (19) + : : : : : +- * Filter (18) + : : : : : +- * ColumnarToRow (17) + : : : : : +- Scan parquet default.date_dim (16) + : : : : +- BroadcastExchange (35) + : : : : +- * Project (34) + : : : : +- * BroadcastHashJoin Inner BuildRight (33) + : : : : :- * Project (31) + : : : : : +- * BroadcastHashJoin Inner BuildRight (30) + : : : : : :- * Filter (25) + : : : : : : +- * ColumnarToRow (24) + : : : : : : +- Scan parquet default.catalog_sales (23) + : : : : : +- BroadcastExchange (29) + : : : : : +- * Filter (28) + : : : : : +- * ColumnarToRow (27) + : : : : : +- Scan parquet default.item (26) + : : : : +- ReusedExchange (32) + : : : +- BroadcastExchange (46) + : : : +- * Project (45) + : : : +- * BroadcastHashJoin Inner BuildRight (44) + : : : :- * Project (42) + : : : : +- * BroadcastHashJoin Inner BuildRight (41) + : : : : :- * Filter (39) + : : : : : +- * ColumnarToRow (38) + : : : : : +- Scan parquet default.web_sales (37) + : : : : +- ReusedExchange (40) + : : : +- ReusedExchange (43) + : : +- BroadcastExchange (63) + : : +- * BroadcastHashJoin LeftSemi BuildRight (62) + : : :- * Filter (60) + : : : +- * ColumnarToRow (59) + : : : +- Scan parquet default.item (58) + : : +- ReusedExchange (61) + : +- BroadcastExchange (70) + : +- * Project (69) + : +- * Filter (68) + : +- * ColumnarToRow (67) + : +- Scan parquet default.date_dim (66) + :- * Project (93) + : +- * Filter (92) + : +- * HashAggregate (91) + : +- Exchange (90) + : +- * HashAggregate (89) + : +- * Project (88) + : +- * BroadcastHashJoin Inner BuildRight (87) + : :- * Project (85) + : : +- * BroadcastHashJoin Inner BuildRight (84) + : : :- * BroadcastHashJoin LeftSemi BuildRight (82) + : : : :- * Filter (80) + : : : : +- * ColumnarToRow (79) + : : : : +- Scan parquet default.catalog_sales (78) + : : : +- ReusedExchange (81) + : : +- 
ReusedExchange (83) + : +- ReusedExchange (86) + +- * Project (109) + +- * Filter (108) + +- * HashAggregate (107) + +- Exchange (106) + +- * HashAggregate (105) + +- * Project (104) + +- * BroadcastHashJoin Inner BuildRight (103) + :- * Project (101) + : +- * BroadcastHashJoin Inner BuildRight (100) + : :- * BroadcastHashJoin LeftSemi BuildRight (98) + : : :- * Filter (96) + : : : +- * ColumnarToRow (95) + : : : +- Scan parquet default.web_sales (94) + : : +- ReusedExchange (97) + : +- ReusedExchange (99) + +- ReusedExchange (102) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 25] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] + +(3) Filter [codegen id : 25] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(4) Scan parquet default.item +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 11] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(6) Filter [codegen id : 11] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : ((isnotnull(i_class_id#7) AND isnotnull(i_brand_id#6)) AND isnotnull(i_category_id#8)) + +(7) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#1, ss_item_sk#2] 
+Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 9] +Input [2]: [ss_sold_date_sk#1, ss_item_sk#2] + +(9) Filter [codegen id : 9] +Input [2]: [ss_sold_date_sk#1, ss_item_sk#2] +Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(10) Scan parquet default.item +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category_id), IsNotNull(i_brand_id), IsNotNull(i_class_id)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 1] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(12) Filter [codegen id : 1] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : (((isnotnull(i_item_sk#5) AND isnotnull(i_category_id#8)) AND isnotnull(i_brand_id#6)) AND isnotnull(i_class_id#7)) + +(13) BroadcastExchange +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] + +(14) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(15) Project [codegen id : 9] +Output [4]: [ss_sold_date_sk#1, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(16) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_year#11] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#10, d_year#11] + +(18) Filter [codegen id : 2] +Input [2]: [d_date_sk#10, d_year#11] +Condition : (((isnotnull(d_year#11) AND (d_year#11 >= 1999)) AND (d_year#11 <= 2001)) AND isnotnull(d_date_sk#10)) + +(19) Project [codegen id : 2] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_year#11] + +(20) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(21) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(22) Project [codegen id : 9] +Output [3]: [i_brand_id#6 AS brand_id#13, i_class_id#7 AS class_id#14, i_category_id#8 AS category_id#15] +Input [5]: [ss_sold_date_sk#1, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] + +(23) Scan parquet default.catalog_sales +Output [2]: [cs_sold_date_sk#16, cs_item_sk#17] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 5] +Input [2]: [cs_sold_date_sk#16, cs_item_sk#17] + +(25) Filter [codegen id : 5] +Input [2]: [cs_sold_date_sk#16, cs_item_sk#17] +Condition : (isnotnull(cs_item_sk#17) AND isnotnull(cs_sold_date_sk#16)) + +(26) Scan parquet default.item +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 3] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(28) Filter [codegen id : 3] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : isnotnull(i_item_sk#5) + +(29) BroadcastExchange +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#18] + +(30) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_item_sk#17] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(31) Project [codegen id : 5] +Output [4]: [cs_sold_date_sk#16, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [6]: [cs_sold_date_sk#16, cs_item_sk#17, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(32) ReusedExchange [Reuses operator id: 20] +Output [1]: [d_date_sk#10] + +(33) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_sold_date_sk#16] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(34) Project [codegen id : 5] +Output [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Input [5]: [cs_sold_date_sk#16, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] + +(35) BroadcastExchange +Input [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [id=#19] + +(36) BroadcastHashJoin [codegen id : 9] +Left keys [6]: [coalesce(brand_id#13, 0), isnull(brand_id#13), coalesce(class_id#14, 0), isnull(class_id#14), coalesce(category_id#15, 0), isnull(category_id#15)] +Right keys [6]: [coalesce(i_brand_id#6, 0), isnull(i_brand_id#6), coalesce(i_class_id#7, 0), 
isnull(i_class_id#7), coalesce(i_category_id#8, 0), isnull(i_category_id#8)] +Join condition: None + +(37) Scan parquet default.web_sales +Output [2]: [ws_sold_date_sk#20, ws_item_sk#21] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 8] +Input [2]: [ws_sold_date_sk#20, ws_item_sk#21] + +(39) Filter [codegen id : 8] +Input [2]: [ws_sold_date_sk#20, ws_item_sk#21] +Condition : (isnotnull(ws_item_sk#21) AND isnotnull(ws_sold_date_sk#20)) + +(40) ReusedExchange [Reuses operator id: 29] +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(41) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ws_item_sk#21] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(42) Project [codegen id : 8] +Output [4]: [ws_sold_date_sk#20, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [6]: [ws_sold_date_sk#20, ws_item_sk#21, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(43) ReusedExchange [Reuses operator id: 20] +Output [1]: [d_date_sk#10] + +(44) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ws_sold_date_sk#20] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(45) Project [codegen id : 8] +Output [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Input [5]: [ws_sold_date_sk#20, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] + +(46) BroadcastExchange +Input [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [id=#22] + +(47) BroadcastHashJoin [codegen id : 9] +Left keys [6]: [coalesce(brand_id#13, 0), isnull(brand_id#13), 
coalesce(class_id#14, 0), isnull(class_id#14), coalesce(category_id#15, 0), isnull(category_id#15)] +Right keys [6]: [coalesce(i_brand_id#6, 0), isnull(i_brand_id#6), coalesce(i_class_id#7, 0), isnull(i_class_id#7), coalesce(i_category_id#8, 0), isnull(i_category_id#8)] +Join condition: None + +(48) HashAggregate [codegen id : 9] +Input [3]: [brand_id#13, class_id#14, category_id#15] +Keys [3]: [brand_id#13, class_id#14, category_id#15] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#13, class_id#14, category_id#15] + +(49) Exchange +Input [3]: [brand_id#13, class_id#14, category_id#15] +Arguments: hashpartitioning(brand_id#13, class_id#14, category_id#15, 5), true, [id=#23] + +(50) HashAggregate [codegen id : 10] +Input [3]: [brand_id#13, class_id#14, category_id#15] +Keys [3]: [brand_id#13, class_id#14, category_id#15] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#13, class_id#14, category_id#15] + +(51) HashAggregate [codegen id : 10] +Input [3]: [brand_id#13, class_id#14, category_id#15] +Keys [3]: [brand_id#13, class_id#14, category_id#15] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#13, class_id#14, category_id#15] + +(52) HashAggregate [codegen id : 10] +Input [3]: [brand_id#13, class_id#14, category_id#15] +Keys [3]: [brand_id#13, class_id#14, category_id#15] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#13, class_id#14, category_id#15] + +(53) BroadcastExchange +Input [3]: [brand_id#13, class_id#14, category_id#15] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#24] + +(54) BroadcastHashJoin [codegen id : 11] +Left keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Right keys [3]: [brand_id#13, class_id#14, category_id#15] +Join condition: None + +(55) Project [codegen id : 11] +Output [1]: [i_item_sk#5 AS ss_item_sk#25] +Input [7]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8, brand_id#13, 
class_id#14, category_id#15] + +(56) BroadcastExchange +Input [1]: [ss_item_sk#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] + +(57) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [ss_item_sk#25] +Join condition: None + +(58) Scan parquet default.item +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(59) ColumnarToRow [codegen id : 23] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(60) Filter [codegen id : 23] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : isnotnull(i_item_sk#5) + +(61) ReusedExchange [Reuses operator id: 56] +Output [1]: [ss_item_sk#25] + +(62) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [i_item_sk#5] +Right keys [1]: [ss_item_sk#25] +Join condition: None + +(63) BroadcastExchange +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#27] + +(64) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(65) Project [codegen id : 25] +Output [6]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(66) Scan parquet default.date_dim +Output [3]: [d_date_sk#10, d_year#11, d_moy#28] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), 
IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,11), IsNotNull(d_date_sk)] +ReadSchema: struct + +(67) ColumnarToRow [codegen id : 24] +Input [3]: [d_date_sk#10, d_year#11, d_moy#28] + +(68) Filter [codegen id : 24] +Input [3]: [d_date_sk#10, d_year#11, d_moy#28] +Condition : ((((isnotnull(d_year#11) AND isnotnull(d_moy#28)) AND (d_year#11 = 2001)) AND (d_moy#28 = 11)) AND isnotnull(d_date_sk#10)) + +(69) Project [codegen id : 24] +Output [1]: [d_date_sk#10] +Input [3]: [d_date_sk#10, d_year#11, d_moy#28] + +(70) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#29] + +(71) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(72) Project [codegen id : 25] +Output [5]: [ss_quantity#3, ss_list_price#4, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [7]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] + +(73) HashAggregate [codegen id : 25] +Input [5]: [ss_quantity#3, ss_list_price#4, i_brand_id#6, i_class_id#7, i_category_id#8] +Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Aggregate Attributes [3]: [sum#30, isEmpty#31, count#32] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#33, isEmpty#34, count#35] + +(74) Exchange +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#33, isEmpty#34, count#35] +Arguments: hashpartitioning(i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#36] + +(75) HashAggregate [codegen id : 26] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#33, isEmpty#34, count#35] +Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] 
+Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#37, count(1)#38] +Results [7]: [store AS channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#37 AS sales#40, count(1)#38 AS number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#37 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42] + +(76) Filter [codegen id : 26] +Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42 as decimal(32,6)) > cast(Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) + +(77) Project [codegen id : 26] +Output 
[6]: [sales#40, number_sales#41, channel#39, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42] + +(78) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#16, cs_item_sk#17, cs_quantity#45, cs_list_price#46] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(79) ColumnarToRow [codegen id : 51] +Input [4]: [cs_sold_date_sk#16, cs_item_sk#17, cs_quantity#45, cs_list_price#46] + +(80) Filter [codegen id : 51] +Input [4]: [cs_sold_date_sk#16, cs_item_sk#17, cs_quantity#45, cs_list_price#46] +Condition : (isnotnull(cs_item_sk#17) AND isnotnull(cs_sold_date_sk#16)) + +(81) ReusedExchange [Reuses operator id: 56] +Output [1]: [ss_item_sk#25] + +(82) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [cs_item_sk#17] +Right keys [1]: [ss_item_sk#25] +Join condition: None + +(83) ReusedExchange [Reuses operator id: 63] +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(84) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [cs_item_sk#17] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(85) Project [codegen id : 51] +Output [6]: [cs_sold_date_sk#16, cs_quantity#45, cs_list_price#46, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [8]: [cs_sold_date_sk#16, cs_item_sk#17, cs_quantity#45, cs_list_price#46, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(86) ReusedExchange [Reuses operator id: 70] +Output [1]: [d_date_sk#10] + +(87) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [cs_sold_date_sk#16] +Right keys [1]: 
[d_date_sk#10] +Join condition: None + +(88) Project [codegen id : 51] +Output [5]: [cs_quantity#45, cs_list_price#46, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [7]: [cs_sold_date_sk#16, cs_quantity#45, cs_list_price#46, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] + +(89) HashAggregate [codegen id : 51] +Input [5]: [cs_quantity#45, cs_list_price#46, i_brand_id#6, i_class_id#7, i_category_id#8] +Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Aggregate Attributes [3]: [sum#47, isEmpty#48, count#49] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#50, isEmpty#51, count#52] + +(90) Exchange +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#50, isEmpty#51, count#52] +Arguments: hashpartitioning(i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#53] + +(91) HashAggregate [codegen id : 52] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#50, isEmpty#51, count#52] +Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#54, count(1)#55] +Results [7]: [catalog AS channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#54 AS sales#57, count(1)#55 AS 
number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#54 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#59] + +(92) Filter [codegen id : 52] +Input [7]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#59] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#59) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#59 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) + +(93) Project [codegen id : 52] +Output [6]: [sales#57, number_sales#58, channel#56, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [7]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#59] + +(94) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#20, ws_item_sk#21, ws_quantity#60, ws_list_price#61] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), 
IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(95) ColumnarToRow [codegen id : 77] +Input [4]: [ws_sold_date_sk#20, ws_item_sk#21, ws_quantity#60, ws_list_price#61] + +(96) Filter [codegen id : 77] +Input [4]: [ws_sold_date_sk#20, ws_item_sk#21, ws_quantity#60, ws_list_price#61] +Condition : (isnotnull(ws_item_sk#21) AND isnotnull(ws_sold_date_sk#20)) + +(97) ReusedExchange [Reuses operator id: 56] +Output [1]: [ss_item_sk#25] + +(98) BroadcastHashJoin [codegen id : 77] +Left keys [1]: [ws_item_sk#21] +Right keys [1]: [ss_item_sk#25] +Join condition: None + +(99) ReusedExchange [Reuses operator id: 63] +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(100) BroadcastHashJoin [codegen id : 77] +Left keys [1]: [ws_item_sk#21] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(101) Project [codegen id : 77] +Output [6]: [ws_sold_date_sk#20, ws_quantity#60, ws_list_price#61, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [8]: [ws_sold_date_sk#20, ws_item_sk#21, ws_quantity#60, ws_list_price#61, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(102) ReusedExchange [Reuses operator id: 70] +Output [1]: [d_date_sk#10] + +(103) BroadcastHashJoin [codegen id : 77] +Left keys [1]: [ws_sold_date_sk#20] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(104) Project [codegen id : 77] +Output [5]: [ws_quantity#60, ws_list_price#61, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [7]: [ws_sold_date_sk#20, ws_quantity#60, ws_list_price#61, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] + +(105) HashAggregate [codegen id : 77] +Input [5]: [ws_quantity#60, ws_list_price#61, i_brand_id#6, i_class_id#7, i_category_id#8] +Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] 
+Aggregate Attributes [3]: [sum#62, isEmpty#63, count#64] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#65, isEmpty#66, count#67] + +(106) Exchange +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#65, isEmpty#66, count#67] +Arguments: hashpartitioning(i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#68] + +(107) HashAggregate [codegen id : 78] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#65, isEmpty#66, count#67] +Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#69, count(1)#70] +Results [7]: [web AS channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#69 AS sales#72, count(1)#70 AS number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#69 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#74] + +(108) Filter [codegen id : 78] +Input [7]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), 
true))#74] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#74) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#74 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) + +(109) Project [codegen id : 78] +Output [6]: [sales#72, number_sales#73, channel#71, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [7]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#74] + +(110) Union + +(111) Expand [codegen id : 79] +Input [6]: [sales#40, number_sales#41, channel#39, i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: [List(sales#40, number_sales#41, channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, 0), List(sales#40, number_sales#41, channel#39, i_brand_id#6, i_class_id#7, null, 1), List(sales#40, number_sales#41, channel#39, i_brand_id#6, null, null, 3), List(sales#40, number_sales#41, channel#39, null, null, null, 7), List(sales#40, number_sales#41, null, null, null, null, 15)], [sales#40, number_sales#41, channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79] + +(112) HashAggregate [codegen id : 79] +Input [7]: [sales#40, number_sales#41, channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79] +Keys [5]: [channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79] +Functions [2]: [partial_sum(sales#40), partial_sum(number_sales#41)] +Aggregate Attributes [3]: [sum#80, isEmpty#81, sum#82] +Results [8]: 
[channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79, sum#83, isEmpty#84, sum#85] + +(113) Exchange +Input [8]: [channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79, sum#83, isEmpty#84, sum#85] +Arguments: hashpartitioning(channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79, 5), true, [id=#86] + +(114) HashAggregate [codegen id : 80] +Input [8]: [channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79, sum#83, isEmpty#84, sum#85] +Keys [5]: [channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79] +Functions [2]: [sum(sales#40), sum(number_sales#41)] +Aggregate Attributes [2]: [sum(sales#40)#87, sum(number_sales#41)#88] +Results [6]: [channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, sum(sales#40)#87 AS sum(sales)#89, sum(number_sales#41)#88 AS sum(number_sales)#90] + +(115) TakeOrderedAndProject +Input [6]: [channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, sum(sales)#89, sum(number_sales)#90] +Arguments: 100, [channel#75 ASC NULLS FIRST, i_brand_id#76 ASC NULLS FIRST, i_class_id#77 ASC NULLS FIRST, i_category_id#78 ASC NULLS FIRST], [channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, sum(sales)#89, sum(number_sales)#90] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 76 Hosting Expression = Subquery scalar-subquery#43, [id=#44] +* HashAggregate (141) ++- Exchange (140) + +- * HashAggregate (139) + +- Union (138) + :- * Project (125) + : +- * BroadcastHashJoin Inner BuildRight (124) + : :- * Filter (118) + : : +- * ColumnarToRow (117) + : : +- Scan parquet default.store_sales (116) + : +- BroadcastExchange (123) + : +- * Project (122) + : +- * Filter (121) + : +- * ColumnarToRow (120) + : +- Scan parquet default.date_dim (119) + :- * Project (131) + : +- * BroadcastHashJoin Inner BuildRight (130) + : :- * Filter (128) + : : +- * ColumnarToRow (127) + : : +- Scan parquet 
default.catalog_sales (126) + : +- ReusedExchange (129) + +- * Project (137) + +- * BroadcastHashJoin Inner BuildRight (136) + :- * Filter (134) + : +- * ColumnarToRow (133) + : +- Scan parquet default.web_sales (132) + +- ReusedExchange (135) + + +(116) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(117) ColumnarToRow [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] + +(118) Filter [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] +Condition : isnotnull(ss_sold_date_sk#1) + +(119) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_year#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(120) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#10, d_year#11] + +(121) Filter [codegen id : 1] +Input [2]: [d_date_sk#10, d_year#11] +Condition : (((isnotnull(d_year#11) AND (d_year#11 >= 1999)) AND (d_year#11 <= 2001)) AND isnotnull(d_date_sk#10)) + +(122) Project [codegen id : 1] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_year#11] + +(123) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#91] + +(124) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(125) Project [codegen id : 2] +Output [2]: [ss_quantity#3 AS quantity#92, ss_list_price#4 AS list_price#93] +Input [4]: 
[ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, d_date_sk#10] + +(126) Scan parquet default.catalog_sales +Output [3]: [cs_sold_date_sk#16, cs_quantity#45, cs_list_price#46] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(127) ColumnarToRow [codegen id : 4] +Input [3]: [cs_sold_date_sk#16, cs_quantity#45, cs_list_price#46] + +(128) Filter [codegen id : 4] +Input [3]: [cs_sold_date_sk#16, cs_quantity#45, cs_list_price#46] +Condition : isnotnull(cs_sold_date_sk#16) + +(129) ReusedExchange [Reuses operator id: 123] +Output [1]: [d_date_sk#10] + +(130) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#16] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(131) Project [codegen id : 4] +Output [2]: [cs_quantity#45 AS quantity#94, cs_list_price#46 AS list_price#95] +Input [4]: [cs_sold_date_sk#16, cs_quantity#45, cs_list_price#46, d_date_sk#10] + +(132) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#20, ws_quantity#60, ws_list_price#61] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(133) ColumnarToRow [codegen id : 6] +Input [3]: [ws_sold_date_sk#20, ws_quantity#60, ws_list_price#61] + +(134) Filter [codegen id : 6] +Input [3]: [ws_sold_date_sk#20, ws_quantity#60, ws_list_price#61] +Condition : isnotnull(ws_sold_date_sk#20) + +(135) ReusedExchange [Reuses operator id: 123] +Output [1]: [d_date_sk#10] + +(136) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#20] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(137) Project [codegen id : 6] +Output [2]: [ws_quantity#60 AS quantity#96, ws_list_price#61 AS list_price#97] +Input 
[4]: [ws_sold_date_sk#20, ws_quantity#60, ws_list_price#61, d_date_sk#10] + +(138) Union + +(139) HashAggregate [codegen id : 7] +Input [2]: [quantity#92, list_price#93] +Keys: [] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#92 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#93 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#98, count#99] +Results [2]: [sum#100, count#101] + +(140) Exchange +Input [2]: [sum#100, count#101] +Arguments: SinglePartition, true, [id=#102] + +(141) HashAggregate [codegen id : 8] +Input [2]: [sum#100, count#101] +Keys: [] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#92 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#93 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#92 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#93 as decimal(12,2)))), DecimalType(18,2), true))#103] +Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#92 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#93 as decimal(12,2)))), DecimalType(18,2), true))#103 AS average_sales#104] + +Subquery:2 Hosting operator id = 92 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] + +Subquery:3 Hosting operator id = 108 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] + + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt new file mode 100644 index 0000000000000..9facf93115f1f --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt @@ -0,0 +1,214 @@ +TakeOrderedAndProject [channel,i_brand_id,i_category_id,i_class_id,sum(number_sales),sum(sales)] + 
WholeStageCodegen (80) + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,isEmpty,spark_grouping_id,sum,sum] [isEmpty,sum,sum,sum(number_salesL),sum(number_sales),sum(sales),sum(sales)] + InputAdapter + Exchange [channel,i_brand_id,i_category_id,i_class_id,spark_grouping_id] #1 + WholeStageCodegen (79) + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales,spark_grouping_id] [isEmpty,isEmpty,sum,sum,sum,sum] + Expand [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + InputAdapter + Union + WholeStageCodegen (26) + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + Subquery #1 + WholeStageCodegen (8) + HashAggregate [count,sum] [average_sales,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),count,sum] + InputAdapter + Exchange #13 + WholeStageCodegen (7) + HashAggregate [list_price,quantity] [count,count,sum,sum] + InputAdapter + Union + WholeStageCodegen (2) + Project [ss_list_price,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_list_price,ss_quantity,ss_sold_date_sk] + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen (4) + Project [cs_list_price,cs_quantity] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_list_price,cs_quantity,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #14 + 
WholeStageCodegen (6) + Project [ws_list_price,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_list_price,ws_quantity,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #14 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + Exchange [i_brand_id,i_category_id,i_class_id] #2 + WholeStageCodegen (25) + HashAggregate [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity] [count,count,isEmpty,isEmpty,sum,sum] + Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + BroadcastHashJoin [ss_item_sk,ss_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (11) + Project [i_item_sk] + BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] + Filter [i_brand_id,i_category_id,i_class_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (10) + HashAggregate [brand_id,category_id,class_id] + HashAggregate [brand_id,category_id,class_id] + HashAggregate [brand_id,category_id,class_id] + InputAdapter 
+ Exchange [brand_id,category_id,class_id] #5 + WholeStageCodegen (9) + HashAggregate [brand_id,category_id,class_id] + BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] + Project [i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (1) + Filter [i_brand_id,i_category_id,i_class_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (5) + Project [i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_item_sk,cs_sold_date_sk] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] + InputAdapter + ReusedExchange [d_date_sk] #7 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (8) + Project [i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,ws_sold_date_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Filter [ws_item_sk,ws_sold_date_sk] + 
ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] #9 + InputAdapter + ReusedExchange [d_date_sk] #7 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (23) + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] + InputAdapter + ReusedExchange [ss_item_sk] #3 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (24) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + WholeStageCodegen (52) + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + Exchange [i_brand_id,i_category_id,i_class_id] #15 + WholeStageCodegen (51) + HashAggregate [cs_list_price,cs_quantity,i_brand_id,i_category_id,i_class_id] [count,count,isEmpty,isEmpty,sum,sum] + Project [cs_list_price,cs_quantity,i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_list_price,cs_quantity,cs_sold_date_sk,i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] 
+ BroadcastHashJoin [cs_item_sk,ss_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_item_sk,cs_list_price,cs_quantity,cs_sold_date_sk] + InputAdapter + ReusedExchange [ss_item_sk] #3 + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] #11 + InputAdapter + ReusedExchange [d_date_sk] #12 + WholeStageCodegen (78) + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + Exchange [i_brand_id,i_category_id,i_class_id] #16 + WholeStageCodegen (77) + HashAggregate [i_brand_id,i_category_id,i_class_id,ws_list_price,ws_quantity] [count,count,isEmpty,isEmpty,sum,sum] + Project [i_brand_id,i_category_id,i_class_id,ws_list_price,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,ws_list_price,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + BroadcastHashJoin [ss_item_sk,ws_item_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_list_price,ws_quantity,ws_sold_date_sk] + InputAdapter + ReusedExchange [ss_item_sk] #3 + InputAdapter + ReusedExchange 
[i_brand_id,i_category_id,i_class_id,i_item_sk] #11 + InputAdapter + ReusedExchange [d_date_sk] #12 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/explain.txt new file mode 100644 index 0000000000000..8c27f7ddce018 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/explain.txt @@ -0,0 +1,810 @@ +== Physical Plan == +TakeOrderedAndProject (110) ++- * BroadcastHashJoin Inner BuildRight (109) + :- * Project (87) + : +- * Filter (86) + : +- * HashAggregate (85) + : +- Exchange (84) + : +- * HashAggregate (83) + : +- * Project (82) + : +- * BroadcastHashJoin Inner BuildRight (81) + : :- * Project (71) + : : +- * BroadcastHashJoin Inner BuildRight (70) + : : :- SortMergeJoin LeftSemi (64) + : : : :- * Sort (5) + : : : : +- Exchange (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- * Sort (63) + : : : +- Exchange (62) + : : : +- * Project (61) + : : : +- * BroadcastHashJoin Inner BuildRight (60) + : : : :- * Filter (8) + : : : : +- * ColumnarToRow (7) + : : : : +- Scan parquet default.item (6) + : : : +- BroadcastExchange (59) + : : : +- * HashAggregate (58) + : : : +- * HashAggregate (57) + : : : +- * HashAggregate (56) + : : : +- Exchange (55) + : : : +- * HashAggregate (54) + : : : +- SortMergeJoin LeftSemi (53) + : : : :- SortMergeJoin LeftSemi (41) + : : : : :- * Sort (26) + : : : : : +- Exchange (25) + : : : : : +- * Project (24) + : : : : : +- * BroadcastHashJoin Inner BuildRight (23) + : : : : : :- * Project (18) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (17) + : : : : : : :- * Filter (11) + : : : : : : : +- * ColumnarToRow (10) + : : : : : : : +- Scan parquet default.store_sales (9) + : : : : : : +- BroadcastExchange (16) + : : : : : : +- * Project (15) + : : : : : : +- * Filter (14) + : : : : 
: : +- * ColumnarToRow (13) + : : : : : : +- Scan parquet default.date_dim (12) + : : : : : +- BroadcastExchange (22) + : : : : : +- * Filter (21) + : : : : : +- * ColumnarToRow (20) + : : : : : +- Scan parquet default.item (19) + : : : : +- * Sort (40) + : : : : +- Exchange (39) + : : : : +- * Project (38) + : : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : : :- * Project (32) + : : : : : +- * BroadcastHashJoin Inner BuildRight (31) + : : : : : :- * Filter (29) + : : : : : : +- * ColumnarToRow (28) + : : : : : : +- Scan parquet default.catalog_sales (27) + : : : : : +- ReusedExchange (30) + : : : : +- BroadcastExchange (36) + : : : : +- * Filter (35) + : : : : +- * ColumnarToRow (34) + : : : : +- Scan parquet default.item (33) + : : : +- * Sort (52) + : : : +- Exchange (51) + : : : +- * Project (50) + : : : +- * BroadcastHashJoin Inner BuildRight (49) + : : : :- * Project (47) + : : : : +- * BroadcastHashJoin Inner BuildRight (46) + : : : : :- * Filter (44) + : : : : : +- * ColumnarToRow (43) + : : : : : +- Scan parquet default.web_sales (42) + : : : : +- ReusedExchange (45) + : : : +- ReusedExchange (48) + : : +- BroadcastExchange (69) + : : +- * Project (68) + : : +- * Filter (67) + : : +- * ColumnarToRow (66) + : : +- Scan parquet default.date_dim (65) + : +- BroadcastExchange (80) + : +- SortMergeJoin LeftSemi (79) + : :- * Sort (76) + : : +- Exchange (75) + : : +- * Filter (74) + : : +- * ColumnarToRow (73) + : : +- Scan parquet default.item (72) + : +- * Sort (78) + : +- ReusedExchange (77) + +- BroadcastExchange (108) + +- * Project (107) + +- * Filter (106) + +- * HashAggregate (105) + +- Exchange (104) + +- * HashAggregate (103) + +- * Project (102) + +- * BroadcastHashJoin Inner BuildRight (101) + :- * Project (99) + : +- * BroadcastHashJoin Inner BuildRight (98) + : :- SortMergeJoin LeftSemi (92) + : : :- * Sort (89) + : : : +- ReusedExchange (88) + : : +- * Sort (91) + : : +- ReusedExchange (90) + : +- BroadcastExchange (97) + : +- * Project 
(96) + : +- * Filter (95) + : +- * ColumnarToRow (94) + : +- Scan parquet default.date_dim (93) + +- ReusedExchange (100) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] + +(3) Filter [codegen id : 1] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(4) Exchange +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Arguments: hashpartitioning(ss_item_sk#2, 5), true, [id=#5] + +(5) Sort [codegen id : 2] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Arguments: [ss_item_sk#2 ASC NULLS FIRST], false, 0 + +(6) Scan parquet default.item +Output [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 17] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(8) Filter [codegen id : 17] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Condition : ((isnotnull(i_class_id#8) AND isnotnull(i_brand_id#7)) AND isnotnull(i_category_id#9)) + +(9) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#1, ss_item_sk#2] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(10) ColumnarToRow [codegen id : 5] +Input [2]: [ss_sold_date_sk#1, ss_item_sk#2] + +(11) Filter [codegen id : 5] +Input [2]: [ss_sold_date_sk#1, ss_item_sk#2] +Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(12) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_year#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(13) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#10, d_year#11] + +(14) Filter [codegen id : 3] +Input [2]: [d_date_sk#10, d_year#11] +Condition : (((isnotnull(d_year#11) AND (d_year#11 >= 1999)) AND (d_year#11 <= 2001)) AND isnotnull(d_date_sk#10)) + +(15) Project [codegen id : 3] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_year#11] + +(16) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(17) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(18) Project [codegen id : 5] +Output [1]: [ss_item_sk#2] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, d_date_sk#10] + +(19) Scan parquet default.item +Output [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_category_id), 
IsNotNull(i_class_id)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 4] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(21) Filter [codegen id : 4] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Condition : (((isnotnull(i_item_sk#6) AND isnotnull(i_brand_id#7)) AND isnotnull(i_category_id#9)) AND isnotnull(i_class_id#8)) + +(22) BroadcastExchange +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#13] + +(23) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(24) Project [codegen id : 5] +Output [3]: [i_brand_id#7 AS brand_id#14, i_class_id#8 AS class_id#15, i_category_id#9 AS category_id#16] +Input [5]: [ss_item_sk#2, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(25) Exchange +Input [3]: [brand_id#14, class_id#15, category_id#16] +Arguments: hashpartitioning(coalesce(brand_id#14, 0), isnull(brand_id#14), coalesce(class_id#15, 0), isnull(class_id#15), coalesce(category_id#16, 0), isnull(category_id#16), 5), true, [id=#17] + +(26) Sort [codegen id : 6] +Input [3]: [brand_id#14, class_id#15, category_id#16] +Arguments: [coalesce(brand_id#14, 0) ASC NULLS FIRST, isnull(brand_id#14) ASC NULLS FIRST, coalesce(class_id#15, 0) ASC NULLS FIRST, isnull(class_id#15) ASC NULLS FIRST, coalesce(category_id#16, 0) ASC NULLS FIRST, isnull(category_id#16) ASC NULLS FIRST], false, 0 + +(27) Scan parquet default.catalog_sales +Output [2]: [cs_sold_date_sk#18, cs_item_sk#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 9] +Input [2]: [cs_sold_date_sk#18, cs_item_sk#19] 
+ +(29) Filter [codegen id : 9] +Input [2]: [cs_sold_date_sk#18, cs_item_sk#19] +Condition : (isnotnull(cs_item_sk#19) AND isnotnull(cs_sold_date_sk#18)) + +(30) ReusedExchange [Reuses operator id: 16] +Output [1]: [d_date_sk#10] + +(31) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [cs_sold_date_sk#18] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(32) Project [codegen id : 9] +Output [1]: [cs_item_sk#19] +Input [3]: [cs_sold_date_sk#18, cs_item_sk#19, d_date_sk#10] + +(33) Scan parquet default.item +Output [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 8] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(35) Filter [codegen id : 8] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Condition : isnotnull(i_item_sk#6) + +(36) BroadcastExchange +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#20] + +(37) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [cs_item_sk#19] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(38) Project [codegen id : 9] +Output [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Input [5]: [cs_item_sk#19, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(39) Exchange +Input [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Arguments: hashpartitioning(coalesce(i_brand_id#7, 0), isnull(i_brand_id#7), coalesce(i_class_id#8, 0), isnull(i_class_id#8), coalesce(i_category_id#9, 0), isnull(i_category_id#9), 5), true, [id=#21] + +(40) Sort [codegen id : 10] +Input [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Arguments: [coalesce(i_brand_id#7, 0) ASC NULLS FIRST, 
isnull(i_brand_id#7) ASC NULLS FIRST, coalesce(i_class_id#8, 0) ASC NULLS FIRST, isnull(i_class_id#8) ASC NULLS FIRST, coalesce(i_category_id#9, 0) ASC NULLS FIRST, isnull(i_category_id#9) ASC NULLS FIRST], false, 0 + +(41) SortMergeJoin +Left keys [6]: [coalesce(brand_id#14, 0), isnull(brand_id#14), coalesce(class_id#15, 0), isnull(class_id#15), coalesce(category_id#16, 0), isnull(category_id#16)] +Right keys [6]: [coalesce(i_brand_id#7, 0), isnull(i_brand_id#7), coalesce(i_class_id#8, 0), isnull(i_class_id#8), coalesce(i_category_id#9, 0), isnull(i_category_id#9)] +Join condition: None + +(42) Scan parquet default.web_sales +Output [2]: [ws_sold_date_sk#22, ws_item_sk#23] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(43) ColumnarToRow [codegen id : 13] +Input [2]: [ws_sold_date_sk#22, ws_item_sk#23] + +(44) Filter [codegen id : 13] +Input [2]: [ws_sold_date_sk#22, ws_item_sk#23] +Condition : (isnotnull(ws_item_sk#23) AND isnotnull(ws_sold_date_sk#22)) + +(45) ReusedExchange [Reuses operator id: 16] +Output [1]: [d_date_sk#10] + +(46) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ws_sold_date_sk#22] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(47) Project [codegen id : 13] +Output [1]: [ws_item_sk#23] +Input [3]: [ws_sold_date_sk#22, ws_item_sk#23, d_date_sk#10] + +(48) ReusedExchange [Reuses operator id: 36] +Output [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(49) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ws_item_sk#23] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(50) Project [codegen id : 13] +Output [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Input [5]: [ws_item_sk#23, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(51) Exchange +Input [3]: [i_brand_id#7, 
i_class_id#8, i_category_id#9] +Arguments: hashpartitioning(coalesce(i_brand_id#7, 0), isnull(i_brand_id#7), coalesce(i_class_id#8, 0), isnull(i_class_id#8), coalesce(i_category_id#9, 0), isnull(i_category_id#9), 5), true, [id=#24] + +(52) Sort [codegen id : 14] +Input [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Arguments: [coalesce(i_brand_id#7, 0) ASC NULLS FIRST, isnull(i_brand_id#7) ASC NULLS FIRST, coalesce(i_class_id#8, 0) ASC NULLS FIRST, isnull(i_class_id#8) ASC NULLS FIRST, coalesce(i_category_id#9, 0) ASC NULLS FIRST, isnull(i_category_id#9) ASC NULLS FIRST], false, 0 + +(53) SortMergeJoin +Left keys [6]: [coalesce(brand_id#14, 0), isnull(brand_id#14), coalesce(class_id#15, 0), isnull(class_id#15), coalesce(category_id#16, 0), isnull(category_id#16)] +Right keys [6]: [coalesce(i_brand_id#7, 0), isnull(i_brand_id#7), coalesce(i_class_id#8, 0), isnull(i_class_id#8), coalesce(i_category_id#9, 0), isnull(i_category_id#9)] +Join condition: None + +(54) HashAggregate [codegen id : 15] +Input [3]: [brand_id#14, class_id#15, category_id#16] +Keys [3]: [brand_id#14, class_id#15, category_id#16] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#14, class_id#15, category_id#16] + +(55) Exchange +Input [3]: [brand_id#14, class_id#15, category_id#16] +Arguments: hashpartitioning(brand_id#14, class_id#15, category_id#16, 5), true, [id=#25] + +(56) HashAggregate [codegen id : 16] +Input [3]: [brand_id#14, class_id#15, category_id#16] +Keys [3]: [brand_id#14, class_id#15, category_id#16] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#14, class_id#15, category_id#16] + +(57) HashAggregate [codegen id : 16] +Input [3]: [brand_id#14, class_id#15, category_id#16] +Keys [3]: [brand_id#14, class_id#15, category_id#16] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#14, class_id#15, category_id#16] + +(58) HashAggregate [codegen id : 16] +Input [3]: [brand_id#14, class_id#15, category_id#16] +Keys [3]: [brand_id#14, 
class_id#15, category_id#16] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#14, class_id#15, category_id#16] + +(59) BroadcastExchange +Input [3]: [brand_id#14, class_id#15, category_id#16] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#26] + +(60) BroadcastHashJoin [codegen id : 17] +Left keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Right keys [3]: [brand_id#14, class_id#15, category_id#16] +Join condition: None + +(61) Project [codegen id : 17] +Output [1]: [i_item_sk#6 AS ss_item_sk#27] +Input [7]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, brand_id#14, class_id#15, category_id#16] + +(62) Exchange +Input [1]: [ss_item_sk#27] +Arguments: hashpartitioning(ss_item_sk#27, 5), true, [id=#28] + +(63) Sort [codegen id : 18] +Input [1]: [ss_item_sk#27] +Arguments: [ss_item_sk#27 ASC NULLS FIRST], false, 0 + +(64) SortMergeJoin +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [ss_item_sk#27] +Join condition: None + +(65) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_week_seq#29] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(66) ColumnarToRow [codegen id : 19] +Input [2]: [d_date_sk#10, d_week_seq#29] + +(67) Filter [codegen id : 19] +Input [2]: [d_date_sk#10, d_week_seq#29] +Condition : ((isnotnull(d_week_seq#29) AND (d_week_seq#29 = Subquery scalar-subquery#30, [id=#31])) AND isnotnull(d_date_sk#10)) + +(68) Project [codegen id : 19] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_week_seq#29] + +(69) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#32] + +(70) BroadcastHashJoin [codegen id : 38] +Left keys [1]: 
[ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(71) Project [codegen id : 38] +Output [3]: [ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4, d_date_sk#10] + +(72) Scan parquet default.item +Output [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_category_id), IsNotNull(i_class_id)] +ReadSchema: struct + +(73) ColumnarToRow [codegen id : 20] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(74) Filter [codegen id : 20] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Condition : (((isnotnull(i_item_sk#6) AND isnotnull(i_brand_id#7)) AND isnotnull(i_category_id#9)) AND isnotnull(i_class_id#8)) + +(75) Exchange +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Arguments: hashpartitioning(i_item_sk#6, 5), true, [id=#33] + +(76) Sort [codegen id : 21] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Arguments: [i_item_sk#6 ASC NULLS FIRST], false, 0 + +(77) ReusedExchange [Reuses operator id: 62] +Output [1]: [ss_item_sk#27] + +(78) Sort [codegen id : 37] +Input [1]: [ss_item_sk#27] +Arguments: [ss_item_sk#27 ASC NULLS FIRST], false, 0 + +(79) SortMergeJoin +Left keys [1]: [i_item_sk#6] +Right keys [1]: [ss_item_sk#27] +Join condition: None + +(80) BroadcastExchange +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#34] + +(81) BroadcastHashJoin [codegen id : 38] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(82) Project [codegen id : 38] +Output [5]: 
[ss_quantity#3, ss_list_price#4, i_brand_id#7, i_class_id#8, i_category_id#9] +Input [7]: [ss_item_sk#2, ss_quantity#3, ss_list_price#4, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(83) HashAggregate [codegen id : 38] +Input [5]: [ss_quantity#3, ss_list_price#4, i_brand_id#7, i_class_id#8, i_category_id#9] +Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Aggregate Attributes [3]: [sum#35, isEmpty#36, count#37] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#38, isEmpty#39, count#40] + +(84) Exchange +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#38, isEmpty#39, count#40] +Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#41] + +(85) HashAggregate [codegen id : 78] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#38, isEmpty#39, count#40] +Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42, count(1)#43] +Results [7]: [store AS channel#44, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42 AS sales#45, count(1)#43 AS number_sales#46, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as 
decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#47] + +(86) Filter [codegen id : 78] +Input [7]: [channel#44, i_brand_id#7, i_class_id#8, i_category_id#9, sales#45, number_sales#46, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#47] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#47) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#47 as decimal(32,6)) > cast(Subquery scalar-subquery#48, [id=#49] as decimal(32,6)))) + +(87) Project [codegen id : 78] +Output [6]: [channel#44, i_brand_id#7, i_class_id#8, i_category_id#9, sales#45, number_sales#46] +Input [7]: [channel#44, i_brand_id#7, i_class_id#8, i_category_id#9, sales#45, number_sales#46, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#47] + +(88) ReusedExchange [Reuses operator id: 4] +Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] + +(89) Sort [codegen id : 40] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Arguments: [ss_item_sk#2 ASC NULLS FIRST], false, 0 + +(90) ReusedExchange [Reuses operator id: 62] +Output [1]: [ss_item_sk#27] + +(91) Sort [codegen id : 56] +Input [1]: [ss_item_sk#27] +Arguments: [ss_item_sk#27 ASC NULLS 
FIRST], false, 0 + +(92) SortMergeJoin +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [ss_item_sk#27] +Join condition: None + +(93) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_week_seq#29] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(94) ColumnarToRow [codegen id : 57] +Input [2]: [d_date_sk#10, d_week_seq#29] + +(95) Filter [codegen id : 57] +Input [2]: [d_date_sk#10, d_week_seq#29] +Condition : ((isnotnull(d_week_seq#29) AND (d_week_seq#29 = Subquery scalar-subquery#50, [id=#51])) AND isnotnull(d_date_sk#10)) + +(96) Project [codegen id : 57] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_week_seq#29] + +(97) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#52] + +(98) BroadcastHashJoin [codegen id : 76] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(99) Project [codegen id : 76] +Output [3]: [ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4, d_date_sk#10] + +(100) ReusedExchange [Reuses operator id: 80] +Output [4]: [i_item_sk#53, i_brand_id#54, i_class_id#55, i_category_id#56] + +(101) BroadcastHashJoin [codegen id : 76] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#53] +Join condition: None + +(102) Project [codegen id : 76] +Output [5]: [ss_quantity#3, ss_list_price#4, i_brand_id#54, i_class_id#55, i_category_id#56] +Input [7]: [ss_item_sk#2, ss_quantity#3, ss_list_price#4, i_item_sk#53, i_brand_id#54, i_class_id#55, i_category_id#56] + +(103) HashAggregate [codegen id : 76] +Input [5]: [ss_quantity#3, ss_list_price#4, i_brand_id#54, i_class_id#55, i_category_id#56] +Keys [3]: [i_brand_id#54, 
i_class_id#55, i_category_id#56] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Aggregate Attributes [3]: [sum#57, isEmpty#58, count#59] +Results [6]: [i_brand_id#54, i_class_id#55, i_category_id#56, sum#60, isEmpty#61, count#62] + +(104) Exchange +Input [6]: [i_brand_id#54, i_class_id#55, i_category_id#56, sum#60, isEmpty#61, count#62] +Arguments: hashpartitioning(i_brand_id#54, i_class_id#55, i_category_id#56, 5), true, [id=#63] + +(105) HashAggregate [codegen id : 77] +Input [6]: [i_brand_id#54, i_class_id#55, i_category_id#56, sum#60, isEmpty#61, count#62] +Keys [3]: [i_brand_id#54, i_class_id#55, i_category_id#56] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#64, count(1)#65] +Results [7]: [store AS channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#64 AS sales#67, count(1)#65 AS number_sales#68, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#64 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#69] + +(106) Filter [codegen id : 77] +Input 
[7]: [channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sales#67, number_sales#68, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#69] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#69) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#69 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#48, [id=#49] as decimal(32,6)))) + +(107) Project [codegen id : 77] +Output [6]: [channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sales#67, number_sales#68] +Input [7]: [channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sales#67, number_sales#68, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#69] + +(108) BroadcastExchange +Input [6]: [channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sales#67, number_sales#68] +Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true]),false), [id=#70] + +(109) BroadcastHashJoin [codegen id : 78] +Left keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Right keys [3]: [i_brand_id#54, i_class_id#55, i_category_id#56] +Join condition: None + +(110) TakeOrderedAndProject +Input [12]: [channel#44, i_brand_id#7, i_class_id#8, i_category_id#9, sales#45, number_sales#46, channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sales#67, number_sales#68] +Arguments: 100, [channel#44 ASC NULLS FIRST, i_brand_id#7 ASC NULLS FIRST, i_class_id#8 ASC NULLS FIRST, 
i_category_id#9 ASC NULLS FIRST], [channel#44, i_brand_id#7, i_class_id#8, i_category_id#9, sales#45, number_sales#46, channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sales#67, number_sales#68] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 86 Hosting Expression = Subquery scalar-subquery#48, [id=#49] +* HashAggregate (136) ++- Exchange (135) + +- * HashAggregate (134) + +- Union (133) + :- * Project (120) + : +- * BroadcastHashJoin Inner BuildRight (119) + : :- * Filter (113) + : : +- * ColumnarToRow (112) + : : +- Scan parquet default.store_sales (111) + : +- BroadcastExchange (118) + : +- * Project (117) + : +- * Filter (116) + : +- * ColumnarToRow (115) + : +- Scan parquet default.date_dim (114) + :- * Project (126) + : +- * BroadcastHashJoin Inner BuildRight (125) + : :- * Filter (123) + : : +- * ColumnarToRow (122) + : : +- Scan parquet default.catalog_sales (121) + : +- ReusedExchange (124) + +- * Project (132) + +- * BroadcastHashJoin Inner BuildRight (131) + :- * Filter (129) + : +- * ColumnarToRow (128) + : +- Scan parquet default.web_sales (127) + +- ReusedExchange (130) + + +(111) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(112) ColumnarToRow [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] + +(113) Filter [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] +Condition : isnotnull(ss_sold_date_sk#1) + +(114) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_year#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: 
[IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(115) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#10, d_year#11] + +(116) Filter [codegen id : 1] +Input [2]: [d_date_sk#10, d_year#11] +Condition : (((isnotnull(d_year#11) AND (d_year#11 >= 1999)) AND (d_year#11 <= 2001)) AND isnotnull(d_date_sk#10)) + +(117) Project [codegen id : 1] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_year#11] + +(118) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#71] + +(119) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(120) Project [codegen id : 2] +Output [2]: [ss_quantity#3 AS quantity#72, ss_list_price#4 AS list_price#73] +Input [4]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, d_date_sk#10] + +(121) Scan parquet default.catalog_sales +Output [3]: [cs_sold_date_sk#18, cs_quantity#74, cs_list_price#75] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(122) ColumnarToRow [codegen id : 4] +Input [3]: [cs_sold_date_sk#18, cs_quantity#74, cs_list_price#75] + +(123) Filter [codegen id : 4] +Input [3]: [cs_sold_date_sk#18, cs_quantity#74, cs_list_price#75] +Condition : isnotnull(cs_sold_date_sk#18) + +(124) ReusedExchange [Reuses operator id: 118] +Output [1]: [d_date_sk#10] + +(125) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#18] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(126) Project [codegen id : 4] +Output [2]: [cs_quantity#74 AS quantity#76, cs_list_price#75 AS list_price#77] +Input [4]: [cs_sold_date_sk#18, cs_quantity#74, cs_list_price#75, d_date_sk#10] + +(127) Scan 
parquet default.web_sales +Output [3]: [ws_sold_date_sk#22, ws_quantity#78, ws_list_price#79] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(128) ColumnarToRow [codegen id : 6] +Input [3]: [ws_sold_date_sk#22, ws_quantity#78, ws_list_price#79] + +(129) Filter [codegen id : 6] +Input [3]: [ws_sold_date_sk#22, ws_quantity#78, ws_list_price#79] +Condition : isnotnull(ws_sold_date_sk#22) + +(130) ReusedExchange [Reuses operator id: 118] +Output [1]: [d_date_sk#10] + +(131) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#22] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(132) Project [codegen id : 6] +Output [2]: [ws_quantity#78 AS quantity#80, ws_list_price#79 AS list_price#81] +Input [4]: [ws_sold_date_sk#22, ws_quantity#78, ws_list_price#79, d_date_sk#10] + +(133) Union + +(134) HashAggregate [codegen id : 7] +Input [2]: [quantity#72, list_price#73] +Keys: [] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#72 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#73 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#82, count#83] +Results [2]: [sum#84, count#85] + +(135) Exchange +Input [2]: [sum#84, count#85] +Arguments: SinglePartition, true, [id=#86] + +(136) HashAggregate [codegen id : 8] +Input [2]: [sum#84, count#85] +Keys: [] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#72 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#73 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#72 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#73 as decimal(12,2)))), DecimalType(18,2), true))#87] +Results [1]: 
[avg(CheckOverflow((promote_precision(cast(cast(quantity#72 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#73 as decimal(12,2)))), DecimalType(18,2), true))#87 AS average_sales#88] + +Subquery:2 Hosting operator id = 67 Hosting Expression = Subquery scalar-subquery#30, [id=#31] +* Project (140) ++- * Filter (139) + +- * ColumnarToRow (138) + +- Scan parquet default.date_dim (137) + + +(137) Scan parquet default.date_dim +Output [4]: [d_week_seq#29, d_year#11, d_moy#89, d_dom#90] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), IsNotNull(d_dom), EqualTo(d_year,2000), EqualTo(d_moy,12), EqualTo(d_dom,11)] +ReadSchema: struct + +(138) ColumnarToRow [codegen id : 1] +Input [4]: [d_week_seq#29, d_year#11, d_moy#89, d_dom#90] + +(139) Filter [codegen id : 1] +Input [4]: [d_week_seq#29, d_year#11, d_moy#89, d_dom#90] +Condition : (((((isnotnull(d_year#11) AND isnotnull(d_moy#89)) AND isnotnull(d_dom#90)) AND (d_year#11 = 2000)) AND (d_moy#89 = 12)) AND (d_dom#90 = 11)) + +(140) Project [codegen id : 1] +Output [1]: [d_week_seq#29] +Input [4]: [d_week_seq#29, d_year#11, d_moy#89, d_dom#90] + +Subquery:3 Hosting operator id = 106 Hosting Expression = ReusedSubquery Subquery scalar-subquery#48, [id=#49] + +Subquery:4 Hosting operator id = 95 Hosting Expression = Subquery scalar-subquery#50, [id=#51] +* Project (144) ++- * Filter (143) + +- * ColumnarToRow (142) + +- Scan parquet default.date_dim (141) + + +(141) Scan parquet default.date_dim +Output [4]: [d_week_seq#29, d_year#11, d_moy#89, d_dom#90] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), IsNotNull(d_dom), EqualTo(d_year,1999), 
EqualTo(d_moy,12), EqualTo(d_dom,11)] +ReadSchema: struct + +(142) ColumnarToRow [codegen id : 1] +Input [4]: [d_week_seq#29, d_year#11, d_moy#89, d_dom#90] + +(143) Filter [codegen id : 1] +Input [4]: [d_week_seq#29, d_year#11, d_moy#89, d_dom#90] +Condition : (((((isnotnull(d_year#11) AND isnotnull(d_moy#89)) AND isnotnull(d_dom#90)) AND (d_year#11 = 1999)) AND (d_moy#89 = 12)) AND (d_dom#90 = 11)) + +(144) Project [codegen id : 1] +Output [1]: [d_week_seq#29] +Input [4]: [d_week_seq#29, d_year#11, d_moy#89, d_dom#90] + + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/simplified.txt new file mode 100644 index 0000000000000..5141d40e7325b --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/simplified.txt @@ -0,0 +1,231 @@ +TakeOrderedAndProject [channel,channel,i_brand_id,i_brand_id,i_category_id,i_category_id,i_class_id,i_class_id,number_sales,number_sales,sales,sales] + WholeStageCodegen (78) + BroadcastHashJoin [i_brand_id,i_brand_id,i_category_id,i_category_id,i_class_id,i_class_id] + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + Subquery #2 + WholeStageCodegen (8) + HashAggregate [count,sum] [average_sales,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),count,sum] + InputAdapter + Exchange #15 + WholeStageCodegen (7) + HashAggregate [list_price,quantity] [count,count,sum,sum] + InputAdapter + Union + WholeStageCodegen (2) + Project [ss_list_price,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter 
[ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_list_price,ss_quantity,ss_sold_date_sk] + InputAdapter + BroadcastExchange #16 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen (4) + Project [cs_list_price,cs_quantity] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_list_price,cs_quantity,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #16 + WholeStageCodegen (6) + Project [ws_list_price,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_list_price,ws_quantity,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #16 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + Exchange [i_brand_id,i_category_id,i_class_id] #1 + WholeStageCodegen (38) + HashAggregate [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity] [count,count,isEmpty,isEmpty,sum,sum] + Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_item_sk,ss_list_price,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + InputAdapter + SortMergeJoin [ss_item_sk,ss_item_sk] + WholeStageCodegen (2) + Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #2 + WholeStageCodegen (1) + 
Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] + WholeStageCodegen (18) + Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #3 + WholeStageCodegen (17) + Project [i_item_sk] + BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] + Filter [i_brand_id,i_category_id,i_class_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (16) + HashAggregate [brand_id,category_id,class_id] + HashAggregate [brand_id,category_id,class_id] + HashAggregate [brand_id,category_id,class_id] + InputAdapter + Exchange [brand_id,category_id,class_id] #5 + WholeStageCodegen (15) + HashAggregate [brand_id,category_id,class_id] + InputAdapter + SortMergeJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] + SortMergeJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] + WholeStageCodegen (6) + Sort [brand_id,category_id,class_id] + InputAdapter + Exchange [brand_id,category_id,class_id] #6 + WholeStageCodegen (5) + Project [i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (4) + Filter [i_brand_id,i_category_id,i_class_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] + WholeStageCodegen (10) + Sort [i_brand_id,i_category_id,i_class_id] + 
InputAdapter + Exchange [i_brand_id,i_category_id,i_class_id] #9 + WholeStageCodegen (9) + Project [i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_item_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #7 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (8) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] + WholeStageCodegen (14) + Sort [i_brand_id,i_category_id,i_class_id] + InputAdapter + Exchange [i_brand_id,i_category_id,i_class_id] #11 + WholeStageCodegen (13) + Project [i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_item_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #7 + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] #10 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (19) + Project [d_date_sk] + Filter [d_date_sk,d_week_seq] + Subquery #1 + WholeStageCodegen (1) + Project [d_week_seq] + Filter [d_dom,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_dom,d_moy,d_week_seq,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_week_seq] + InputAdapter + BroadcastExchange #13 + SortMergeJoin [i_item_sk,ss_item_sk] + WholeStageCodegen (21) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #14 + WholeStageCodegen (20) + Filter [i_brand_id,i_category_id,i_class_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] + WholeStageCodegen (37) + Sort [ss_item_sk] 
+ InputAdapter + ReusedExchange [ss_item_sk] #3 + InputAdapter + BroadcastExchange #17 + WholeStageCodegen (77) + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #2 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + Exchange [i_brand_id,i_category_id,i_class_id] #18 + WholeStageCodegen (76) + HashAggregate [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity] [count,count,isEmpty,isEmpty,sum,sum] + Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_item_sk,ss_list_price,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + InputAdapter + SortMergeJoin [ss_item_sk,ss_item_sk] + WholeStageCodegen (40) + Sort [ss_item_sk] + InputAdapter + ReusedExchange [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] #2 + WholeStageCodegen (56) + Sort [ss_item_sk] + InputAdapter + ReusedExchange [ss_item_sk] #3 + InputAdapter + BroadcastExchange #19 + WholeStageCodegen (57) + Project [d_date_sk] + Filter [d_date_sk,d_week_seq] + Subquery #3 + WholeStageCodegen (1) + Project [d_week_seq] + Filter [d_dom,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_dom,d_moy,d_week_seq,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim 
[d_date_sk,d_week_seq] + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] #13 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt new file mode 100644 index 0000000000000..54e984da09306 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt @@ -0,0 +1,763 @@ +== Physical Plan == +TakeOrderedAndProject (100) ++- * BroadcastHashJoin Inner BuildRight (99) + :- * Project (77) + : +- * Filter (76) + : +- * HashAggregate (75) + : +- Exchange (74) + : +- * HashAggregate (73) + : +- * Project (72) + : +- * BroadcastHashJoin Inner BuildRight (71) + : :- * Project (65) + : : +- * BroadcastHashJoin Inner BuildRight (64) + : : :- * BroadcastHashJoin LeftSemi BuildRight (57) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (56) + : : : +- * Project (55) + : : : +- * BroadcastHashJoin Inner BuildRight (54) + : : : :- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.item (4) + : : : +- BroadcastExchange (53) + : : : +- * HashAggregate (52) + : : : +- * HashAggregate (51) + : : : +- * HashAggregate (50) + : : : +- Exchange (49) + : : : +- * HashAggregate (48) + : : : +- * BroadcastHashJoin LeftSemi BuildRight (47) + : : : :- * BroadcastHashJoin LeftSemi BuildRight (36) + : : : : :- * Project (22) + : : : : : +- * BroadcastHashJoin Inner BuildRight (21) + : : : : : :- * Project (15) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : : :- * Filter (9) + : : : : : : : +- * ColumnarToRow (8) + : : : : : : : +- Scan parquet default.store_sales (7) + : : : : : : +- BroadcastExchange (13) + : : : : : : +- * Filter (12) + : : : : : : +- * ColumnarToRow (11) + : : : : : : +- Scan parquet default.item (10) + : : : : : +- BroadcastExchange (20) 
+ : : : : : +- * Project (19) + : : : : : +- * Filter (18) + : : : : : +- * ColumnarToRow (17) + : : : : : +- Scan parquet default.date_dim (16) + : : : : +- BroadcastExchange (35) + : : : : +- * Project (34) + : : : : +- * BroadcastHashJoin Inner BuildRight (33) + : : : : :- * Project (31) + : : : : : +- * BroadcastHashJoin Inner BuildRight (30) + : : : : : :- * Filter (25) + : : : : : : +- * ColumnarToRow (24) + : : : : : : +- Scan parquet default.catalog_sales (23) + : : : : : +- BroadcastExchange (29) + : : : : : +- * Filter (28) + : : : : : +- * ColumnarToRow (27) + : : : : : +- Scan parquet default.item (26) + : : : : +- ReusedExchange (32) + : : : +- BroadcastExchange (46) + : : : +- * Project (45) + : : : +- * BroadcastHashJoin Inner BuildRight (44) + : : : :- * Project (42) + : : : : +- * BroadcastHashJoin Inner BuildRight (41) + : : : : :- * Filter (39) + : : : : : +- * ColumnarToRow (38) + : : : : : +- Scan parquet default.web_sales (37) + : : : : +- ReusedExchange (40) + : : : +- ReusedExchange (43) + : : +- BroadcastExchange (63) + : : +- * BroadcastHashJoin LeftSemi BuildRight (62) + : : :- * Filter (60) + : : : +- * ColumnarToRow (59) + : : : +- Scan parquet default.item (58) + : : +- ReusedExchange (61) + : +- BroadcastExchange (70) + : +- * Project (69) + : +- * Filter (68) + : +- * ColumnarToRow (67) + : +- Scan parquet default.date_dim (66) + +- BroadcastExchange (98) + +- * Project (97) + +- * Filter (96) + +- * HashAggregate (95) + +- Exchange (94) + +- * HashAggregate (93) + +- * Project (92) + +- * BroadcastHashJoin Inner BuildRight (91) + :- * Project (85) + : +- * BroadcastHashJoin Inner BuildRight (84) + : :- * BroadcastHashJoin LeftSemi BuildRight (82) + : : :- * Filter (80) + : : : +- * ColumnarToRow (79) + : : : +- Scan parquet default.store_sales (78) + : : +- ReusedExchange (81) + : +- ReusedExchange (83) + +- BroadcastExchange (90) + +- * Project (89) + +- * Filter (88) + +- * ColumnarToRow (87) + +- Scan parquet default.date_dim 
(86) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 25] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] + +(3) Filter [codegen id : 25] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(4) Scan parquet default.item +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 11] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(6) Filter [codegen id : 11] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : ((isnotnull(i_class_id#7) AND isnotnull(i_brand_id#6)) AND isnotnull(i_category_id#8)) + +(7) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#1, ss_item_sk#2] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 9] +Input [2]: [ss_sold_date_sk#1, ss_item_sk#2] + +(9) Filter [codegen id : 9] +Input [2]: [ss_sold_date_sk#1, ss_item_sk#2] +Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(10) Scan parquet default.item +Output [4]: 
[i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_category_id), IsNotNull(i_class_id)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 1] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(12) Filter [codegen id : 1] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : (((isnotnull(i_item_sk#5) AND isnotnull(i_brand_id#6)) AND isnotnull(i_category_id#8)) AND isnotnull(i_class_id#7)) + +(13) BroadcastExchange +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] + +(14) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(15) Project [codegen id : 9] +Output [4]: [ss_sold_date_sk#1, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(16) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_year#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#10, d_year#11] + +(18) Filter [codegen id : 2] +Input [2]: [d_date_sk#10, d_year#11] +Condition : (((isnotnull(d_year#11) AND (d_year#11 >= 1999)) AND (d_year#11 <= 2001)) AND isnotnull(d_date_sk#10)) + +(19) Project [codegen id : 2] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_year#11] + +(20) 
BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(21) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(22) Project [codegen id : 9] +Output [3]: [i_brand_id#6 AS brand_id#13, i_class_id#7 AS class_id#14, i_category_id#8 AS category_id#15] +Input [5]: [ss_sold_date_sk#1, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] + +(23) Scan parquet default.catalog_sales +Output [2]: [cs_sold_date_sk#16, cs_item_sk#17] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 5] +Input [2]: [cs_sold_date_sk#16, cs_item_sk#17] + +(25) Filter [codegen id : 5] +Input [2]: [cs_sold_date_sk#16, cs_item_sk#17] +Condition : (isnotnull(cs_item_sk#17) AND isnotnull(cs_sold_date_sk#16)) + +(26) Scan parquet default.item +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 3] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(28) Filter [codegen id : 3] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : isnotnull(i_item_sk#5) + +(29) BroadcastExchange +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#18] + +(30) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_item_sk#17] +Right keys [1]: [i_item_sk#5] +Join condition: 
None + +(31) Project [codegen id : 5] +Output [4]: [cs_sold_date_sk#16, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [6]: [cs_sold_date_sk#16, cs_item_sk#17, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(32) ReusedExchange [Reuses operator id: 20] +Output [1]: [d_date_sk#10] + +(33) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_sold_date_sk#16] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(34) Project [codegen id : 5] +Output [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Input [5]: [cs_sold_date_sk#16, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] + +(35) BroadcastExchange +Input [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [id=#19] + +(36) BroadcastHashJoin [codegen id : 9] +Left keys [6]: [coalesce(brand_id#13, 0), isnull(brand_id#13), coalesce(class_id#14, 0), isnull(class_id#14), coalesce(category_id#15, 0), isnull(category_id#15)] +Right keys [6]: [coalesce(i_brand_id#6, 0), isnull(i_brand_id#6), coalesce(i_class_id#7, 0), isnull(i_class_id#7), coalesce(i_category_id#8, 0), isnull(i_category_id#8)] +Join condition: None + +(37) Scan parquet default.web_sales +Output [2]: [ws_sold_date_sk#20, ws_item_sk#21] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 8] +Input [2]: [ws_sold_date_sk#20, ws_item_sk#21] + +(39) Filter [codegen id : 8] +Input [2]: [ws_sold_date_sk#20, ws_item_sk#21] +Condition : (isnotnull(ws_item_sk#21) AND isnotnull(ws_sold_date_sk#20)) + +(40) ReusedExchange [Reuses operator id: 29] +Output [4]: 
[i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(41) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ws_item_sk#21] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(42) Project [codegen id : 8] +Output [4]: [ws_sold_date_sk#20, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [6]: [ws_sold_date_sk#20, ws_item_sk#21, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(43) ReusedExchange [Reuses operator id: 20] +Output [1]: [d_date_sk#10] + +(44) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ws_sold_date_sk#20] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(45) Project [codegen id : 8] +Output [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Input [5]: [ws_sold_date_sk#20, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] + +(46) BroadcastExchange +Input [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [id=#22] + +(47) BroadcastHashJoin [codegen id : 9] +Left keys [6]: [coalesce(brand_id#13, 0), isnull(brand_id#13), coalesce(class_id#14, 0), isnull(class_id#14), coalesce(category_id#15, 0), isnull(category_id#15)] +Right keys [6]: [coalesce(i_brand_id#6, 0), isnull(i_brand_id#6), coalesce(i_class_id#7, 0), isnull(i_class_id#7), coalesce(i_category_id#8, 0), isnull(i_category_id#8)] +Join condition: None + +(48) HashAggregate [codegen id : 9] +Input [3]: [brand_id#13, class_id#14, category_id#15] +Keys [3]: [brand_id#13, class_id#14, category_id#15] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#13, class_id#14, category_id#15] + +(49) Exchange +Input [3]: [brand_id#13, class_id#14, category_id#15] +Arguments: hashpartitioning(brand_id#13, class_id#14, category_id#15, 5), true, [id=#23] + +(50) HashAggregate [codegen id : 10] +Input [3]: 
[brand_id#13, class_id#14, category_id#15] +Keys [3]: [brand_id#13, class_id#14, category_id#15] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#13, class_id#14, category_id#15] + +(51) HashAggregate [codegen id : 10] +Input [3]: [brand_id#13, class_id#14, category_id#15] +Keys [3]: [brand_id#13, class_id#14, category_id#15] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#13, class_id#14, category_id#15] + +(52) HashAggregate [codegen id : 10] +Input [3]: [brand_id#13, class_id#14, category_id#15] +Keys [3]: [brand_id#13, class_id#14, category_id#15] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#13, class_id#14, category_id#15] + +(53) BroadcastExchange +Input [3]: [brand_id#13, class_id#14, category_id#15] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#24] + +(54) BroadcastHashJoin [codegen id : 11] +Left keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Right keys [3]: [brand_id#13, class_id#14, category_id#15] +Join condition: None + +(55) Project [codegen id : 11] +Output [1]: [i_item_sk#5 AS ss_item_sk#25] +Input [7]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8, brand_id#13, class_id#14, category_id#15] + +(56) BroadcastExchange +Input [1]: [ss_item_sk#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] + +(57) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [ss_item_sk#25] +Join condition: None + +(58) Scan parquet default.item +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_class_id), IsNotNull(i_category_id), IsNotNull(i_brand_id)] +ReadSchema: struct + +(59) ColumnarToRow [codegen id : 23] 
+Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(60) Filter [codegen id : 23] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : (((isnotnull(i_item_sk#5) AND isnotnull(i_class_id#7)) AND isnotnull(i_category_id#8)) AND isnotnull(i_brand_id#6)) + +(61) ReusedExchange [Reuses operator id: 56] +Output [1]: [ss_item_sk#25] + +(62) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [i_item_sk#5] +Right keys [1]: [ss_item_sk#25] +Join condition: None + +(63) BroadcastExchange +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#27] + +(64) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(65) Project [codegen id : 25] +Output [6]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(66) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_week_seq#28] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(67) ColumnarToRow [codegen id : 24] +Input [2]: [d_date_sk#10, d_week_seq#28] + +(68) Filter [codegen id : 24] +Input [2]: [d_date_sk#10, d_week_seq#28] +Condition : ((isnotnull(d_week_seq#28) AND (d_week_seq#28 = Subquery scalar-subquery#29, [id=#30])) AND isnotnull(d_date_sk#10)) + +(69) Project [codegen id : 24] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_week_seq#28] + +(70) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#31] + +(71) 
BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(72) Project [codegen id : 25] +Output [5]: [ss_quantity#3, ss_list_price#4, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [7]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] + +(73) HashAggregate [codegen id : 25] +Input [5]: [ss_quantity#3, ss_list_price#4, i_brand_id#6, i_class_id#7, i_category_id#8] +Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Aggregate Attributes [3]: [sum#32, isEmpty#33, count#34] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#35, isEmpty#36, count#37] + +(74) Exchange +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#35, isEmpty#36, count#37] +Arguments: hashpartitioning(i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#38] + +(75) HashAggregate [codegen id : 52] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#35, isEmpty#36, count#37] +Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#39, count(1)#40] +Results [7]: [store AS channel#41, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as 
decimal(12,2)))), DecimalType(18,2), true))#39 AS sales#42, count(1)#40 AS number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#39 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44] + +(76) Filter [codegen id : 52] +Input [7]: [channel#41, i_brand_id#6, i_class_id#7, i_category_id#8, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44 as decimal(32,6)) > cast(Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) + +(77) Project [codegen id : 52] +Output [6]: [channel#41, i_brand_id#6, i_class_id#7, i_category_id#8, sales#42, number_sales#43] +Input [7]: [channel#41, i_brand_id#6, i_class_id#7, i_category_id#8, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44] + +(78) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] 
+PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(79) ColumnarToRow [codegen id : 50] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] + +(80) Filter [codegen id : 50] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(81) ReusedExchange [Reuses operator id: 56] +Output [1]: [ss_item_sk#25] + +(82) BroadcastHashJoin [codegen id : 50] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [ss_item_sk#25] +Join condition: None + +(83) ReusedExchange [Reuses operator id: 63] +Output [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] + +(84) BroadcastHashJoin [codegen id : 50] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#47] +Join condition: None + +(85) Project [codegen id : 50] +Output [6]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, i_brand_id#48, i_class_id#49, i_category_id#50] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4, i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] + +(86) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_week_seq#28] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(87) ColumnarToRow [codegen id : 49] +Input [2]: [d_date_sk#10, d_week_seq#28] + +(88) Filter [codegen id : 49] +Input [2]: [d_date_sk#10, d_week_seq#28] +Condition : ((isnotnull(d_week_seq#28) AND (d_week_seq#28 = Subquery scalar-subquery#51, [id=#52])) AND isnotnull(d_date_sk#10)) + +(89) Project [codegen id : 49] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_week_seq#28] + +(90) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), 
[id=#53] + +(91) BroadcastHashJoin [codegen id : 50] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(92) Project [codegen id : 50] +Output [5]: [ss_quantity#3, ss_list_price#4, i_brand_id#48, i_class_id#49, i_category_id#50] +Input [7]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, i_brand_id#48, i_class_id#49, i_category_id#50, d_date_sk#10] + +(93) HashAggregate [codegen id : 50] +Input [5]: [ss_quantity#3, ss_list_price#4, i_brand_id#48, i_class_id#49, i_category_id#50] +Keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Aggregate Attributes [3]: [sum#54, isEmpty#55, count#56] +Results [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#57, isEmpty#58, count#59] + +(94) Exchange +Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#57, isEmpty#58, count#59] +Arguments: hashpartitioning(i_brand_id#48, i_class_id#49, i_category_id#50, 5), true, [id=#60] + +(95) HashAggregate [codegen id : 51] +Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#57, isEmpty#58, count#59] +Keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#61, count(1)#62] +Results [7]: [store AS channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#61 AS sales#64, count(1)#62 AS number_sales#65, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#61 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#66] + +(96) Filter [codegen id : 51] +Input [7]: [channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sales#64, number_sales#65, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#66] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#66) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#66 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) + +(97) Project [codegen id : 51] +Output [6]: [channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sales#64, number_sales#65] +Input [7]: [channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sales#64, number_sales#65, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#66] + +(98) BroadcastExchange +Input [6]: [channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sales#64, number_sales#65] +Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, 
int, true]),false), [id=#67] + +(99) BroadcastHashJoin [codegen id : 52] +Left keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Right keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] +Join condition: None + +(100) TakeOrderedAndProject +Input [12]: [channel#41, i_brand_id#6, i_class_id#7, i_category_id#8, sales#42, number_sales#43, channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sales#64, number_sales#65] +Arguments: 100, [channel#41 ASC NULLS FIRST, i_brand_id#6 ASC NULLS FIRST, i_class_id#7 ASC NULLS FIRST, i_category_id#8 ASC NULLS FIRST], [channel#41, i_brand_id#6, i_class_id#7, i_category_id#8, sales#42, number_sales#43, channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sales#64, number_sales#65] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 76 Hosting Expression = Subquery scalar-subquery#45, [id=#46] +* HashAggregate (126) ++- Exchange (125) + +- * HashAggregate (124) + +- Union (123) + :- * Project (110) + : +- * BroadcastHashJoin Inner BuildRight (109) + : :- * Filter (103) + : : +- * ColumnarToRow (102) + : : +- Scan parquet default.store_sales (101) + : +- BroadcastExchange (108) + : +- * Project (107) + : +- * Filter (106) + : +- * ColumnarToRow (105) + : +- Scan parquet default.date_dim (104) + :- * Project (116) + : +- * BroadcastHashJoin Inner BuildRight (115) + : :- * Filter (113) + : : +- * ColumnarToRow (112) + : : +- Scan parquet default.catalog_sales (111) + : +- ReusedExchange (114) + +- * Project (122) + +- * BroadcastHashJoin Inner BuildRight (121) + :- * Filter (119) + : +- * ColumnarToRow (118) + : +- Scan parquet default.web_sales (117) + +- ReusedExchange (120) + + +(101) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk)] 
+ReadSchema: struct + +(102) ColumnarToRow [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] + +(103) Filter [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] +Condition : isnotnull(ss_sold_date_sk#1) + +(104) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_year#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(105) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#10, d_year#11] + +(106) Filter [codegen id : 1] +Input [2]: [d_date_sk#10, d_year#11] +Condition : (((isnotnull(d_year#11) AND (d_year#11 >= 1999)) AND (d_year#11 <= 2001)) AND isnotnull(d_date_sk#10)) + +(107) Project [codegen id : 1] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_year#11] + +(108) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#68] + +(109) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(110) Project [codegen id : 2] +Output [2]: [ss_quantity#3 AS quantity#69, ss_list_price#4 AS list_price#70] +Input [4]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, d_date_sk#10] + +(111) Scan parquet default.catalog_sales +Output [3]: [cs_sold_date_sk#16, cs_quantity#71, cs_list_price#72] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(112) ColumnarToRow [codegen id : 4] +Input [3]: [cs_sold_date_sk#16, cs_quantity#71, cs_list_price#72] + +(113) Filter [codegen id : 4] +Input [3]: 
[cs_sold_date_sk#16, cs_quantity#71, cs_list_price#72] +Condition : isnotnull(cs_sold_date_sk#16) + +(114) ReusedExchange [Reuses operator id: 108] +Output [1]: [d_date_sk#10] + +(115) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#16] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(116) Project [codegen id : 4] +Output [2]: [cs_quantity#71 AS quantity#73, cs_list_price#72 AS list_price#74] +Input [4]: [cs_sold_date_sk#16, cs_quantity#71, cs_list_price#72, d_date_sk#10] + +(117) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#20, ws_quantity#75, ws_list_price#76] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(118) ColumnarToRow [codegen id : 6] +Input [3]: [ws_sold_date_sk#20, ws_quantity#75, ws_list_price#76] + +(119) Filter [codegen id : 6] +Input [3]: [ws_sold_date_sk#20, ws_quantity#75, ws_list_price#76] +Condition : isnotnull(ws_sold_date_sk#20) + +(120) ReusedExchange [Reuses operator id: 108] +Output [1]: [d_date_sk#10] + +(121) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#20] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(122) Project [codegen id : 6] +Output [2]: [ws_quantity#75 AS quantity#77, ws_list_price#76 AS list_price#78] +Input [4]: [ws_sold_date_sk#20, ws_quantity#75, ws_list_price#76, d_date_sk#10] + +(123) Union + +(124) HashAggregate [codegen id : 7] +Input [2]: [quantity#69, list_price#70] +Keys: [] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#69 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#70 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#79, count#80] +Results [2]: [sum#81, count#82] + +(125) Exchange +Input [2]: [sum#81, count#82] +Arguments: SinglePartition, true, [id=#83] + +(126) 
HashAggregate [codegen id : 8] +Input [2]: [sum#81, count#82] +Keys: [] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#69 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#70 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#69 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#70 as decimal(12,2)))), DecimalType(18,2), true))#84] +Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#69 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#70 as decimal(12,2)))), DecimalType(18,2), true))#84 AS average_sales#85] + +Subquery:2 Hosting operator id = 68 Hosting Expression = Subquery scalar-subquery#29, [id=#30] +* Project (130) ++- * Filter (129) + +- * ColumnarToRow (128) + +- Scan parquet default.date_dim (127) + + +(127) Scan parquet default.date_dim +Output [4]: [d_week_seq#28, d_year#11, d_moy#86, d_dom#87] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), IsNotNull(d_dom), EqualTo(d_year,2000), EqualTo(d_moy,12), EqualTo(d_dom,11)] +ReadSchema: struct + +(128) ColumnarToRow [codegen id : 1] +Input [4]: [d_week_seq#28, d_year#11, d_moy#86, d_dom#87] + +(129) Filter [codegen id : 1] +Input [4]: [d_week_seq#28, d_year#11, d_moy#86, d_dom#87] +Condition : (((((isnotnull(d_moy#86) AND isnotnull(d_year#11)) AND isnotnull(d_dom#87)) AND (d_year#11 = 2000)) AND (d_moy#86 = 12)) AND (d_dom#87 = 11)) + +(130) Project [codegen id : 1] +Output [1]: [d_week_seq#28] +Input [4]: [d_week_seq#28, d_year#11, d_moy#86, d_dom#87] + +Subquery:3 Hosting operator id = 96 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] + +Subquery:4 Hosting operator id = 88 Hosting Expression = Subquery scalar-subquery#51, 
[id=#52] +* Project (134) ++- * Filter (133) + +- * ColumnarToRow (132) + +- Scan parquet default.date_dim (131) + + +(131) Scan parquet default.date_dim +Output [4]: [d_week_seq#28, d_year#11, d_moy#86, d_dom#87] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), IsNotNull(d_dom), EqualTo(d_year,1999), EqualTo(d_moy,12), EqualTo(d_dom,11)] +ReadSchema: struct + +(132) ColumnarToRow [codegen id : 1] +Input [4]: [d_week_seq#28, d_year#11, d_moy#86, d_dom#87] + +(133) Filter [codegen id : 1] +Input [4]: [d_week_seq#28, d_year#11, d_moy#86, d_dom#87] +Condition : (((((isnotnull(d_moy#86) AND isnotnull(d_year#11)) AND isnotnull(d_dom#87)) AND (d_year#11 = 1999)) AND (d_moy#86 = 12)) AND (d_dom#87 = 11)) + +(134) Project [codegen id : 1] +Output [1]: [d_week_seq#28] +Input [4]: [d_week_seq#28, d_year#11, d_moy#86, d_dom#87] + + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt new file mode 100644 index 0000000000000..f1668ea399807 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt @@ -0,0 +1,204 @@ +TakeOrderedAndProject [channel,channel,i_brand_id,i_brand_id,i_category_id,i_category_id,i_class_id,i_class_id,number_sales,number_sales,sales,sales] + WholeStageCodegen (52) + BroadcastHashJoin [i_brand_id,i_brand_id,i_category_id,i_category_id,i_class_id,i_class_id] + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + Subquery #2 + WholeStageCodegen (8) + HashAggregate [count,sum] 
[average_sales,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),count,sum] + InputAdapter + Exchange #12 + WholeStageCodegen (7) + HashAggregate [list_price,quantity] [count,count,sum,sum] + InputAdapter + Union + WholeStageCodegen (2) + Project [ss_list_price,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_list_price,ss_quantity,ss_sold_date_sk] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen (4) + Project [cs_list_price,cs_quantity] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_list_price,cs_quantity,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #13 + WholeStageCodegen (6) + Project [ws_list_price,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_list_price,ws_quantity,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #13 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + Exchange [i_brand_id,i_category_id,i_class_id] #1 + WholeStageCodegen (25) + HashAggregate 
[i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity] [count,count,isEmpty,isEmpty,sum,sum] + Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + BroadcastHashJoin [ss_item_sk,ss_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (11) + Project [i_item_sk] + BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] + Filter [i_brand_id,i_category_id,i_class_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (10) + HashAggregate [brand_id,category_id,class_id] + HashAggregate [brand_id,category_id,class_id] + HashAggregate [brand_id,category_id,class_id] + InputAdapter + Exchange [brand_id,category_id,class_id] #4 + WholeStageCodegen (9) + HashAggregate [brand_id,category_id,class_id] + BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] + Project [i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Filter [i_brand_id,i_category_id,i_class_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] + InputAdapter + BroadcastExchange 
#6 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (5) + Project [i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_item_sk,cs_sold_date_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] + InputAdapter + ReusedExchange [d_date_sk] #6 + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (8) + Project [i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,ws_sold_date_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] #8 + InputAdapter + ReusedExchange [d_date_sk] #6 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (23) + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [i_brand_id,i_category_id,i_class_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] + InputAdapter + ReusedExchange [ss_item_sk] #2 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (24) + Project [d_date_sk] + Filter [d_date_sk,d_week_seq] + Subquery #1 + WholeStageCodegen (1) + Project [d_week_seq] + Filter [d_dom,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_dom,d_moy,d_week_seq,d_year] + ColumnarToRow + InputAdapter + Scan parquet 
default.date_dim [d_date_sk,d_week_seq] + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (51) + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #2 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + Exchange [i_brand_id,i_category_id,i_class_id] #15 + WholeStageCodegen (50) + HashAggregate [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity] [count,count,isEmpty,isEmpty,sum,sum] + Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + BroadcastHashJoin [ss_item_sk,ss_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] + InputAdapter + ReusedExchange [ss_item_sk] #2 + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] #10 + InputAdapter + BroadcastExchange #16 + WholeStageCodegen (49) + Project [d_date_sk] + Filter [d_date_sk,d_week_seq] + Subquery #3 + WholeStageCodegen (1) + Project [d_week_seq] + Filter [d_dom,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim 
[d_dom,d_moy,d_week_seq,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_week_seq] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15.sf100/explain.txt new file mode 100644 index 0000000000000..997fe4f5bfce5 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15.sf100/explain.txt @@ -0,0 +1,180 @@ +== Physical Plan == +TakeOrderedAndProject (32) ++- * HashAggregate (31) + +- Exchange (30) + +- * HashAggregate (29) + +- * Project (28) + +- * SortMergeJoin Inner (27) + :- * Sort (12) + : +- Exchange (11) + : +- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.catalog_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.date_dim (4) + +- * Sort (26) + +- Exchange (25) + +- * Project (24) + +- * SortMergeJoin Inner (23) + :- * Sort (17) + : +- Exchange (16) + : +- * Filter (15) + : +- * ColumnarToRow (14) + : +- Scan parquet default.customer (13) + +- * Sort (22) + +- Exchange (21) + +- * Filter (20) + +- * ColumnarToRow (19) + +- Scan parquet default.customer_address (18) + + +(1) Scan parquet default.catalog_sales +Output [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_sales_price#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_sales_price#3] + +(3) Filter [codegen id : 2] +Input [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_sales_price#3] +Condition : 
(isnotnull(cs_bill_customer_sk#2) AND isnotnull(cs_sold_date_sk#1)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Condition : ((((isnotnull(d_qoy#6) AND isnotnull(d_year#5)) AND (d_qoy#6 = 2)) AND (d_year#5 = 2001)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(10) Project [codegen id : 2] +Output [2]: [cs_bill_customer_sk#2, cs_sales_price#3] +Input [4]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_sales_price#3, d_date_sk#4] + +(11) Exchange +Input [2]: [cs_bill_customer_sk#2, cs_sales_price#3] +Arguments: hashpartitioning(cs_bill_customer_sk#2, 5), true, [id=#8] + +(12) Sort [codegen id : 3] +Input [2]: [cs_bill_customer_sk#2, cs_sales_price#3] +Arguments: [cs_bill_customer_sk#2 ASC NULLS FIRST], false, 0 + +(13) Scan parquet default.customer +Output [2]: [c_customer_sk#9, c_current_addr_sk#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 4] 
+Input [2]: [c_customer_sk#9, c_current_addr_sk#10] + +(15) Filter [codegen id : 4] +Input [2]: [c_customer_sk#9, c_current_addr_sk#10] +Condition : (isnotnull(c_customer_sk#9) AND isnotnull(c_current_addr_sk#10)) + +(16) Exchange +Input [2]: [c_customer_sk#9, c_current_addr_sk#10] +Arguments: hashpartitioning(c_current_addr_sk#10, 5), true, [id=#11] + +(17) Sort [codegen id : 5] +Input [2]: [c_customer_sk#9, c_current_addr_sk#10] +Arguments: [c_current_addr_sk#10 ASC NULLS FIRST], false, 0 + +(18) Scan parquet default.customer_address +Output [3]: [ca_address_sk#12, ca_state#13, ca_zip#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 6] +Input [3]: [ca_address_sk#12, ca_state#13, ca_zip#14] + +(20) Filter [codegen id : 6] +Input [3]: [ca_address_sk#12, ca_state#13, ca_zip#14] +Condition : isnotnull(ca_address_sk#12) + +(21) Exchange +Input [3]: [ca_address_sk#12, ca_state#13, ca_zip#14] +Arguments: hashpartitioning(ca_address_sk#12, 5), true, [id=#15] + +(22) Sort [codegen id : 7] +Input [3]: [ca_address_sk#12, ca_state#13, ca_zip#14] +Arguments: [ca_address_sk#12 ASC NULLS FIRST], false, 0 + +(23) SortMergeJoin [codegen id : 8] +Left keys [1]: [c_current_addr_sk#10] +Right keys [1]: [ca_address_sk#12] +Join condition: None + +(24) Project [codegen id : 8] +Output [3]: [c_customer_sk#9, ca_state#13, ca_zip#14] +Input [5]: [c_customer_sk#9, c_current_addr_sk#10, ca_address_sk#12, ca_state#13, ca_zip#14] + +(25) Exchange +Input [3]: [c_customer_sk#9, ca_state#13, ca_zip#14] +Arguments: hashpartitioning(c_customer_sk#9, 5), true, [id=#16] + +(26) Sort [codegen id : 9] +Input [3]: [c_customer_sk#9, ca_state#13, ca_zip#14] +Arguments: [c_customer_sk#9 ASC NULLS FIRST], false, 0 + +(27) SortMergeJoin [codegen id : 10] +Left keys [1]: 
[cs_bill_customer_sk#2] +Right keys [1]: [c_customer_sk#9] +Join condition: ((substr(ca_zip#14, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) OR ca_state#13 IN (CA,WA,GA)) OR (cs_sales_price#3 > 500.00)) + +(28) Project [codegen id : 10] +Output [2]: [cs_sales_price#3, ca_zip#14] +Input [5]: [cs_bill_customer_sk#2, cs_sales_price#3, c_customer_sk#9, ca_state#13, ca_zip#14] + +(29) HashAggregate [codegen id : 10] +Input [2]: [cs_sales_price#3, ca_zip#14] +Keys [1]: [ca_zip#14] +Functions [1]: [partial_sum(UnscaledValue(cs_sales_price#3))] +Aggregate Attributes [1]: [sum#17] +Results [2]: [ca_zip#14, sum#18] + +(30) Exchange +Input [2]: [ca_zip#14, sum#18] +Arguments: hashpartitioning(ca_zip#14, 5), true, [id=#19] + +(31) HashAggregate [codegen id : 11] +Input [2]: [ca_zip#14, sum#18] +Keys [1]: [ca_zip#14] +Functions [1]: [sum(UnscaledValue(cs_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#3))#20] +Results [2]: [ca_zip#14, MakeDecimal(sum(UnscaledValue(cs_sales_price#3))#20,17,2) AS sum(cs_sales_price)#21] + +(32) TakeOrderedAndProject +Input [2]: [ca_zip#14, sum(cs_sales_price)#21] +Arguments: 100, [ca_zip#14 ASC NULLS FIRST], [ca_zip#14, sum(cs_sales_price)#21] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15.sf100/simplified.txt new file mode 100644 index 0000000000000..e03fd039c07d7 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15.sf100/simplified.txt @@ -0,0 +1,57 @@ +TakeOrderedAndProject [ca_zip,sum(cs_sales_price)] + WholeStageCodegen (11) + HashAggregate [ca_zip,sum] [sum,sum(UnscaledValue(cs_sales_price)),sum(cs_sales_price)] + InputAdapter + Exchange [ca_zip] #1 + WholeStageCodegen (10) + HashAggregate [ca_zip,cs_sales_price] [sum,sum] + Project [ca_zip,cs_sales_price] + SortMergeJoin 
[c_customer_sk,ca_state,ca_zip,cs_bill_customer_sk,cs_sales_price] + InputAdapter + WholeStageCodegen (3) + Sort [cs_bill_customer_sk] + InputAdapter + Exchange [cs_bill_customer_sk] #2 + WholeStageCodegen (2) + Project [cs_bill_customer_sk,cs_sales_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_bill_customer_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_qoy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] + InputAdapter + WholeStageCodegen (9) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #4 + WholeStageCodegen (8) + Project [c_customer_sk,ca_state,ca_zip] + SortMergeJoin [c_current_addr_sk,ca_address_sk] + InputAdapter + WholeStageCodegen (5) + Sort [c_current_addr_sk] + InputAdapter + Exchange [c_current_addr_sk] #5 + WholeStageCodegen (4) + Filter [c_current_addr_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (7) + Sort [ca_address_sk] + InputAdapter + Exchange [ca_address_sk] #6 + WholeStageCodegen (6) + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state,ca_zip] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15/explain.txt new file mode 100644 index 0000000000000..009db105d2cb0 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15/explain.txt @@ -0,0 +1,150 @@ +== Physical Plan == +TakeOrderedAndProject (26) ++- * HashAggregate (25) + +- Exchange (24) + +- * HashAggregate (23) + +- * Project (22) + +- * BroadcastHashJoin Inner BuildRight (21) + :- * 
Project (15) + : +- * BroadcastHashJoin Inner BuildRight (14) + : :- * Project (9) + : : +- * BroadcastHashJoin Inner BuildRight (8) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.catalog_sales (1) + : : +- BroadcastExchange (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.customer (4) + : +- BroadcastExchange (13) + : +- * Filter (12) + : +- * ColumnarToRow (11) + : +- Scan parquet default.customer_address (10) + +- BroadcastExchange (20) + +- * Project (19) + +- * Filter (18) + +- * ColumnarToRow (17) + +- Scan parquet default.date_dim (16) + + +(1) Scan parquet default.catalog_sales +Output [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_sales_price#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_sales_price#3] + +(3) Filter [codegen id : 4] +Input [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_sales_price#3] +Condition : (isnotnull(cs_bill_customer_sk#2) AND isnotnull(cs_sold_date_sk#1)) + +(4) Scan parquet default.customer +Output [2]: [c_customer_sk#4, c_current_addr_sk#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [c_customer_sk#4, c_current_addr_sk#5] + +(6) Filter [codegen id : 1] +Input [2]: [c_customer_sk#4, c_current_addr_sk#5] +Condition : (isnotnull(c_customer_sk#4) AND isnotnull(c_current_addr_sk#5)) + +(7) BroadcastExchange +Input [2]: [c_customer_sk#4, c_current_addr_sk#5] +Arguments: 
HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#6] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_bill_customer_sk#2] +Right keys [1]: [c_customer_sk#4] +Join condition: None + +(9) Project [codegen id : 4] +Output [3]: [cs_sold_date_sk#1, cs_sales_price#3, c_current_addr_sk#5] +Input [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_sales_price#3, c_customer_sk#4, c_current_addr_sk#5] + +(10) Scan parquet default.customer_address +Output [3]: [ca_address_sk#7, ca_state#8, ca_zip#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [ca_address_sk#7, ca_state#8, ca_zip#9] + +(12) Filter [codegen id : 2] +Input [3]: [ca_address_sk#7, ca_state#8, ca_zip#9] +Condition : isnotnull(ca_address_sk#7) + +(13) BroadcastExchange +Input [3]: [ca_address_sk#7, ca_state#8, ca_zip#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [c_current_addr_sk#5] +Right keys [1]: [ca_address_sk#7] +Join condition: ((substr(ca_zip#9, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) OR ca_state#8 IN (CA,WA,GA)) OR (cs_sales_price#3 > 500.00)) + +(15) Project [codegen id : 4] +Output [3]: [cs_sold_date_sk#1, cs_sales_price#3, ca_zip#9] +Input [6]: [cs_sold_date_sk#1, cs_sales_price#3, c_current_addr_sk#5, ca_address_sk#7, ca_state#8, ca_zip#9] + +(16) Scan parquet default.date_dim +Output [3]: [d_date_sk#11, d_year#12, d_qoy#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), 
EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#11, d_year#12, d_qoy#13] + +(18) Filter [codegen id : 3] +Input [3]: [d_date_sk#11, d_year#12, d_qoy#13] +Condition : ((((isnotnull(d_qoy#13) AND isnotnull(d_year#12)) AND (d_qoy#13 = 2)) AND (d_year#12 = 2001)) AND isnotnull(d_date_sk#11)) + +(19) Project [codegen id : 3] +Output [1]: [d_date_sk#11] +Input [3]: [d_date_sk#11, d_year#12, d_qoy#13] + +(20) BroadcastExchange +Input [1]: [d_date_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#11] +Join condition: None + +(22) Project [codegen id : 4] +Output [2]: [cs_sales_price#3, ca_zip#9] +Input [4]: [cs_sold_date_sk#1, cs_sales_price#3, ca_zip#9, d_date_sk#11] + +(23) HashAggregate [codegen id : 4] +Input [2]: [cs_sales_price#3, ca_zip#9] +Keys [1]: [ca_zip#9] +Functions [1]: [partial_sum(UnscaledValue(cs_sales_price#3))] +Aggregate Attributes [1]: [sum#15] +Results [2]: [ca_zip#9, sum#16] + +(24) Exchange +Input [2]: [ca_zip#9, sum#16] +Arguments: hashpartitioning(ca_zip#9, 5), true, [id=#17] + +(25) HashAggregate [codegen id : 5] +Input [2]: [ca_zip#9, sum#16] +Keys [1]: [ca_zip#9] +Functions [1]: [sum(UnscaledValue(cs_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#3))#18] +Results [2]: [ca_zip#9, MakeDecimal(sum(UnscaledValue(cs_sales_price#3))#18,17,2) AS sum(cs_sales_price)#19] + +(26) TakeOrderedAndProject +Input [2]: [ca_zip#9, sum(cs_sales_price)#19] +Arguments: 100, [ca_zip#9 ASC NULLS FIRST], [ca_zip#9, sum(cs_sales_price)#19] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15/simplified.txt new file mode 100644 index 0000000000000..e6e98be4b438f --- /dev/null +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15/simplified.txt @@ -0,0 +1,39 @@ +TakeOrderedAndProject [ca_zip,sum(cs_sales_price)] + WholeStageCodegen (5) + HashAggregate [ca_zip,sum] [sum,sum(UnscaledValue(cs_sales_price)),sum(cs_sales_price)] + InputAdapter + Exchange [ca_zip] #1 + WholeStageCodegen (4) + HashAggregate [ca_zip,cs_sales_price] [sum,sum] + Project [ca_zip,cs_sales_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [ca_zip,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk,ca_state,ca_zip,cs_sales_price] + Project [c_current_addr_sk,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] + Filter [cs_bill_customer_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [c_current_addr_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_customer_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state,ca_zip] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date_sk,d_qoy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt new file mode 100644 index 0000000000000..66bf2dc518751 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt @@ -0,0 +1,250 @@ +== Physical Plan == +TakeOrderedAndProject (44) ++- * HashAggregate (43) + +- Exchange (42) + +- * HashAggregate (41) + +- * 
HashAggregate (40) + +- * HashAggregate (39) + +- * Project (38) + +- * BroadcastHashJoin Inner BuildRight (37) + :- * Project (31) + : +- * BroadcastHashJoin Inner BuildRight (30) + : :- * Project (24) + : : +- * BroadcastHashJoin Inner BuildRight (23) + : : :- SortMergeJoin LeftAnti (17) + : : : :- * Project (12) + : : : : +- SortMergeJoin LeftSemi (11) + : : : : :- * Sort (5) + : : : : : +- Exchange (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.catalog_sales (1) + : : : : +- * Sort (10) + : : : : +- Exchange (9) + : : : : +- * Project (8) + : : : : +- * ColumnarToRow (7) + : : : : +- Scan parquet default.catalog_sales (6) + : : : +- * Sort (16) + : : : +- Exchange (15) + : : : +- * ColumnarToRow (14) + : : : +- Scan parquet default.catalog_returns (13) + : : +- BroadcastExchange (22) + : : +- * Project (21) + : : +- * Filter (20) + : : +- * ColumnarToRow (19) + : : +- Scan parquet default.customer_address (18) + : +- BroadcastExchange (29) + : +- * Project (28) + : +- * Filter (27) + : +- * ColumnarToRow (26) + : +- Scan parquet default.call_center (25) + +- BroadcastExchange (36) + +- * Project (35) + +- * Filter (34) + +- * ColumnarToRow (33) + +- Scan parquet default.date_dim (32) + + +(1) Scan parquet default.catalog_sales +Output [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_ship_date_sk), IsNotNull(cs_ship_addr_sk), IsNotNull(cs_call_center_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] + +(3) Filter [codegen id : 1] +Input [7]: 
[cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Condition : ((isnotnull(cs_ship_date_sk#1) AND isnotnull(cs_ship_addr_sk#2)) AND isnotnull(cs_call_center_sk#3)) + +(4) Exchange +Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Arguments: hashpartitioning(cs_order_number#5, 5), true, [id=#8] + +(5) Sort [codegen id : 2] +Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Arguments: [cs_order_number#5 ASC NULLS FIRST], false, 0 + +(6) Scan parquet default.catalog_sales +Output [2]: [cs_warehouse_sk#4, cs_order_number#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 3] +Input [2]: [cs_warehouse_sk#4, cs_order_number#5] + +(8) Project [codegen id : 3] +Output [2]: [cs_warehouse_sk#4 AS cs_warehouse_sk#4#9, cs_order_number#5 AS cs_order_number#5#10] +Input [2]: [cs_warehouse_sk#4, cs_order_number#5] + +(9) Exchange +Input [2]: [cs_warehouse_sk#4#9, cs_order_number#5#10] +Arguments: hashpartitioning(cs_order_number#5#10, 5), true, [id=#11] + +(10) Sort [codegen id : 4] +Input [2]: [cs_warehouse_sk#4#9, cs_order_number#5#10] +Arguments: [cs_order_number#5#10 ASC NULLS FIRST], false, 0 + +(11) SortMergeJoin +Left keys [1]: [cs_order_number#5] +Right keys [1]: [cs_order_number#5#10] +Join condition: NOT (cs_warehouse_sk#4 = cs_warehouse_sk#4#9) + +(12) Project [codegen id : 5] +Output [6]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, 
cs_ext_ship_cost#6, cs_net_profit#7] + +(13) Scan parquet default.catalog_returns +Output [1]: [cr_order_number#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_returns] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 6] +Input [1]: [cr_order_number#12] + +(15) Exchange +Input [1]: [cr_order_number#12] +Arguments: hashpartitioning(cr_order_number#12, 5), true, [id=#13] + +(16) Sort [codegen id : 7] +Input [1]: [cr_order_number#12] +Arguments: [cr_order_number#12 ASC NULLS FIRST], false, 0 + +(17) SortMergeJoin +Left keys [1]: [cs_order_number#5] +Right keys [1]: [cr_order_number#12] +Join condition: None + +(18) Scan parquet default.customer_address +Output [2]: [ca_address_sk#14, ca_state#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,GA), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 8] +Input [2]: [ca_address_sk#14, ca_state#15] + +(20) Filter [codegen id : 8] +Input [2]: [ca_address_sk#14, ca_state#15] +Condition : ((isnotnull(ca_state#15) AND (ca_state#15 = GA)) AND isnotnull(ca_address_sk#14)) + +(21) Project [codegen id : 8] +Output [1]: [ca_address_sk#14] +Input [2]: [ca_address_sk#14, ca_state#15] + +(22) BroadcastExchange +Input [1]: [ca_address_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(23) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_ship_addr_sk#2] +Right keys [1]: [ca_address_sk#14] +Join condition: None + +(24) Project [codegen id : 11] +Output [5]: [cs_ship_date_sk#1, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, 
cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, ca_address_sk#14] + +(25) Scan parquet default.call_center +Output [2]: [cc_call_center_sk#17, cc_county#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/call_center] +PushedFilters: [IsNotNull(cc_county), EqualTo(cc_county,Williamson County), IsNotNull(cc_call_center_sk)] +ReadSchema: struct + +(26) ColumnarToRow [codegen id : 9] +Input [2]: [cc_call_center_sk#17, cc_county#18] + +(27) Filter [codegen id : 9] +Input [2]: [cc_call_center_sk#17, cc_county#18] +Condition : ((isnotnull(cc_county#18) AND (cc_county#18 = Williamson County)) AND isnotnull(cc_call_center_sk#17)) + +(28) Project [codegen id : 9] +Output [1]: [cc_call_center_sk#17] +Input [2]: [cc_call_center_sk#17, cc_county#18] + +(29) BroadcastExchange +Input [1]: [cc_call_center_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#19] + +(30) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_call_center_sk#3] +Right keys [1]: [cc_call_center_sk#17] +Join condition: None + +(31) Project [codegen id : 11] +Output [4]: [cs_ship_date_sk#1, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Input [6]: [cs_ship_date_sk#1, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, cc_call_center_sk#17] + +(32) Scan parquet default.date_dim +Output [2]: [d_date_sk#20, d_date#21] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2002-02-01), LessThanOrEqual(d_date,2002-04-02), IsNotNull(d_date_sk)] +ReadSchema: struct + +(33) ColumnarToRow [codegen id : 10] +Input [2]: [d_date_sk#20, d_date#21] + +(34) Filter [codegen id : 10] +Input [2]: [d_date_sk#20, 
d_date#21] +Condition : (((isnotnull(d_date#21) AND (d_date#21 >= 11719)) AND (d_date#21 <= 11779)) AND isnotnull(d_date_sk#20)) + +(35) Project [codegen id : 10] +Output [1]: [d_date_sk#20] +Input [2]: [d_date_sk#20, d_date#21] + +(36) BroadcastExchange +Input [1]: [d_date_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] + +(37) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_ship_date_sk#1] +Right keys [1]: [d_date_sk#20] +Join condition: None + +(38) Project [codegen id : 11] +Output [3]: [cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Input [5]: [cs_ship_date_sk#1, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, d_date_sk#20] + +(39) HashAggregate [codegen id : 11] +Input [3]: [cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Keys [1]: [cs_order_number#5] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_ship_cost#6)), partial_sum(UnscaledValue(cs_net_profit#7))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_ship_cost#6))#23, sum(UnscaledValue(cs_net_profit#7))#24] +Results [3]: [cs_order_number#5, sum#25, sum#26] + +(40) HashAggregate [codegen id : 11] +Input [3]: [cs_order_number#5, sum#25, sum#26] +Keys [1]: [cs_order_number#5] +Functions [2]: [merge_sum(UnscaledValue(cs_ext_ship_cost#6)), merge_sum(UnscaledValue(cs_net_profit#7))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_ship_cost#6))#23, sum(UnscaledValue(cs_net_profit#7))#24] +Results [3]: [cs_order_number#5, sum#25, sum#26] + +(41) HashAggregate [codegen id : 11] +Input [3]: [cs_order_number#5, sum#25, sum#26] +Keys: [] +Functions [3]: [merge_sum(UnscaledValue(cs_ext_ship_cost#6)), merge_sum(UnscaledValue(cs_net_profit#7)), partial_count(distinct cs_order_number#5)] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#23, sum(UnscaledValue(cs_net_profit#7))#24, count(cs_order_number#5)#27] +Results [3]: [sum#25, sum#26, count#28] + +(42) Exchange +Input [3]: [sum#25, sum#26, 
count#28] +Arguments: SinglePartition, true, [id=#29] + +(43) HashAggregate [codegen id : 12] +Input [3]: [sum#25, sum#26, count#28] +Keys: [] +Functions [3]: [sum(UnscaledValue(cs_ext_ship_cost#6)), sum(UnscaledValue(cs_net_profit#7)), count(distinct cs_order_number#5)] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#23, sum(UnscaledValue(cs_net_profit#7))#24, count(cs_order_number#5)#27] +Results [3]: [count(cs_order_number#5)#27 AS order count #30, MakeDecimal(sum(UnscaledValue(cs_ext_ship_cost#6))#23,17,2) AS total shipping cost #31, MakeDecimal(sum(UnscaledValue(cs_net_profit#7))#24,17,2) AS total net profit #32] + +(44) TakeOrderedAndProject +Input [3]: [order count #30, total shipping cost #31, total net profit #32] +Arguments: 100, [order count #30 ASC NULLS FIRST], [order count #30, total shipping cost #31, total net profit #32] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/simplified.txt new file mode 100644 index 0000000000000..dc78021b94fbf --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/simplified.txt @@ -0,0 +1,71 @@ +TakeOrderedAndProject [order count ,total net profit ,total shipping cost ] + WholeStageCodegen (12) + HashAggregate [count,sum,sum] [count,count(cs_order_number),order count ,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),total net profit ,total shipping cost ] + InputAdapter + Exchange #1 + WholeStageCodegen (11) + HashAggregate [cs_order_number] [count,count,count(cs_order_number),sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit))] + HashAggregate [cs_order_number] [sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit))] + HashAggregate [cs_ext_ship_cost,cs_net_profit,cs_order_number] 
[sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit))] + Project [cs_ext_ship_cost,cs_net_profit,cs_order_number] + BroadcastHashJoin [cs_ship_date_sk,d_date_sk] + Project [cs_ext_ship_cost,cs_net_profit,cs_order_number,cs_ship_date_sk] + BroadcastHashJoin [cc_call_center_sk,cs_call_center_sk] + Project [cs_call_center_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number,cs_ship_date_sk] + BroadcastHashJoin [ca_address_sk,cs_ship_addr_sk] + InputAdapter + SortMergeJoin [cr_order_number,cs_order_number] + WholeStageCodegen (5) + Project [cs_call_center_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number,cs_ship_addr_sk,cs_ship_date_sk] + InputAdapter + SortMergeJoin [cs_order_number,cs_order_number,cs_warehouse_sk,cs_warehouse_sk] + WholeStageCodegen (2) + Sort [cs_order_number] + InputAdapter + Exchange [cs_order_number] #2 + WholeStageCodegen (1) + Filter [cs_call_center_sk,cs_ship_addr_sk,cs_ship_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_call_center_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number,cs_ship_addr_sk,cs_ship_date_sk,cs_warehouse_sk] + WholeStageCodegen (4) + Sort [cs_order_number] + InputAdapter + Exchange [cs_order_number] #3 + WholeStageCodegen (3) + Project [cs_order_number,cs_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_order_number,cs_warehouse_sk] + WholeStageCodegen (7) + Sort [cr_order_number] + InputAdapter + Exchange [cr_order_number] #4 + WholeStageCodegen (6) + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_order_number] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (8) + Project [ca_address_sk] + Filter [ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (9) + Project [cc_call_center_sk] + Filter [cc_call_center_sk,cc_county] + ColumnarToRow + InputAdapter + Scan 
parquet default.call_center [cc_call_center_sk,cc_county] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (10) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt new file mode 100644 index 0000000000000..ed45f7de91759 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt @@ -0,0 +1,235 @@ +== Physical Plan == +TakeOrderedAndProject (41) ++- * HashAggregate (40) + +- Exchange (39) + +- * HashAggregate (38) + +- * HashAggregate (37) + +- Exchange (36) + +- * HashAggregate (35) + +- * Project (34) + +- * BroadcastHashJoin Inner BuildRight (33) + :- * Project (27) + : +- * BroadcastHashJoin Inner BuildRight (26) + : :- * Project (20) + : : +- * BroadcastHashJoin Inner BuildRight (19) + : : :- * BroadcastHashJoin LeftAnti BuildRight (13) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin LeftSemi BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.catalog_sales (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Project (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.catalog_sales (4) + : : : +- BroadcastExchange (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.catalog_returns (10) + : : +- BroadcastExchange (18) + : : +- * Project (17) + : : +- * Filter (16) + : : +- * ColumnarToRow (15) + : : +- Scan parquet default.date_dim (14) + : +- BroadcastExchange (25) + : +- * Project (24) + : +- * Filter (23) + : +- * ColumnarToRow (22) + : +- Scan parquet default.customer_address (21) + +- BroadcastExchange (32) + +- * Project (31) + +- * Filter (30) + +- * ColumnarToRow (29) + +- Scan parquet default.call_center (28) + + +(1) Scan parquet 
default.catalog_sales +Output [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_ship_date_sk), IsNotNull(cs_ship_addr_sk), IsNotNull(cs_call_center_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 6] +Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] + +(3) Filter [codegen id : 6] +Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Condition : ((isnotnull(cs_ship_date_sk#1) AND isnotnull(cs_ship_addr_sk#2)) AND isnotnull(cs_call_center_sk#3)) + +(4) Scan parquet default.catalog_sales +Output [2]: [cs_warehouse_sk#4, cs_order_number#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [cs_warehouse_sk#4, cs_order_number#5] + +(6) Project [codegen id : 1] +Output [2]: [cs_warehouse_sk#4 AS cs_warehouse_sk#4#8, cs_order_number#5 AS cs_order_number#5#9] +Input [2]: [cs_warehouse_sk#4, cs_order_number#5] + +(7) BroadcastExchange +Input [2]: [cs_warehouse_sk#4#8, cs_order_number#5#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#10] + +(8) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_order_number#5] +Right keys [1]: [cs_order_number#5#9] +Join condition: NOT (cs_warehouse_sk#4 = cs_warehouse_sk#4#8) + +(9) Project [codegen id : 6] +Output [6]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, 
cs_ext_ship_cost#6, cs_net_profit#7] +Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] + +(10) Scan parquet default.catalog_returns +Output [1]: [cr_order_number#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_returns] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [1]: [cr_order_number#11] + +(12) BroadcastExchange +Input [1]: [cr_order_number#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(13) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_order_number#5] +Right keys [1]: [cr_order_number#11] +Join condition: None + +(14) Scan parquet default.date_dim +Output [2]: [d_date_sk#13, d_date#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2002-02-01), LessThanOrEqual(d_date,2002-04-02), IsNotNull(d_date_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#13, d_date#14] + +(16) Filter [codegen id : 3] +Input [2]: [d_date_sk#13, d_date#14] +Condition : (((isnotnull(d_date#14) AND (d_date#14 >= 11719)) AND (d_date#14 <= 11779)) AND isnotnull(d_date_sk#13)) + +(17) Project [codegen id : 3] +Output [1]: [d_date_sk#13] +Input [2]: [d_date_sk#13, d_date#14] + +(18) BroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] + +(19) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_ship_date_sk#1] +Right keys [1]: [d_date_sk#13] +Join condition: None + +(20) Project [codegen id : 6] +Output [5]: [cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, 
cs_net_profit#7] +Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, d_date_sk#13] + +(21) Scan parquet default.customer_address +Output [2]: [ca_address_sk#16, ca_state#17] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,GA), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 4] +Input [2]: [ca_address_sk#16, ca_state#17] + +(23) Filter [codegen id : 4] +Input [2]: [ca_address_sk#16, ca_state#17] +Condition : ((isnotnull(ca_state#17) AND (ca_state#17 = GA)) AND isnotnull(ca_address_sk#16)) + +(24) Project [codegen id : 4] +Output [1]: [ca_address_sk#16] +Input [2]: [ca_address_sk#16, ca_state#17] + +(25) BroadcastExchange +Input [1]: [ca_address_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] + +(26) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_ship_addr_sk#2] +Right keys [1]: [ca_address_sk#16] +Join condition: None + +(27) Project [codegen id : 6] +Output [4]: [cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Input [6]: [cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, ca_address_sk#16] + +(28) Scan parquet default.call_center +Output [2]: [cc_call_center_sk#19, cc_county#20] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/call_center] +PushedFilters: [IsNotNull(cc_county), EqualTo(cc_county,Williamson County), IsNotNull(cc_call_center_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 5] +Input [2]: [cc_call_center_sk#19, cc_county#20] + +(30) Filter [codegen id : 5] +Input [2]: [cc_call_center_sk#19, cc_county#20] 
+Condition : ((isnotnull(cc_county#20) AND (cc_county#20 = Williamson County)) AND isnotnull(cc_call_center_sk#19)) + +(31) Project [codegen id : 5] +Output [1]: [cc_call_center_sk#19] +Input [2]: [cc_call_center_sk#19, cc_county#20] + +(32) BroadcastExchange +Input [1]: [cc_call_center_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] + +(33) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_call_center_sk#3] +Right keys [1]: [cc_call_center_sk#19] +Join condition: None + +(34) Project [codegen id : 6] +Output [3]: [cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Input [5]: [cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, cc_call_center_sk#19] + +(35) HashAggregate [codegen id : 6] +Input [3]: [cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Keys [1]: [cs_order_number#5] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_ship_cost#6)), partial_sum(UnscaledValue(cs_net_profit#7))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_ship_cost#6))#22, sum(UnscaledValue(cs_net_profit#7))#23] +Results [3]: [cs_order_number#5, sum#24, sum#25] + +(36) Exchange +Input [3]: [cs_order_number#5, sum#24, sum#25] +Arguments: hashpartitioning(cs_order_number#5, 5), true, [id=#26] + +(37) HashAggregate [codegen id : 7] +Input [3]: [cs_order_number#5, sum#24, sum#25] +Keys [1]: [cs_order_number#5] +Functions [2]: [merge_sum(UnscaledValue(cs_ext_ship_cost#6)), merge_sum(UnscaledValue(cs_net_profit#7))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_ship_cost#6))#22, sum(UnscaledValue(cs_net_profit#7))#23] +Results [3]: [cs_order_number#5, sum#24, sum#25] + +(38) HashAggregate [codegen id : 7] +Input [3]: [cs_order_number#5, sum#24, sum#25] +Keys: [] +Functions [3]: [merge_sum(UnscaledValue(cs_ext_ship_cost#6)), merge_sum(UnscaledValue(cs_net_profit#7)), partial_count(distinct cs_order_number#5)] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#22, 
sum(UnscaledValue(cs_net_profit#7))#23, count(cs_order_number#5)#27] +Results [3]: [sum#24, sum#25, count#28] + +(39) Exchange +Input [3]: [sum#24, sum#25, count#28] +Arguments: SinglePartition, true, [id=#29] + +(40) HashAggregate [codegen id : 8] +Input [3]: [sum#24, sum#25, count#28] +Keys: [] +Functions [3]: [sum(UnscaledValue(cs_ext_ship_cost#6)), sum(UnscaledValue(cs_net_profit#7)), count(distinct cs_order_number#5)] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#22, sum(UnscaledValue(cs_net_profit#7))#23, count(cs_order_number#5)#27] +Results [3]: [count(cs_order_number#5)#27 AS order count #30, MakeDecimal(sum(UnscaledValue(cs_ext_ship_cost#6))#22,17,2) AS total shipping cost #31, MakeDecimal(sum(UnscaledValue(cs_net_profit#7))#23,17,2) AS total net profit #32] + +(41) TakeOrderedAndProject +Input [3]: [order count #30, total shipping cost #31, total net profit #32] +Arguments: 100, [order count #30 ASC NULLS FIRST], [order count #30, total shipping cost #31, total net profit #32] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt new file mode 100644 index 0000000000000..cdf86a4813208 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt @@ -0,0 +1,62 @@ +TakeOrderedAndProject [order count ,total net profit ,total shipping cost ] + WholeStageCodegen (8) + HashAggregate [count,sum,sum] [count,count(cs_order_number),order count ,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),total net profit ,total shipping cost ] + InputAdapter + Exchange #1 + WholeStageCodegen (7) + HashAggregate [cs_order_number] [count,count,count(cs_order_number),sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit))] + HashAggregate [cs_order_number] 
[sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit))] + InputAdapter + Exchange [cs_order_number] #2 + WholeStageCodegen (6) + HashAggregate [cs_ext_ship_cost,cs_net_profit,cs_order_number] [sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit))] + Project [cs_ext_ship_cost,cs_net_profit,cs_order_number] + BroadcastHashJoin [cc_call_center_sk,cs_call_center_sk] + Project [cs_call_center_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number] + BroadcastHashJoin [ca_address_sk,cs_ship_addr_sk] + Project [cs_call_center_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number,cs_ship_addr_sk] + BroadcastHashJoin [cs_ship_date_sk,d_date_sk] + BroadcastHashJoin [cr_order_number,cs_order_number] + Project [cs_call_center_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number,cs_ship_addr_sk,cs_ship_date_sk] + BroadcastHashJoin [cs_order_number,cs_order_number,cs_warehouse_sk,cs_warehouse_sk] + Filter [cs_call_center_sk,cs_ship_addr_sk,cs_ship_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_call_center_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number,cs_ship_addr_sk,cs_ship_date_sk,cs_warehouse_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [cs_order_number,cs_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_order_number,cs_warehouse_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_order_number] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + Project [ca_address_sk] + Filter [ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange 
#7 + WholeStageCodegen (5) + Project [cc_call_center_sk] + Filter [cc_call_center_sk,cc_county] + ColumnarToRow + InputAdapter + Scan parquet default.call_center [cc_call_center_sk,cc_county] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/explain.txt new file mode 100644 index 0000000000000..36b0e72d2d0fd --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/explain.txt @@ -0,0 +1,314 @@ +== Physical Plan == +TakeOrderedAndProject (57) ++- * HashAggregate (56) + +- Exchange (55) + +- * HashAggregate (54) + +- * Project (53) + +- * SortMergeJoin Inner (52) + :- * Sort (43) + : +- Exchange (42) + : +- * Project (41) + : +- * SortMergeJoin Inner (40) + : :- * Sort (27) + : : +- Exchange (26) + : : +- * Project (25) + : : +- * SortMergeJoin Inner (24) + : : :- * Sort (18) + : : : +- Exchange (17) + : : : +- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.date_dim (4) + : : : +- BroadcastExchange (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet default.store (11) + : : +- * Sort (23) + : : +- Exchange (22) + : : +- * Filter (21) + : : +- * ColumnarToRow (20) + : : +- Scan parquet default.item (19) + : +- * Sort (39) + : +- Exchange (38) + : +- * Project (37) + : +- * BroadcastHashJoin Inner BuildLeft (36) + : :- BroadcastExchange (32) + : : +- * Project (31) + : : +- * Filter (30) + : : +- * ColumnarToRow (29) + : : +- Scan parquet default.date_dim (28) + : +- * Filter (35) + : +- * 
ColumnarToRow (34) + : +- Scan parquet default.store_returns (33) + +- * Sort (51) + +- Exchange (50) + +- * Project (49) + +- * BroadcastHashJoin Inner BuildRight (48) + :- * Filter (46) + : +- * ColumnarToRow (45) + : +- Scan parquet default.catalog_sales (44) + +- ReusedExchange (47) + + +(1) Scan parquet default.store_sales +Output [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_quantity#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_quantity#6] + +(3) Filter [codegen id : 3] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_quantity#6] +Condition : ((((isnotnull(ss_customer_sk#3) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_ticket_number#5)) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#4)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#7, d_quarter_name#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_quarter_name), EqualTo(d_quarter_name,2001Q1), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#7, d_quarter_name#8] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#7, d_quarter_name#8] +Condition : ((isnotnull(d_quarter_name#8) AND (d_quarter_name#8 = 2001Q1)) AND isnotnull(d_date_sk#7)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#7] +Input [2]: [d_date_sk#7, 
d_quarter_name#8] + +(8) BroadcastExchange +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(10) Project [codegen id : 3] +Output [5]: [ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_quantity#6] +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_quantity#6, d_date_sk#7] + +(11) Scan parquet default.store +Output [2]: [s_store_sk#10, s_state#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#10, s_state#11] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#10, s_state#11] +Condition : isnotnull(s_store_sk#10) + +(14) BroadcastExchange +Input [2]: [s_store_sk#10, s_state#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#10] +Join condition: None + +(16) Project [codegen id : 3] +Output [5]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s_state#11] +Input [7]: [ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_quantity#6, s_store_sk#10, s_state#11] + +(17) Exchange +Input [5]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s_state#11] +Arguments: hashpartitioning(ss_item_sk#2, 5), true, [id=#13] + +(18) Sort [codegen id : 4] +Input [5]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s_state#11] +Arguments: [ss_item_sk#2 ASC NULLS FIRST], false, 0 + +(19) Scan parquet default.item +Output [3]: 
[i_item_sk#14, i_item_id#15, i_item_desc#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 5] +Input [3]: [i_item_sk#14, i_item_id#15, i_item_desc#16] + +(21) Filter [codegen id : 5] +Input [3]: [i_item_sk#14, i_item_id#15, i_item_desc#16] +Condition : isnotnull(i_item_sk#14) + +(22) Exchange +Input [3]: [i_item_sk#14, i_item_id#15, i_item_desc#16] +Arguments: hashpartitioning(i_item_sk#14, 5), true, [id=#17] + +(23) Sort [codegen id : 6] +Input [3]: [i_item_sk#14, i_item_id#15, i_item_desc#16] +Arguments: [i_item_sk#14 ASC NULLS FIRST], false, 0 + +(24) SortMergeJoin [codegen id : 7] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#14] +Join condition: None + +(25) Project [codegen id : 7] +Output [7]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16] +Input [8]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s_state#11, i_item_sk#14, i_item_id#15, i_item_desc#16] + +(26) Exchange +Input [7]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16] +Arguments: hashpartitioning(cast(ss_ticket_number#5 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_customer_sk#3 as bigint), 5), true, [id=#18] + +(27) Sort [codegen id : 8] +Input [7]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16] +Arguments: [cast(ss_ticket_number#5 as bigint) ASC NULLS FIRST, cast(ss_item_sk#2 as bigint) ASC NULLS FIRST, cast(ss_customer_sk#3 as bigint) ASC NULLS FIRST], false, 0 + +(28) Scan parquet default.date_dim +Output [2]: [d_date_sk#19, d_quarter_name#20] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [In(d_quarter_name, [2001Q1,2001Q2,2001Q3]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 9] +Input [2]: [d_date_sk#19, d_quarter_name#20] + +(30) Filter [codegen id : 9] +Input [2]: [d_date_sk#19, d_quarter_name#20] +Condition : (d_quarter_name#20 IN (2001Q1,2001Q2,2001Q3) AND isnotnull(d_date_sk#19)) + +(31) Project [codegen id : 9] +Output [1]: [d_date_sk#19] +Input [2]: [d_date_sk#19, d_quarter_name#20] + +(32) BroadcastExchange +Input [1]: [d_date_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] + +(33) Scan parquet default.store_returns +Output [5]: [sr_returned_date_sk#22, sr_item_sk#23, sr_customer_sk#24, sr_ticket_number#25, sr_return_quantity#26] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_returned_date_sk)] +ReadSchema: struct + +(34) ColumnarToRow +Input [5]: [sr_returned_date_sk#22, sr_item_sk#23, sr_customer_sk#24, sr_ticket_number#25, sr_return_quantity#26] + +(35) Filter +Input [5]: [sr_returned_date_sk#22, sr_item_sk#23, sr_customer_sk#24, sr_ticket_number#25, sr_return_quantity#26] +Condition : (((isnotnull(sr_ticket_number#25) AND isnotnull(sr_customer_sk#24)) AND isnotnull(sr_item_sk#23)) AND isnotnull(sr_returned_date_sk#22)) + +(36) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cast(d_date_sk#19 as bigint)] +Right keys [1]: [sr_returned_date_sk#22] +Join condition: None + +(37) Project [codegen id : 10] +Output [4]: [sr_item_sk#23, sr_customer_sk#24, sr_ticket_number#25, sr_return_quantity#26] +Input [6]: [d_date_sk#19, sr_returned_date_sk#22, sr_item_sk#23, 
sr_customer_sk#24, sr_ticket_number#25, sr_return_quantity#26] + +(38) Exchange +Input [4]: [sr_item_sk#23, sr_customer_sk#24, sr_ticket_number#25, sr_return_quantity#26] +Arguments: hashpartitioning(sr_ticket_number#25, sr_item_sk#23, sr_customer_sk#24, 5), true, [id=#27] + +(39) Sort [codegen id : 11] +Input [4]: [sr_item_sk#23, sr_customer_sk#24, sr_ticket_number#25, sr_return_quantity#26] +Arguments: [sr_ticket_number#25 ASC NULLS FIRST, sr_item_sk#23 ASC NULLS FIRST, sr_customer_sk#24 ASC NULLS FIRST], false, 0 + +(40) SortMergeJoin [codegen id : 12] +Left keys [3]: [cast(ss_ticket_number#5 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_customer_sk#3 as bigint)] +Right keys [3]: [sr_ticket_number#25, sr_item_sk#23, sr_customer_sk#24] +Join condition: None + +(41) Project [codegen id : 12] +Output [7]: [ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16, sr_item_sk#23, sr_customer_sk#24, sr_return_quantity#26] +Input [11]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16, sr_item_sk#23, sr_customer_sk#24, sr_ticket_number#25, sr_return_quantity#26] + +(42) Exchange +Input [7]: [ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16, sr_item_sk#23, sr_customer_sk#24, sr_return_quantity#26] +Arguments: hashpartitioning(sr_customer_sk#24, sr_item_sk#23, 5), true, [id=#28] + +(43) Sort [codegen id : 13] +Input [7]: [ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16, sr_item_sk#23, sr_customer_sk#24, sr_return_quantity#26] +Arguments: [sr_customer_sk#24 ASC NULLS FIRST, sr_item_sk#23 ASC NULLS FIRST], false, 0 + +(44) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#29, cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#32] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk), 
IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(45) ColumnarToRow [codegen id : 15] +Input [4]: [cs_sold_date_sk#29, cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#32] + +(46) Filter [codegen id : 15] +Input [4]: [cs_sold_date_sk#29, cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#32] +Condition : ((isnotnull(cs_bill_customer_sk#30) AND isnotnull(cs_item_sk#31)) AND isnotnull(cs_sold_date_sk#29)) + +(47) ReusedExchange [Reuses operator id: 32] +Output [1]: [d_date_sk#33] + +(48) BroadcastHashJoin [codegen id : 15] +Left keys [1]: [cs_sold_date_sk#29] +Right keys [1]: [d_date_sk#33] +Join condition: None + +(49) Project [codegen id : 15] +Output [3]: [cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#32] +Input [5]: [cs_sold_date_sk#29, cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#32, d_date_sk#33] + +(50) Exchange +Input [3]: [cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#32] +Arguments: hashpartitioning(cast(cs_bill_customer_sk#30 as bigint), cast(cs_item_sk#31 as bigint), 5), true, [id=#34] + +(51) Sort [codegen id : 16] +Input [3]: [cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#32] +Arguments: [cast(cs_bill_customer_sk#30 as bigint) ASC NULLS FIRST, cast(cs_item_sk#31 as bigint) ASC NULLS FIRST], false, 0 + +(52) SortMergeJoin [codegen id : 17] +Left keys [2]: [sr_customer_sk#24, sr_item_sk#23] +Right keys [2]: [cast(cs_bill_customer_sk#30 as bigint), cast(cs_item_sk#31 as bigint)] +Join condition: None + +(53) Project [codegen id : 17] +Output [6]: [ss_quantity#6, sr_return_quantity#26, cs_quantity#32, s_state#11, i_item_id#15, i_item_desc#16] +Input [10]: [ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16, sr_item_sk#23, sr_customer_sk#24, sr_return_quantity#26, cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#32] + +(54) HashAggregate [codegen id : 17] +Input [6]: [ss_quantity#6, sr_return_quantity#26, cs_quantity#32, s_state#11, i_item_id#15, i_item_desc#16] +Keys [3]: [i_item_id#15, i_item_desc#16, s_state#11] +Functions 
[9]: [partial_count(ss_quantity#6), partial_avg(cast(ss_quantity#6 as bigint)), partial_stddev_samp(cast(ss_quantity#6 as double)), partial_count(sr_return_quantity#26), partial_avg(cast(sr_return_quantity#26 as bigint)), partial_stddev_samp(cast(sr_return_quantity#26 as double)), partial_count(cs_quantity#32), partial_avg(cast(cs_quantity#32 as bigint)), partial_stddev_samp(cast(cs_quantity#32 as double))] +Aggregate Attributes [18]: [count#35, sum#36, count#37, n#38, avg#39, m2#40, count#41, sum#42, count#43, n#44, avg#45, m2#46, count#47, sum#48, count#49, n#50, avg#51, m2#52] +Results [21]: [i_item_id#15, i_item_desc#16, s_state#11, count#53, sum#54, count#55, n#56, avg#57, m2#58, count#59, sum#60, count#61, n#62, avg#63, m2#64, count#65, sum#66, count#67, n#68, avg#69, m2#70] + +(55) Exchange +Input [21]: [i_item_id#15, i_item_desc#16, s_state#11, count#53, sum#54, count#55, n#56, avg#57, m2#58, count#59, sum#60, count#61, n#62, avg#63, m2#64, count#65, sum#66, count#67, n#68, avg#69, m2#70] +Arguments: hashpartitioning(i_item_id#15, i_item_desc#16, s_state#11, 5), true, [id=#71] + +(56) HashAggregate [codegen id : 18] +Input [21]: [i_item_id#15, i_item_desc#16, s_state#11, count#53, sum#54, count#55, n#56, avg#57, m2#58, count#59, sum#60, count#61, n#62, avg#63, m2#64, count#65, sum#66, count#67, n#68, avg#69, m2#70] +Keys [3]: [i_item_id#15, i_item_desc#16, s_state#11] +Functions [9]: [count(ss_quantity#6), avg(cast(ss_quantity#6 as bigint)), stddev_samp(cast(ss_quantity#6 as double)), count(sr_return_quantity#26), avg(cast(sr_return_quantity#26 as bigint)), stddev_samp(cast(sr_return_quantity#26 as double)), count(cs_quantity#32), avg(cast(cs_quantity#32 as bigint)), stddev_samp(cast(cs_quantity#32 as double))] +Aggregate Attributes [9]: [count(ss_quantity#6)#72, avg(cast(ss_quantity#6 as bigint))#73, stddev_samp(cast(ss_quantity#6 as double))#74, count(sr_return_quantity#26)#75, avg(cast(sr_return_quantity#26 as bigint))#76, 
stddev_samp(cast(sr_return_quantity#26 as double))#77, count(cs_quantity#32)#78, avg(cast(cs_quantity#32 as bigint))#79, stddev_samp(cast(cs_quantity#32 as double))#80] +Results [15]: [i_item_id#15, i_item_desc#16, s_state#11, count(ss_quantity#6)#72 AS store_sales_quantitycount#81, avg(cast(ss_quantity#6 as bigint))#73 AS store_sales_quantityave#82, stddev_samp(cast(ss_quantity#6 as double))#74 AS store_sales_quantitystdev#83, (stddev_samp(cast(ss_quantity#6 as double))#74 / avg(cast(ss_quantity#6 as bigint))#73) AS store_sales_quantitycov#84, count(sr_return_quantity#26)#75 AS as_store_returns_quantitycount#85, avg(cast(sr_return_quantity#26 as bigint))#76 AS as_store_returns_quantityave#86, stddev_samp(cast(sr_return_quantity#26 as double))#77 AS as_store_returns_quantitystdev#87, (stddev_samp(cast(sr_return_quantity#26 as double))#77 / avg(cast(sr_return_quantity#26 as bigint))#76) AS store_returns_quantitycov#88, count(cs_quantity#32)#78 AS catalog_sales_quantitycount#89, avg(cast(cs_quantity#32 as bigint))#79 AS catalog_sales_quantityave#90, (stddev_samp(cast(cs_quantity#32 as double))#80 / avg(cast(cs_quantity#32 as bigint))#79) AS catalog_sales_quantitystdev#91, (stddev_samp(cast(cs_quantity#32 as double))#80 / avg(cast(cs_quantity#32 as bigint))#79) AS catalog_sales_quantitycov#92] + +(57) TakeOrderedAndProject +Input [15]: [i_item_id#15, i_item_desc#16, s_state#11, store_sales_quantitycount#81, store_sales_quantityave#82, store_sales_quantitystdev#83, store_sales_quantitycov#84, as_store_returns_quantitycount#85, as_store_returns_quantityave#86, as_store_returns_quantitystdev#87, store_returns_quantitycov#88, catalog_sales_quantitycount#89, catalog_sales_quantityave#90, catalog_sales_quantitystdev#91, catalog_sales_quantitycov#92] +Arguments: 100, [i_item_id#15 ASC NULLS FIRST, i_item_desc#16 ASC NULLS FIRST, s_state#11 ASC NULLS FIRST], [i_item_id#15, i_item_desc#16, s_state#11, store_sales_quantitycount#81, store_sales_quantityave#82, 
store_sales_quantitystdev#83, store_sales_quantitycov#84, as_store_returns_quantitycount#85, as_store_returns_quantityave#86, as_store_returns_quantitystdev#87, store_returns_quantitycov#88, catalog_sales_quantitycount#89, catalog_sales_quantityave#90, catalog_sales_quantitystdev#91, catalog_sales_quantitycov#92] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/simplified.txt new file mode 100644 index 0000000000000..1a53b72ed00c3 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/simplified.txt @@ -0,0 +1,98 @@ +TakeOrderedAndProject [as_store_returns_quantityave,as_store_returns_quantitycount,as_store_returns_quantitystdev,catalog_sales_quantityave,catalog_sales_quantitycount,catalog_sales_quantitycov,catalog_sales_quantitystdev,i_item_desc,i_item_id,s_state,store_returns_quantitycov,store_sales_quantityave,store_sales_quantitycount,store_sales_quantitycov,store_sales_quantitystdev] + WholeStageCodegen (18) + HashAggregate [avg,avg,avg,count,count,count,count,count,count,i_item_desc,i_item_id,m2,m2,m2,n,n,n,s_state,sum,sum,sum] [as_store_returns_quantityave,as_store_returns_quantitycount,as_store_returns_quantitystdev,avg,avg,avg,avg(cast(cs_quantity as bigint)),avg(cast(sr_return_quantity as bigint)),avg(cast(ss_quantity as bigint)),catalog_sales_quantityave,catalog_sales_quantitycount,catalog_sales_quantitycov,catalog_sales_quantitystdev,count,count,count,count,count,count,count(cs_quantity),count(sr_return_quantity),count(ss_quantity),m2,m2,m2,n,n,n,stddev_samp(cast(cs_quantity as double)),stddev_samp(cast(sr_return_quantity as double)),stddev_samp(cast(ss_quantity as double)),store_returns_quantitycov,store_sales_quantityave,store_sales_quantitycount,store_sales_quantitycov,store_sales_quantitystdev,sum,sum,sum] + InputAdapter + Exchange [i_item_desc,i_item_id,s_state] #1 + 
WholeStageCodegen (17) + HashAggregate [cs_quantity,i_item_desc,i_item_id,s_state,sr_return_quantity,ss_quantity] [avg,avg,avg,avg,avg,avg,count,count,count,count,count,count,count,count,count,count,count,count,m2,m2,m2,m2,m2,m2,n,n,n,n,n,n,sum,sum,sum,sum,sum,sum] + Project [cs_quantity,i_item_desc,i_item_id,s_state,sr_return_quantity,ss_quantity] + SortMergeJoin [cs_bill_customer_sk,cs_item_sk,sr_customer_sk,sr_item_sk] + InputAdapter + WholeStageCodegen (13) + Sort [sr_customer_sk,sr_item_sk] + InputAdapter + Exchange [sr_customer_sk,sr_item_sk] #2 + WholeStageCodegen (12) + Project [i_item_desc,i_item_id,s_state,sr_customer_sk,sr_item_sk,sr_return_quantity,ss_quantity] + SortMergeJoin [sr_customer_sk,sr_item_sk,sr_ticket_number,ss_customer_sk,ss_item_sk,ss_ticket_number] + InputAdapter + WholeStageCodegen (8) + Sort [ss_customer_sk,ss_item_sk,ss_ticket_number] + InputAdapter + Exchange [ss_customer_sk,ss_item_sk,ss_ticket_number] #3 + WholeStageCodegen (7) + Project [i_item_desc,i_item_id,s_state,ss_customer_sk,ss_item_sk,ss_quantity,ss_ticket_number] + SortMergeJoin [i_item_sk,ss_item_sk] + InputAdapter + WholeStageCodegen (4) + Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #4 + WholeStageCodegen (3) + Project [s_state,ss_customer_sk,ss_item_sk,ss_quantity,ss_ticket_number] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_customer_sk,ss_item_sk,ss_quantity,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_quarter_name] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_quarter_name] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + Filter 
[s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_state,s_store_sk] + InputAdapter + WholeStageCodegen (6) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #7 + WholeStageCodegen (5) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_desc,i_item_id,i_item_sk] + InputAdapter + WholeStageCodegen (11) + Sort [sr_customer_sk,sr_item_sk,sr_ticket_number] + InputAdapter + Exchange [sr_customer_sk,sr_item_sk,sr_ticket_number] #8 + WholeStageCodegen (10) + Project [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_ticket_number] + BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (9) + Project [d_date_sk] + Filter [d_date_sk,d_quarter_name] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_quarter_name] + Filter [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,sr_ticket_number] + InputAdapter + WholeStageCodegen (16) + Sort [cs_bill_customer_sk,cs_item_sk] + InputAdapter + Exchange [cs_bill_customer_sk,cs_item_sk] #10 + WholeStageCodegen (15) + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #9 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/explain.txt new file mode 100644 index 0000000000000..fddd2bb6fbde7 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/explain.txt @@ -0,0 +1,269 @@ +== Physical Plan == +TakeOrderedAndProject 
(48) ++- * HashAggregate (47) + +- Exchange (46) + +- * HashAggregate (45) + +- * Project (44) + +- * BroadcastHashJoin Inner BuildRight (43) + :- * Project (38) + : +- * BroadcastHashJoin Inner BuildRight (37) + : :- * Project (32) + : : +- * BroadcastHashJoin Inner BuildRight (31) + : : :- * Project (29) + : : : +- * BroadcastHashJoin Inner BuildRight (28) + : : : :- * Project (22) + : : : : +- * BroadcastHashJoin Inner BuildRight (21) + : : : : :- * Project (15) + : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : :- * Project (9) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : : : :- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet default.store_sales (1) + : : : : : : +- BroadcastExchange (7) + : : : : : : +- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : +- Scan parquet default.store_returns (4) + : : : : : +- BroadcastExchange (13) + : : : : : +- * Filter (12) + : : : : : +- * ColumnarToRow (11) + : : : : : +- Scan parquet default.catalog_sales (10) + : : : : +- BroadcastExchange (20) + : : : : +- * Project (19) + : : : : +- * Filter (18) + : : : : +- * ColumnarToRow (17) + : : : : +- Scan parquet default.date_dim (16) + : : : +- BroadcastExchange (27) + : : : +- * Project (26) + : : : +- * Filter (25) + : : : +- * ColumnarToRow (24) + : : : +- Scan parquet default.date_dim (23) + : : +- ReusedExchange (30) + : +- BroadcastExchange (36) + : +- * Filter (35) + : +- * ColumnarToRow (34) + : +- Scan parquet default.store (33) + +- BroadcastExchange (42) + +- * Filter (41) + +- * ColumnarToRow (40) + +- Scan parquet default.item (39) + + +(1) Scan parquet default.store_sales +Output [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_quantity#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: 
[IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 8] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_quantity#6] + +(3) Filter [codegen id : 8] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_quantity#6] +Condition : ((((isnotnull(ss_item_sk#2) AND isnotnull(ss_customer_sk#3)) AND isnotnull(ss_ticket_number#5)) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#4)) + +(4) Scan parquet default.store_returns +Output [5]: [sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_return_quantity#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_returned_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [5]: [sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_return_quantity#11] + +(6) Filter [codegen id : 1] +Input [5]: [sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_return_quantity#11] +Condition : (((isnotnull(sr_ticket_number#10) AND isnotnull(sr_customer_sk#9)) AND isnotnull(sr_item_sk#8)) AND isnotnull(sr_returned_date_sk#7)) + +(7) BroadcastExchange +Input [5]: [sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_return_quantity#11] +Arguments: HashedRelationBroadcastMode(List(input[2, bigint, false], input[1, bigint, false], input[3, bigint, false]),false), [id=#12] + +(8) BroadcastHashJoin [codegen id : 8] +Left keys [3]: [cast(ss_customer_sk#3 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint)] +Right keys [3]: 
[sr_customer_sk#9, sr_item_sk#8, sr_ticket_number#10] +Join condition: None + +(9) Project [codegen id : 8] +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_return_quantity#11] +Input [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_quantity#6, sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_return_quantity#11] + +(10) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16] + +(12) Filter [codegen id : 2] +Input [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16] +Condition : ((isnotnull(cs_bill_customer_sk#14) AND isnotnull(cs_item_sk#15)) AND isnotnull(cs_sold_date_sk#13)) + +(13) BroadcastExchange +Input [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint), cast(input[2, int, false] as bigint)),false), [id=#17] + +(14) BroadcastHashJoin [codegen id : 8] +Left keys [2]: [sr_customer_sk#9, sr_item_sk#8] +Right keys [2]: [cast(cs_bill_customer_sk#14 as bigint), cast(cs_item_sk#15 as bigint)] +Join condition: None + +(15) Project [codegen id : 8] +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_returned_date_sk#7, sr_return_quantity#11, cs_sold_date_sk#13, cs_quantity#16] +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#6, 
sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_return_quantity#11, cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16] + +(16) Scan parquet default.date_dim +Output [2]: [d_date_sk#18, d_quarter_name#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_quarter_name), EqualTo(d_quarter_name,2001Q1), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#18, d_quarter_name#19] + +(18) Filter [codegen id : 3] +Input [2]: [d_date_sk#18, d_quarter_name#19] +Condition : ((isnotnull(d_quarter_name#19) AND (d_quarter_name#19 = 2001Q1)) AND isnotnull(d_date_sk#18)) + +(19) Project [codegen id : 3] +Output [1]: [d_date_sk#18] +Input [2]: [d_date_sk#18, d_quarter_name#19] + +(20) BroadcastExchange +Input [1]: [d_date_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(21) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#18] +Join condition: None + +(22) Project [codegen id : 8] +Output [7]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_returned_date_sk#7, sr_return_quantity#11, cs_sold_date_sk#13, cs_quantity#16] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_returned_date_sk#7, sr_return_quantity#11, cs_sold_date_sk#13, cs_quantity#16, d_date_sk#18] + +(23) Scan parquet default.date_dim +Output [2]: [d_date_sk#21, d_quarter_name#22] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [In(d_quarter_name, [2001Q1,2001Q2,2001Q3]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [2]: [d_date_sk#21, d_quarter_name#22] + +(25) Filter [codegen id : 
4] +Input [2]: [d_date_sk#21, d_quarter_name#22] +Condition : (d_quarter_name#22 IN (2001Q1,2001Q2,2001Q3) AND isnotnull(d_date_sk#21)) + +(26) Project [codegen id : 4] +Output [1]: [d_date_sk#21] +Input [2]: [d_date_sk#21, d_quarter_name#22] + +(27) BroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] + +(28) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [sr_returned_date_sk#7] +Right keys [1]: [cast(d_date_sk#21 as bigint)] +Join condition: None + +(29) Project [codegen id : 8] +Output [6]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_return_quantity#11, cs_sold_date_sk#13, cs_quantity#16] +Input [8]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_returned_date_sk#7, sr_return_quantity#11, cs_sold_date_sk#13, cs_quantity#16, d_date_sk#21] + +(30) ReusedExchange [Reuses operator id: 27] +Output [1]: [d_date_sk#24] + +(31) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [cs_sold_date_sk#13] +Right keys [1]: [d_date_sk#24] +Join condition: None + +(32) Project [codegen id : 8] +Output [5]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_return_quantity#11, cs_quantity#16] +Input [7]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_return_quantity#11, cs_sold_date_sk#13, cs_quantity#16, d_date_sk#24] + +(33) Scan parquet default.store +Output [2]: [s_store_sk#25, s_state#26] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 6] +Input [2]: [s_store_sk#25, s_state#26] + +(35) Filter [codegen id : 6] +Input [2]: [s_store_sk#25, s_state#26] +Condition : isnotnull(s_store_sk#25) + +(36) BroadcastExchange +Input [2]: [s_store_sk#25, s_state#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#27] + +(37) 
BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#25] +Join condition: None + +(38) Project [codegen id : 8] +Output [5]: [ss_item_sk#2, ss_quantity#6, sr_return_quantity#11, cs_quantity#16, s_state#26] +Input [7]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_return_quantity#11, cs_quantity#16, s_store_sk#25, s_state#26] + +(39) Scan parquet default.item +Output [3]: [i_item_sk#28, i_item_id#29, i_item_desc#30] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(40) ColumnarToRow [codegen id : 7] +Input [3]: [i_item_sk#28, i_item_id#29, i_item_desc#30] + +(41) Filter [codegen id : 7] +Input [3]: [i_item_sk#28, i_item_id#29, i_item_desc#30] +Condition : isnotnull(i_item_sk#28) + +(42) BroadcastExchange +Input [3]: [i_item_sk#28, i_item_id#29, i_item_desc#30] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#31] + +(43) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#28] +Join condition: None + +(44) Project [codegen id : 8] +Output [6]: [ss_quantity#6, sr_return_quantity#11, cs_quantity#16, s_state#26, i_item_id#29, i_item_desc#30] +Input [8]: [ss_item_sk#2, ss_quantity#6, sr_return_quantity#11, cs_quantity#16, s_state#26, i_item_sk#28, i_item_id#29, i_item_desc#30] + +(45) HashAggregate [codegen id : 8] +Input [6]: [ss_quantity#6, sr_return_quantity#11, cs_quantity#16, s_state#26, i_item_id#29, i_item_desc#30] +Keys [3]: [i_item_id#29, i_item_desc#30, s_state#26] +Functions [9]: [partial_count(ss_quantity#6), partial_avg(cast(ss_quantity#6 as bigint)), partial_stddev_samp(cast(ss_quantity#6 as double)), partial_count(sr_return_quantity#11), partial_avg(cast(sr_return_quantity#11 as bigint)), partial_stddev_samp(cast(sr_return_quantity#11 as double)), 
partial_count(cs_quantity#16), partial_avg(cast(cs_quantity#16 as bigint)), partial_stddev_samp(cast(cs_quantity#16 as double))] +Aggregate Attributes [18]: [count#32, sum#33, count#34, n#35, avg#36, m2#37, count#38, sum#39, count#40, n#41, avg#42, m2#43, count#44, sum#45, count#46, n#47, avg#48, m2#49] +Results [21]: [i_item_id#29, i_item_desc#30, s_state#26, count#50, sum#51, count#52, n#53, avg#54, m2#55, count#56, sum#57, count#58, n#59, avg#60, m2#61, count#62, sum#63, count#64, n#65, avg#66, m2#67] + +(46) Exchange +Input [21]: [i_item_id#29, i_item_desc#30, s_state#26, count#50, sum#51, count#52, n#53, avg#54, m2#55, count#56, sum#57, count#58, n#59, avg#60, m2#61, count#62, sum#63, count#64, n#65, avg#66, m2#67] +Arguments: hashpartitioning(i_item_id#29, i_item_desc#30, s_state#26, 5), true, [id=#68] + +(47) HashAggregate [codegen id : 9] +Input [21]: [i_item_id#29, i_item_desc#30, s_state#26, count#50, sum#51, count#52, n#53, avg#54, m2#55, count#56, sum#57, count#58, n#59, avg#60, m2#61, count#62, sum#63, count#64, n#65, avg#66, m2#67] +Keys [3]: [i_item_id#29, i_item_desc#30, s_state#26] +Functions [9]: [count(ss_quantity#6), avg(cast(ss_quantity#6 as bigint)), stddev_samp(cast(ss_quantity#6 as double)), count(sr_return_quantity#11), avg(cast(sr_return_quantity#11 as bigint)), stddev_samp(cast(sr_return_quantity#11 as double)), count(cs_quantity#16), avg(cast(cs_quantity#16 as bigint)), stddev_samp(cast(cs_quantity#16 as double))] +Aggregate Attributes [9]: [count(ss_quantity#6)#69, avg(cast(ss_quantity#6 as bigint))#70, stddev_samp(cast(ss_quantity#6 as double))#71, count(sr_return_quantity#11)#72, avg(cast(sr_return_quantity#11 as bigint))#73, stddev_samp(cast(sr_return_quantity#11 as double))#74, count(cs_quantity#16)#75, avg(cast(cs_quantity#16 as bigint))#76, stddev_samp(cast(cs_quantity#16 as double))#77] +Results [15]: [i_item_id#29, i_item_desc#30, s_state#26, count(ss_quantity#6)#69 AS store_sales_quantitycount#78, avg(cast(ss_quantity#6 as 
bigint))#70 AS store_sales_quantityave#79, stddev_samp(cast(ss_quantity#6 as double))#71 AS store_sales_quantitystdev#80, (stddev_samp(cast(ss_quantity#6 as double))#71 / avg(cast(ss_quantity#6 as bigint))#70) AS store_sales_quantitycov#81, count(sr_return_quantity#11)#72 AS as_store_returns_quantitycount#82, avg(cast(sr_return_quantity#11 as bigint))#73 AS as_store_returns_quantityave#83, stddev_samp(cast(sr_return_quantity#11 as double))#74 AS as_store_returns_quantitystdev#84, (stddev_samp(cast(sr_return_quantity#11 as double))#74 / avg(cast(sr_return_quantity#11 as bigint))#73) AS store_returns_quantitycov#85, count(cs_quantity#16)#75 AS catalog_sales_quantitycount#86, avg(cast(cs_quantity#16 as bigint))#76 AS catalog_sales_quantityave#87, (stddev_samp(cast(cs_quantity#16 as double))#77 / avg(cast(cs_quantity#16 as bigint))#76) AS catalog_sales_quantitystdev#88, (stddev_samp(cast(cs_quantity#16 as double))#77 / avg(cast(cs_quantity#16 as bigint))#76) AS catalog_sales_quantitycov#89] + +(48) TakeOrderedAndProject +Input [15]: [i_item_id#29, i_item_desc#30, s_state#26, store_sales_quantitycount#78, store_sales_quantityave#79, store_sales_quantitystdev#80, store_sales_quantitycov#81, as_store_returns_quantitycount#82, as_store_returns_quantityave#83, as_store_returns_quantitystdev#84, store_returns_quantitycov#85, catalog_sales_quantitycount#86, catalog_sales_quantityave#87, catalog_sales_quantitystdev#88, catalog_sales_quantitycov#89] +Arguments: 100, [i_item_id#29 ASC NULLS FIRST, i_item_desc#30 ASC NULLS FIRST, s_state#26 ASC NULLS FIRST], [i_item_id#29, i_item_desc#30, s_state#26, store_sales_quantitycount#78, store_sales_quantityave#79, store_sales_quantitystdev#80, store_sales_quantitycov#81, as_store_returns_quantitycount#82, as_store_returns_quantityave#83, as_store_returns_quantitystdev#84, store_returns_quantitycov#85, catalog_sales_quantitycount#86, catalog_sales_quantityave#87, catalog_sales_quantitystdev#88, catalog_sales_quantitycov#89] + diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/simplified.txt new file mode 100644 index 0000000000000..1e346110a5348 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/simplified.txt @@ -0,0 +1,71 @@ +TakeOrderedAndProject [as_store_returns_quantityave,as_store_returns_quantitycount,as_store_returns_quantitystdev,catalog_sales_quantityave,catalog_sales_quantitycount,catalog_sales_quantitycov,catalog_sales_quantitystdev,i_item_desc,i_item_id,s_state,store_returns_quantitycov,store_sales_quantityave,store_sales_quantitycount,store_sales_quantitycov,store_sales_quantitystdev] + WholeStageCodegen (9) + HashAggregate [avg,avg,avg,count,count,count,count,count,count,i_item_desc,i_item_id,m2,m2,m2,n,n,n,s_state,sum,sum,sum] [as_store_returns_quantityave,as_store_returns_quantitycount,as_store_returns_quantitystdev,avg,avg,avg,avg(cast(cs_quantity as bigint)),avg(cast(sr_return_quantity as bigint)),avg(cast(ss_quantity as bigint)),catalog_sales_quantityave,catalog_sales_quantitycount,catalog_sales_quantitycov,catalog_sales_quantitystdev,count,count,count,count,count,count,count(cs_quantity),count(sr_return_quantity),count(ss_quantity),m2,m2,m2,n,n,n,stddev_samp(cast(cs_quantity as double)),stddev_samp(cast(sr_return_quantity as double)),stddev_samp(cast(ss_quantity as double)),store_returns_quantitycov,store_sales_quantityave,store_sales_quantitycount,store_sales_quantitycov,store_sales_quantitystdev,sum,sum,sum] + InputAdapter + Exchange [i_item_desc,i_item_id,s_state] #1 + WholeStageCodegen (8) + HashAggregate [cs_quantity,i_item_desc,i_item_id,s_state,sr_return_quantity,ss_quantity] [avg,avg,avg,avg,avg,avg,count,count,count,count,count,count,count,count,count,count,count,count,m2,m2,m2,m2,m2,m2,n,n,n,n,n,n,sum,sum,sum,sum,sum,sum] + Project 
[cs_quantity,i_item_desc,i_item_id,s_state,sr_return_quantity,ss_quantity] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [cs_quantity,s_state,sr_return_quantity,ss_item_sk,ss_quantity] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [cs_quantity,sr_return_quantity,ss_item_sk,ss_quantity,ss_store_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_quantity,cs_sold_date_sk,sr_return_quantity,ss_item_sk,ss_quantity,ss_store_sk] + BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + Project [cs_quantity,cs_sold_date_sk,sr_return_quantity,sr_returned_date_sk,ss_item_sk,ss_quantity,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [cs_quantity,cs_sold_date_sk,sr_return_quantity,sr_returned_date_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,sr_customer_sk,sr_item_sk] + Project [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [sr_customer_sk,sr_item_sk,sr_ticket_number,ss_customer_sk,ss_item_sk,ss_ticket_number] + Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,sr_ticket_number] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date_sk] + Filter 
[d_date_sk,d_quarter_name] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_quarter_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [d_date_sk] + Filter [d_date_sk,d_quarter_name] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_quarter_name] + InputAdapter + ReusedExchange [d_date_sk] #5 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (6) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_state,s_store_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (7) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_desc,i_item_id,i_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.sf100/explain.txt new file mode 100644 index 0000000000000..646c5240fd09e --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.sf100/explain.txt @@ -0,0 +1,294 @@ +== Physical Plan == +TakeOrderedAndProject (53) ++- * HashAggregate (52) + +- Exchange (51) + +- * HashAggregate (50) + +- * Expand (49) + +- * Project (48) + +- * SortMergeJoin Inner (47) + :- * Sort (25) + : +- Exchange (24) + : +- * Project (23) + : +- * BroadcastHashJoin Inner BuildRight (22) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.catalog_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.customer_demographics (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.date_dim (11) + : +- 
BroadcastExchange (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.item (18) + +- * Sort (46) + +- Exchange (45) + +- * Project (44) + +- * SortMergeJoin Inner (43) + :- * Sort (37) + : +- Exchange (36) + : +- * Project (35) + : +- * BroadcastHashJoin Inner BuildRight (34) + : :- * Project (29) + : : +- * Filter (28) + : : +- * ColumnarToRow (27) + : : +- Scan parquet default.customer (26) + : +- BroadcastExchange (33) + : +- * Filter (32) + : +- * ColumnarToRow (31) + : +- Scan parquet default.customer_address (30) + +- * Sort (42) + +- Exchange (41) + +- * Filter (40) + +- * ColumnarToRow (39) + +- Scan parquet default.customer_demographics (38) + + +(1) Scan parquet default.catalog_sales +Output [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9] + +(3) Filter [codegen id : 4] +Input [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9] +Condition : (((isnotnull(cs_bill_cdemo_sk#3) AND isnotnull(cs_bill_customer_sk#2)) AND isnotnull(cs_sold_date_sk#1)) AND isnotnull(cs_item_sk#4)) + +(4) Scan parquet default.customer_demographics +Output [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_demographics] +PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_education_status), EqualTo(cd_gender,F), EqualTo(cd_education_status,Unknown), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] + +(6) Filter [codegen id : 1] +Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] +Condition : ((((isnotnull(cd_gender#11) AND isnotnull(cd_education_status#12)) AND (cd_gender#11 = F)) AND (cd_education_status#12 = Unknown)) AND isnotnull(cd_demo_sk#10)) + +(7) Project [codegen id : 1] +Output [2]: [cd_demo_sk#10, cd_dep_count#13] +Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] + +(8) BroadcastExchange +Input [2]: [cd_demo_sk#10, cd_dep_count#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_bill_cdemo_sk#3] +Right keys [1]: [cd_demo_sk#10] +Join condition: None + +(10) Project [codegen id : 4] +Output [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13] +Input [11]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_demo_sk#10, cd_dep_count#13] + +(11) Scan parquet default.date_dim +Output [2]: [d_date_sk#15, d_year#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1998), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] 
+Input [2]: [d_date_sk#15, d_year#16] + +(13) Filter [codegen id : 2] +Input [2]: [d_date_sk#15, d_year#16] +Condition : ((isnotnull(d_year#16) AND (d_year#16 = 1998)) AND isnotnull(d_date_sk#15)) + +(14) Project [codegen id : 2] +Output [1]: [d_date_sk#15] +Input [2]: [d_date_sk#15, d_year#16] + +(15) BroadcastExchange +Input [1]: [d_date_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#15] +Join condition: None + +(17) Project [codegen id : 4] +Output [8]: [cs_bill_customer_sk#2, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13] +Input [10]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, d_date_sk#15] + +(18) Scan parquet default.item +Output [2]: [i_item_sk#18, i_item_id#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [i_item_sk#18, i_item_id#19] + +(20) Filter [codegen id : 3] +Input [2]: [i_item_sk#18, i_item_id#19] +Condition : isnotnull(i_item_sk#18) + +(21) BroadcastExchange +Input [2]: [i_item_sk#18, i_item_id#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#20] + +(22) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_item_sk#4] +Right keys [1]: [i_item_sk#18] +Join condition: None + +(23) Project [codegen id : 4] +Output [8]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19] +Input [10]: [cs_bill_customer_sk#2, cs_item_sk#4, cs_quantity#5, 
cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_sk#18, i_item_id#19] + +(24) Exchange +Input [8]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19] +Arguments: hashpartitioning(cs_bill_customer_sk#2, 5), true, [id=#21] + +(25) Sort [codegen id : 5] +Input [8]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19] +Arguments: [cs_bill_customer_sk#2 ASC NULLS FIRST], false, 0 + +(26) Scan parquet default.customer +Output [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_month#25, c_birth_year#26] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer] +PushedFilters: [In(c_birth_month, [1,6,8,9,12,2]), IsNotNull(c_customer_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 7] +Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_month#25, c_birth_year#26] + +(28) Filter [codegen id : 7] +Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_month#25, c_birth_year#26] +Condition : (((c_birth_month#25 IN (1,6,8,9,12,2) AND isnotnull(c_customer_sk#22)) AND isnotnull(c_current_cdemo_sk#23)) AND isnotnull(c_current_addr_sk#24)) + +(29) Project [codegen id : 7] +Output [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26] +Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_month#25, c_birth_year#26] + +(30) Scan parquet default.customer_address +Output [4]: [ca_address_sk#27, ca_county#28, ca_state#29, ca_country#30] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [In(ca_state, [MS,IN,ND,OK,NM,VA]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 6] +Input [4]: [ca_address_sk#27, ca_county#28, ca_state#29, ca_country#30] + +(32) Filter [codegen id : 6] +Input [4]: [ca_address_sk#27, ca_county#28, ca_state#29, ca_country#30] +Condition : (ca_state#29 IN (MS,IN,ND,OK,NM,VA) AND isnotnull(ca_address_sk#27)) + +(33) BroadcastExchange +Input [4]: [ca_address_sk#27, ca_county#28, ca_state#29, ca_country#30] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#31] + +(34) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_current_addr_sk#24] +Right keys [1]: [ca_address_sk#27] +Join condition: None + +(35) Project [codegen id : 7] +Output [6]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] +Input [8]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26, ca_address_sk#27, ca_county#28, ca_state#29, ca_country#30] + +(36) Exchange +Input [6]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] +Arguments: hashpartitioning(c_current_cdemo_sk#23, 5), true, [id=#32] + +(37) Sort [codegen id : 8] +Input [6]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] +Arguments: [c_current_cdemo_sk#23 ASC NULLS FIRST], false, 0 + +(38) Scan parquet default.customer_demographics +Output [1]: [cd_demo_sk#33] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(39) ColumnarToRow [codegen id : 9] +Input [1]: [cd_demo_sk#33] + +(40) Filter 
[codegen id : 9] +Input [1]: [cd_demo_sk#33] +Condition : isnotnull(cd_demo_sk#33) + +(41) Exchange +Input [1]: [cd_demo_sk#33] +Arguments: hashpartitioning(cd_demo_sk#33, 5), true, [id=#34] + +(42) Sort [codegen id : 10] +Input [1]: [cd_demo_sk#33] +Arguments: [cd_demo_sk#33 ASC NULLS FIRST], false, 0 + +(43) SortMergeJoin [codegen id : 11] +Left keys [1]: [c_current_cdemo_sk#23] +Right keys [1]: [cd_demo_sk#33] +Join condition: None + +(44) Project [codegen id : 11] +Output [5]: [c_customer_sk#22, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] +Input [7]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30, cd_demo_sk#33] + +(45) Exchange +Input [5]: [c_customer_sk#22, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] +Arguments: hashpartitioning(c_customer_sk#22, 5), true, [id=#35] + +(46) Sort [codegen id : 12] +Input [5]: [c_customer_sk#22, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] +Arguments: [c_customer_sk#22 ASC NULLS FIRST], false, 0 + +(47) SortMergeJoin [codegen id : 13] +Left keys [1]: [cs_bill_customer_sk#2] +Right keys [1]: [c_customer_sk#22] +Join condition: None + +(48) Project [codegen id : 13] +Output [11]: [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, i_item_id#19, ca_country#30, ca_state#29, ca_county#28] +Input [13]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19, c_customer_sk#22, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] + +(49) Expand [codegen id : 13] +Input [11]: [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, i_item_id#19, ca_country#30, ca_state#29, ca_county#28] +Arguments: [List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, 
i_item_id#19, ca_country#30, ca_state#29, ca_county#28, 0), List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, i_item_id#19, ca_country#30, ca_state#29, null, 1), List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, i_item_id#19, ca_country#30, null, null, 3), List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, i_item_id#19, null, null, null, 7), List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, null, null, null, null, 15)], [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, i_item_id#36, ca_country#37, ca_state#38, ca_county#39, spark_grouping_id#40] + +(50) HashAggregate [codegen id : 13] +Input [12]: [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, i_item_id#36, ca_country#37, ca_state#38, ca_county#39, spark_grouping_id#40] +Keys [5]: [i_item_id#36, ca_country#37, ca_state#38, ca_county#39, spark_grouping_id#40] +Functions [7]: [partial_avg(cast(cs_quantity#5 as decimal(12,2))), partial_avg(cast(cs_list_price#6 as decimal(12,2))), partial_avg(cast(cs_coupon_amt#8 as decimal(12,2))), partial_avg(cast(cs_sales_price#7 as decimal(12,2))), partial_avg(cast(cs_net_profit#9 as decimal(12,2))), partial_avg(cast(c_birth_year#26 as decimal(12,2))), partial_avg(cast(cd_dep_count#13 as decimal(12,2)))] +Aggregate Attributes [14]: [sum#41, count#42, sum#43, count#44, sum#45, count#46, sum#47, count#48, sum#49, count#50, sum#51, count#52, sum#53, count#54] +Results [19]: [i_item_id#36, ca_country#37, ca_state#38, ca_county#39, spark_grouping_id#40, sum#55, count#56, sum#57, count#58, sum#59, count#60, sum#61, count#62, sum#63, count#64, sum#65, count#66, 
sum#67, count#68] + +(51) Exchange +Input [19]: [i_item_id#36, ca_country#37, ca_state#38, ca_county#39, spark_grouping_id#40, sum#55, count#56, sum#57, count#58, sum#59, count#60, sum#61, count#62, sum#63, count#64, sum#65, count#66, sum#67, count#68] +Arguments: hashpartitioning(i_item_id#36, ca_country#37, ca_state#38, ca_county#39, spark_grouping_id#40, 5), true, [id=#69] + +(52) HashAggregate [codegen id : 14] +Input [19]: [i_item_id#36, ca_country#37, ca_state#38, ca_county#39, spark_grouping_id#40, sum#55, count#56, sum#57, count#58, sum#59, count#60, sum#61, count#62, sum#63, count#64, sum#65, count#66, sum#67, count#68] +Keys [5]: [i_item_id#36, ca_country#37, ca_state#38, ca_county#39, spark_grouping_id#40] +Functions [7]: [avg(cast(cs_quantity#5 as decimal(12,2))), avg(cast(cs_list_price#6 as decimal(12,2))), avg(cast(cs_coupon_amt#8 as decimal(12,2))), avg(cast(cs_sales_price#7 as decimal(12,2))), avg(cast(cs_net_profit#9 as decimal(12,2))), avg(cast(c_birth_year#26 as decimal(12,2))), avg(cast(cd_dep_count#13 as decimal(12,2)))] +Aggregate Attributes [7]: [avg(cast(cs_quantity#5 as decimal(12,2)))#70, avg(cast(cs_list_price#6 as decimal(12,2)))#71, avg(cast(cs_coupon_amt#8 as decimal(12,2)))#72, avg(cast(cs_sales_price#7 as decimal(12,2)))#73, avg(cast(cs_net_profit#9 as decimal(12,2)))#74, avg(cast(c_birth_year#26 as decimal(12,2)))#75, avg(cast(cd_dep_count#13 as decimal(12,2)))#76] +Results [11]: [i_item_id#36, ca_country#37, ca_state#38, ca_county#39, avg(cast(cs_quantity#5 as decimal(12,2)))#70 AS agg1#77, avg(cast(cs_list_price#6 as decimal(12,2)))#71 AS agg2#78, avg(cast(cs_coupon_amt#8 as decimal(12,2)))#72 AS agg3#79, avg(cast(cs_sales_price#7 as decimal(12,2)))#73 AS agg4#80, avg(cast(cs_net_profit#9 as decimal(12,2)))#74 AS agg5#81, avg(cast(c_birth_year#26 as decimal(12,2)))#75 AS agg6#82, avg(cast(cd_dep_count#13 as decimal(12,2)))#76 AS agg7#83] + +(53) TakeOrderedAndProject +Input [11]: [i_item_id#36, ca_country#37, ca_state#38, 
ca_county#39, agg1#77, agg2#78, agg3#79, agg4#80, agg5#81, agg6#82, agg7#83] +Arguments: 100, [ca_country#37 ASC NULLS FIRST, ca_state#38 ASC NULLS FIRST, ca_county#39 ASC NULLS FIRST, i_item_id#36 ASC NULLS FIRST], [i_item_id#36, ca_country#37, ca_state#38, ca_county#39, agg1#77, agg2#78, agg3#79, agg4#80, agg5#81, agg6#82, agg7#83] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.sf100/simplified.txt new file mode 100644 index 0000000000000..cdc4bf9d37284 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.sf100/simplified.txt @@ -0,0 +1,87 @@ +TakeOrderedAndProject [agg1,agg2,agg3,agg4,agg5,agg6,agg7,ca_country,ca_county,ca_state,i_item_id] + WholeStageCodegen (14) + HashAggregate [ca_country,ca_county,ca_state,count,count,count,count,count,count,count,i_item_id,spark_grouping_id,sum,sum,sum,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,agg5,agg6,agg7,avg(cast(c_birth_year as decimal(12,2))),avg(cast(cd_dep_count as decimal(12,2))),avg(cast(cs_coupon_amt as decimal(12,2))),avg(cast(cs_list_price as decimal(12,2))),avg(cast(cs_net_profit as decimal(12,2))),avg(cast(cs_quantity as decimal(12,2))),avg(cast(cs_sales_price as decimal(12,2))),count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum] + InputAdapter + Exchange [ca_country,ca_county,ca_state,i_item_id,spark_grouping_id] #1 + WholeStageCodegen (13) + HashAggregate [c_birth_year,ca_country,ca_county,ca_state,cd_dep_count,cs_coupon_amt,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,i_item_id,spark_grouping_id] [count,count,count,count,count,count,count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Expand [c_birth_year,ca_country,ca_county,ca_state,cd_dep_count,cs_coupon_amt,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,i_item_id] + Project 
[c_birth_year,ca_country,ca_county,ca_state,cd_dep_count,cs_coupon_amt,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,i_item_id] + SortMergeJoin [c_customer_sk,cs_bill_customer_sk] + InputAdapter + WholeStageCodegen (5) + Sort [cs_bill_customer_sk] + InputAdapter + Exchange [cs_bill_customer_sk] #2 + WholeStageCodegen (4) + Project [cd_dep_count,cs_bill_customer_sk,cs_coupon_amt,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cd_dep_count,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cd_dep_count,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [cd_demo_sk,cs_bill_cdemo_sk] + Filter [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [cd_demo_sk,cd_dep_count] + Filter [cd_demo_sk,cd_education_status,cd_gender] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_dep_count,cd_education_status,cd_gender] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_id,i_item_sk] + InputAdapter + WholeStageCodegen (12) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #6 + WholeStageCodegen (11) + Project [c_birth_year,c_customer_sk,ca_country,ca_county,ca_state] + SortMergeJoin 
[c_current_cdemo_sk,cd_demo_sk] + InputAdapter + WholeStageCodegen (8) + Sort [c_current_cdemo_sk] + InputAdapter + Exchange [c_current_cdemo_sk] #7 + WholeStageCodegen (7) + Project [c_birth_year,c_current_cdemo_sk,c_customer_sk,ca_country,ca_county,ca_state] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_birth_year,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + Filter [c_birth_month,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_birth_month,c_birth_year,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (6) + Filter [ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_country,ca_county,ca_state] + InputAdapter + WholeStageCodegen (10) + Sort [cd_demo_sk] + InputAdapter + Exchange [cd_demo_sk] #9 + WholeStageCodegen (9) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/explain.txt new file mode 100644 index 0000000000000..5ba71337ccacb --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/explain.txt @@ -0,0 +1,264 @@ +== Physical Plan == +TakeOrderedAndProject (47) ++- * HashAggregate (46) + +- Exchange (45) + +- * HashAggregate (44) + +- * Expand (43) + +- * Project (42) + +- * BroadcastHashJoin Inner BuildRight (41) + :- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Project (29) + : : +- * BroadcastHashJoin Inner BuildRight (28) + : : :- * Project (23) + : : : +- * BroadcastHashJoin Inner BuildRight (22) + : : : :- * Project (17) + : : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : : :- * Project (10) + : : : : : +- * BroadcastHashJoin Inner 
BuildRight (9) + : : : : : :- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet default.catalog_sales (1) + : : : : : +- BroadcastExchange (8) + : : : : : +- * Project (7) + : : : : : +- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.customer_demographics (4) + : : : : +- BroadcastExchange (15) + : : : : +- * Project (14) + : : : : +- * Filter (13) + : : : : +- * ColumnarToRow (12) + : : : : +- Scan parquet default.customer (11) + : : : +- BroadcastExchange (21) + : : : +- * Filter (20) + : : : +- * ColumnarToRow (19) + : : : +- Scan parquet default.customer_demographics (18) + : : +- BroadcastExchange (27) + : : +- * Filter (26) + : : +- * ColumnarToRow (25) + : : +- Scan parquet default.customer_address (24) + : +- BroadcastExchange (34) + : +- * Project (33) + : +- * Filter (32) + : +- * ColumnarToRow (31) + : +- Scan parquet default.date_dim (30) + +- BroadcastExchange (40) + +- * Filter (39) + +- * ColumnarToRow (38) + +- Scan parquet default.item (37) + + +(1) Scan parquet default.catalog_sales +Output [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 7] +Input [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9] + +(3) Filter [codegen id : 7] +Input [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9] 
+Condition : (((isnotnull(cs_bill_cdemo_sk#3) AND isnotnull(cs_bill_customer_sk#2)) AND isnotnull(cs_sold_date_sk#1)) AND isnotnull(cs_item_sk#4)) + +(4) Scan parquet default.customer_demographics +Output [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_demographics] +PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_education_status), EqualTo(cd_gender,F), EqualTo(cd_education_status,Unknown), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] + +(6) Filter [codegen id : 1] +Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] +Condition : ((((isnotnull(cd_gender#11) AND isnotnull(cd_education_status#12)) AND (cd_gender#11 = F)) AND (cd_education_status#12 = Unknown)) AND isnotnull(cd_demo_sk#10)) + +(7) Project [codegen id : 1] +Output [2]: [cd_demo_sk#10, cd_dep_count#13] +Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] + +(8) BroadcastExchange +Input [2]: [cd_demo_sk#10, cd_dep_count#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] + +(9) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cs_bill_cdemo_sk#3] +Right keys [1]: [cd_demo_sk#10] +Join condition: None + +(10) Project [codegen id : 7] +Output [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13] +Input [11]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_demo_sk#10, cd_dep_count#13] + +(11) Scan parquet default.customer +Output [5]: [c_customer_sk#15, 
c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_month#18, c_birth_year#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [In(c_birth_month, [1,6,8,9,12,2]), IsNotNull(c_customer_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [5]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_month#18, c_birth_year#19] + +(13) Filter [codegen id : 2] +Input [5]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_month#18, c_birth_year#19] +Condition : (((c_birth_month#18 IN (1,6,8,9,12,2) AND isnotnull(c_customer_sk#15)) AND isnotnull(c_current_cdemo_sk#16)) AND isnotnull(c_current_addr_sk#17)) + +(14) Project [codegen id : 2] +Output [4]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] +Input [5]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_month#18, c_birth_year#19] + +(15) BroadcastExchange +Input [4]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(16) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cs_bill_customer_sk#2] +Right keys [1]: [c_customer_sk#15] +Join condition: None + +(17) Project [codegen id : 7] +Output [11]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] +Input [13]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] + +(18) Scan parquet 
default.customer_demographics +Output [1]: [cd_demo_sk#21] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [1]: [cd_demo_sk#21] + +(20) Filter [codegen id : 3] +Input [1]: [cd_demo_sk#21] +Condition : isnotnull(cd_demo_sk#21) + +(21) BroadcastExchange +Input [1]: [cd_demo_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] + +(22) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_current_cdemo_sk#16] +Right keys [1]: [cd_demo_sk#21] +Join condition: None + +(23) Project [codegen id : 7] +Output [10]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_current_addr_sk#17, c_birth_year#19] +Input [12]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19, cd_demo_sk#21] + +(24) Scan parquet default.customer_address +Output [4]: [ca_address_sk#23, ca_county#24, ca_state#25, ca_country#26] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_address] +PushedFilters: [In(ca_state, [MS,IN,ND,OK,NM,VA]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 4] +Input [4]: [ca_address_sk#23, ca_county#24, ca_state#25, ca_country#26] + +(26) Filter [codegen id : 4] +Input [4]: [ca_address_sk#23, ca_county#24, ca_state#25, ca_country#26] +Condition : (ca_state#25 IN (MS,IN,ND,OK,NM,VA) AND isnotnull(ca_address_sk#23)) + +(27) BroadcastExchange +Input [4]: [ca_address_sk#23, ca_county#24, ca_state#25, ca_country#26] 
+Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#27] + +(28) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_current_addr_sk#17] +Right keys [1]: [ca_address_sk#23] +Join condition: None + +(29) Project [codegen id : 7] +Output [12]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, ca_county#24, ca_state#25, ca_country#26] +Input [14]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_current_addr_sk#17, c_birth_year#19, ca_address_sk#23, ca_county#24, ca_state#25, ca_country#26] + +(30) Scan parquet default.date_dim +Output [2]: [d_date_sk#28, d_year#29] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1998), IsNotNull(d_date_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#28, d_year#29] + +(32) Filter [codegen id : 5] +Input [2]: [d_date_sk#28, d_year#29] +Condition : ((isnotnull(d_year#29) AND (d_year#29 = 1998)) AND isnotnull(d_date_sk#28)) + +(33) Project [codegen id : 5] +Output [1]: [d_date_sk#28] +Input [2]: [d_date_sk#28, d_year#29] + +(34) BroadcastExchange +Input [1]: [d_date_sk#28] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#30] + +(35) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#28] +Join condition: None + +(36) Project [codegen id : 7] +Output [11]: [cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, ca_county#24, ca_state#25, ca_country#26] +Input [13]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, 
cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, ca_county#24, ca_state#25, ca_country#26, d_date_sk#28] + +(37) Scan parquet default.item +Output [2]: [i_item_sk#31, i_item_id#32] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 6] +Input [2]: [i_item_sk#31, i_item_id#32] + +(39) Filter [codegen id : 6] +Input [2]: [i_item_sk#31, i_item_id#32] +Condition : isnotnull(i_item_sk#31) + +(40) BroadcastExchange +Input [2]: [i_item_sk#31, i_item_id#32] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#33] + +(41) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cs_item_sk#4] +Right keys [1]: [i_item_sk#31] +Join condition: None + +(42) Project [codegen id : 7] +Output [11]: [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, i_item_id#32, ca_country#26, ca_state#25, ca_county#24] +Input [13]: [cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, ca_county#24, ca_state#25, ca_country#26, i_item_sk#31, i_item_id#32] + +(43) Expand [codegen id : 7] +Input [11]: [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, i_item_id#32, ca_country#26, ca_state#25, ca_county#24] +Arguments: [List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, i_item_id#32, ca_country#26, ca_state#25, ca_county#24, 0), List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, i_item_id#32, ca_country#26, ca_state#25, null, 1), List(cs_quantity#5, cs_list_price#6, 
cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, i_item_id#32, ca_country#26, null, null, 3), List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, i_item_id#32, null, null, null, 7), List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, null, null, null, null, 15)], [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, i_item_id#34, ca_country#35, ca_state#36, ca_county#37, spark_grouping_id#38] + +(44) HashAggregate [codegen id : 7] +Input [12]: [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, i_item_id#34, ca_country#35, ca_state#36, ca_county#37, spark_grouping_id#38] +Keys [5]: [i_item_id#34, ca_country#35, ca_state#36, ca_county#37, spark_grouping_id#38] +Functions [7]: [partial_avg(cast(cs_quantity#5 as decimal(12,2))), partial_avg(cast(cs_list_price#6 as decimal(12,2))), partial_avg(cast(cs_coupon_amt#8 as decimal(12,2))), partial_avg(cast(cs_sales_price#7 as decimal(12,2))), partial_avg(cast(cs_net_profit#9 as decimal(12,2))), partial_avg(cast(c_birth_year#19 as decimal(12,2))), partial_avg(cast(cd_dep_count#13 as decimal(12,2)))] +Aggregate Attributes [14]: [sum#39, count#40, sum#41, count#42, sum#43, count#44, sum#45, count#46, sum#47, count#48, sum#49, count#50, sum#51, count#52] +Results [19]: [i_item_id#34, ca_country#35, ca_state#36, ca_county#37, spark_grouping_id#38, sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60, sum#61, count#62, sum#63, count#64, sum#65, count#66] + +(45) Exchange +Input [19]: [i_item_id#34, ca_country#35, ca_state#36, ca_county#37, spark_grouping_id#38, sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60, sum#61, count#62, sum#63, count#64, sum#65, count#66] +Arguments: 
hashpartitioning(i_item_id#34, ca_country#35, ca_state#36, ca_county#37, spark_grouping_id#38, 5), true, [id=#67] + +(46) HashAggregate [codegen id : 8] +Input [19]: [i_item_id#34, ca_country#35, ca_state#36, ca_county#37, spark_grouping_id#38, sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60, sum#61, count#62, sum#63, count#64, sum#65, count#66] +Keys [5]: [i_item_id#34, ca_country#35, ca_state#36, ca_county#37, spark_grouping_id#38] +Functions [7]: [avg(cast(cs_quantity#5 as decimal(12,2))), avg(cast(cs_list_price#6 as decimal(12,2))), avg(cast(cs_coupon_amt#8 as decimal(12,2))), avg(cast(cs_sales_price#7 as decimal(12,2))), avg(cast(cs_net_profit#9 as decimal(12,2))), avg(cast(c_birth_year#19 as decimal(12,2))), avg(cast(cd_dep_count#13 as decimal(12,2)))] +Aggregate Attributes [7]: [avg(cast(cs_quantity#5 as decimal(12,2)))#68, avg(cast(cs_list_price#6 as decimal(12,2)))#69, avg(cast(cs_coupon_amt#8 as decimal(12,2)))#70, avg(cast(cs_sales_price#7 as decimal(12,2)))#71, avg(cast(cs_net_profit#9 as decimal(12,2)))#72, avg(cast(c_birth_year#19 as decimal(12,2)))#73, avg(cast(cd_dep_count#13 as decimal(12,2)))#74] +Results [11]: [i_item_id#34, ca_country#35, ca_state#36, ca_county#37, avg(cast(cs_quantity#5 as decimal(12,2)))#68 AS agg1#75, avg(cast(cs_list_price#6 as decimal(12,2)))#69 AS agg2#76, avg(cast(cs_coupon_amt#8 as decimal(12,2)))#70 AS agg3#77, avg(cast(cs_sales_price#7 as decimal(12,2)))#71 AS agg4#78, avg(cast(cs_net_profit#9 as decimal(12,2)))#72 AS agg5#79, avg(cast(c_birth_year#19 as decimal(12,2)))#73 AS agg6#80, avg(cast(cd_dep_count#13 as decimal(12,2)))#74 AS agg7#81] + +(47) TakeOrderedAndProject +Input [11]: [i_item_id#34, ca_country#35, ca_state#36, ca_county#37, agg1#75, agg2#76, agg3#77, agg4#78, agg5#79, agg6#80, agg7#81] +Arguments: 100, [ca_country#35 ASC NULLS FIRST, ca_state#36 ASC NULLS FIRST, ca_county#37 ASC NULLS FIRST, i_item_id#34 ASC NULLS FIRST], [i_item_id#34, ca_country#35, ca_state#36, ca_county#37, 
agg1#75, agg2#76, agg3#77, agg4#78, agg5#79, agg6#80, agg7#81] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/simplified.txt new file mode 100644 index 0000000000000..7e967bf60236e --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/simplified.txt @@ -0,0 +1,69 @@ +TakeOrderedAndProject [agg1,agg2,agg3,agg4,agg5,agg6,agg7,ca_country,ca_county,ca_state,i_item_id] + WholeStageCodegen (8) + HashAggregate [ca_country,ca_county,ca_state,count,count,count,count,count,count,count,i_item_id,spark_grouping_id,sum,sum,sum,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,agg5,agg6,agg7,avg(cast(c_birth_year as decimal(12,2))),avg(cast(cd_dep_count as decimal(12,2))),avg(cast(cs_coupon_amt as decimal(12,2))),avg(cast(cs_list_price as decimal(12,2))),avg(cast(cs_net_profit as decimal(12,2))),avg(cast(cs_quantity as decimal(12,2))),avg(cast(cs_sales_price as decimal(12,2))),count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum] + InputAdapter + Exchange [ca_country,ca_county,ca_state,i_item_id,spark_grouping_id] #1 + WholeStageCodegen (7) + HashAggregate [c_birth_year,ca_country,ca_county,ca_state,cd_dep_count,cs_coupon_amt,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,i_item_id,spark_grouping_id] [count,count,count,count,count,count,count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Expand [c_birth_year,ca_country,ca_county,ca_state,cd_dep_count,cs_coupon_amt,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,i_item_id] + Project [c_birth_year,ca_country,ca_county,ca_state,cd_dep_count,cs_coupon_amt,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project 
[c_birth_year,ca_country,ca_county,ca_state,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [c_birth_year,ca_country,ca_county,ca_state,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_birth_year,c_current_addr_sk,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_birth_year,c_current_addr_sk,c_current_cdemo_sk,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] + Project [cd_dep_count,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [cd_demo_sk,cs_bill_cdemo_sk] + Filter [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [cd_demo_sk,cd_dep_count] + Filter [cd_demo_sk,cd_education_status,cd_gender] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_dep_count,cd_education_status,cd_gender] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [c_birth_year,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + Filter [c_birth_month,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_birth_month,c_birth_year,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + InputAdapter + BroadcastExchange #4 + 
WholeStageCodegen (3) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_country,ca_county,ca_state] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (6) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_id,i_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/explain.txt new file mode 100644 index 0000000000000..2799fd0fbd634 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/explain.txt @@ -0,0 +1,251 @@ +== Physical Plan == +TakeOrderedAndProject (45) ++- * HashAggregate (44) + +- Exchange (43) + +- * HashAggregate (42) + +- * Project (41) + +- * SortMergeJoin Inner (40) + :- * Sort (25) + : +- Exchange (24) + : +- * Project (23) + : +- * BroadcastHashJoin Inner BuildRight (22) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildLeft (9) + : : : :- BroadcastExchange (5) + : : : : +- * Project (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.item (1) + : : : +- * Filter (8) + : : : +- * ColumnarToRow (7) + : : : +- Scan parquet default.store_sales (6) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.date_dim (11) + : +- BroadcastExchange (21) + : +- * Filter (20) + : +- * 
ColumnarToRow (19) + : +- Scan parquet default.store (18) + +- * Sort (39) + +- Exchange (38) + +- * Project (37) + +- * SortMergeJoin Inner (36) + :- * Sort (30) + : +- Exchange (29) + : +- * Filter (28) + : +- * ColumnarToRow (27) + : +- Scan parquet default.customer_address (26) + +- * Sort (35) + +- Exchange (34) + +- * Filter (33) + +- * ColumnarToRow (32) + +- Scan parquet default.customer (31) + + +(1) Scan parquet default.item +Output [6]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5, i_manager_id#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,8), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5, i_manager_id#6] + +(3) Filter [codegen id : 1] +Input [6]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5, i_manager_id#6] +Condition : ((isnotnull(i_manager_id#6) AND (i_manager_id#6 = 8)) AND isnotnull(i_item_sk#1)) + +(4) Project [codegen id : 1] +Output [5]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5] +Input [6]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5, i_manager_id#6] + +(5) BroadcastExchange +Input [5]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] + +(6) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#8, ss_item_sk#9, ss_customer_sk#10, ss_store_sk#11, ss_ext_sales_price#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), 
IsNotNull(ss_customer_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(7) ColumnarToRow +Input [5]: [ss_sold_date_sk#8, ss_item_sk#9, ss_customer_sk#10, ss_store_sk#11, ss_ext_sales_price#12] + +(8) Filter +Input [5]: [ss_sold_date_sk#8, ss_item_sk#9, ss_customer_sk#10, ss_store_sk#11, ss_ext_sales_price#12] +Condition : (((isnotnull(ss_sold_date_sk#8) AND isnotnull(ss_item_sk#9)) AND isnotnull(ss_customer_sk#10)) AND isnotnull(ss_store_sk#11)) + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [ss_item_sk#9] +Join condition: None + +(10) Project [codegen id : 4] +Output [8]: [i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5, ss_sold_date_sk#8, ss_customer_sk#10, ss_store_sk#11, ss_ext_sales_price#12] +Input [10]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5, ss_sold_date_sk#8, ss_item_sk#9, ss_customer_sk#10, ss_store_sk#11, ss_ext_sales_price#12] + +(11) Scan parquet default.date_dim +Output [3]: [d_date_sk#13, d_year#14, d_moy#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1998), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#13, d_year#14, d_moy#15] + +(13) Filter [codegen id : 2] +Input [3]: [d_date_sk#13, d_year#14, d_moy#15] +Condition : ((((isnotnull(d_moy#15) AND isnotnull(d_year#14)) AND (d_moy#15 = 11)) AND (d_year#14 = 1998)) AND isnotnull(d_date_sk#13)) + +(14) Project [codegen id : 2] +Output [1]: [d_date_sk#13] +Input [3]: [d_date_sk#13, d_year#14, d_moy#15] + +(15) BroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#8] +Right keys [1]: 
[d_date_sk#13] +Join condition: None + +(17) Project [codegen id : 4] +Output [7]: [i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5, ss_customer_sk#10, ss_store_sk#11, ss_ext_sales_price#12] +Input [9]: [i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5, ss_sold_date_sk#8, ss_customer_sk#10, ss_store_sk#11, ss_ext_sales_price#12, d_date_sk#13] + +(18) Scan parquet default.store +Output [2]: [s_store_sk#17, s_zip#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_zip), IsNotNull(s_store_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [s_store_sk#17, s_zip#18] + +(20) Filter [codegen id : 3] +Input [2]: [s_store_sk#17, s_zip#18] +Condition : (isnotnull(s_zip#18) AND isnotnull(s_store_sk#17)) + +(21) BroadcastExchange +Input [2]: [s_store_sk#17, s_zip#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] + +(22) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#11] +Right keys [1]: [s_store_sk#17] +Join condition: None + +(23) Project [codegen id : 4] +Output [7]: [i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5, ss_customer_sk#10, ss_ext_sales_price#12, s_zip#18] +Input [9]: [i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5, ss_customer_sk#10, ss_store_sk#11, ss_ext_sales_price#12, s_store_sk#17, s_zip#18] + +(24) Exchange +Input [7]: [i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5, ss_customer_sk#10, ss_ext_sales_price#12, s_zip#18] +Arguments: hashpartitioning(ss_customer_sk#10, 5), true, [id=#20] + +(25) Sort [codegen id : 5] +Input [7]: [i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5, ss_customer_sk#10, ss_ext_sales_price#12, s_zip#18] +Arguments: [ss_customer_sk#10 ASC NULLS FIRST], false, 0 + +(26) Scan parquet default.customer_address +Output [2]: [ca_address_sk#21, ca_zip#22] 
+Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_zip)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 6] +Input [2]: [ca_address_sk#21, ca_zip#22] + +(28) Filter [codegen id : 6] +Input [2]: [ca_address_sk#21, ca_zip#22] +Condition : (isnotnull(ca_address_sk#21) AND isnotnull(ca_zip#22)) + +(29) Exchange +Input [2]: [ca_address_sk#21, ca_zip#22] +Arguments: hashpartitioning(ca_address_sk#21, 5), true, [id=#23] + +(30) Sort [codegen id : 7] +Input [2]: [ca_address_sk#21, ca_zip#22] +Arguments: [ca_address_sk#21 ASC NULLS FIRST], false, 0 + +(31) Scan parquet default.customer +Output [2]: [c_customer_sk#24, c_current_addr_sk#25] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 8] +Input [2]: [c_customer_sk#24, c_current_addr_sk#25] + +(33) Filter [codegen id : 8] +Input [2]: [c_customer_sk#24, c_current_addr_sk#25] +Condition : (isnotnull(c_customer_sk#24) AND isnotnull(c_current_addr_sk#25)) + +(34) Exchange +Input [2]: [c_customer_sk#24, c_current_addr_sk#25] +Arguments: hashpartitioning(c_current_addr_sk#25, 5), true, [id=#26] + +(35) Sort [codegen id : 9] +Input [2]: [c_customer_sk#24, c_current_addr_sk#25] +Arguments: [c_current_addr_sk#25 ASC NULLS FIRST], false, 0 + +(36) SortMergeJoin [codegen id : 10] +Left keys [1]: [ca_address_sk#21] +Right keys [1]: [c_current_addr_sk#25] +Join condition: None + +(37) Project [codegen id : 10] +Output [2]: [ca_zip#22, c_customer_sk#24] +Input [4]: [ca_address_sk#21, ca_zip#22, c_customer_sk#24, c_current_addr_sk#25] + +(38) Exchange +Input [2]: [ca_zip#22, c_customer_sk#24] 
+Arguments: hashpartitioning(c_customer_sk#24, 5), true, [id=#27] + +(39) Sort [codegen id : 11] +Input [2]: [ca_zip#22, c_customer_sk#24] +Arguments: [c_customer_sk#24 ASC NULLS FIRST], false, 0 + +(40) SortMergeJoin [codegen id : 12] +Left keys [1]: [ss_customer_sk#10] +Right keys [1]: [c_customer_sk#24] +Join condition: NOT (substr(ca_zip#22, 1, 5) = substr(s_zip#18, 1, 5)) + +(41) Project [codegen id : 12] +Output [5]: [ss_ext_sales_price#12, i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5] +Input [9]: [i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5, ss_customer_sk#10, ss_ext_sales_price#12, s_zip#18, ca_zip#22, c_customer_sk#24] + +(42) HashAggregate [codegen id : 12] +Input [5]: [ss_ext_sales_price#12, i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5] +Keys [4]: [i_brand#3, i_brand_id#2, i_manufact_id#4, i_manufact#5] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#12))] +Aggregate Attributes [1]: [sum#28] +Results [5]: [i_brand#3, i_brand_id#2, i_manufact_id#4, i_manufact#5, sum#29] + +(43) Exchange +Input [5]: [i_brand#3, i_brand_id#2, i_manufact_id#4, i_manufact#5, sum#29] +Arguments: hashpartitioning(i_brand#3, i_brand_id#2, i_manufact_id#4, i_manufact#5, 5), true, [id=#30] + +(44) HashAggregate [codegen id : 13] +Input [5]: [i_brand#3, i_brand_id#2, i_manufact_id#4, i_manufact#5, sum#29] +Keys [4]: [i_brand#3, i_brand_id#2, i_manufact_id#4, i_manufact#5] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#12))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#12))#31] +Results [5]: [i_brand_id#2 AS brand_id#32, i_brand#3 AS brand#33, i_manufact_id#4, i_manufact#5, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#12))#31,17,2) AS ext_price#34] + +(45) TakeOrderedAndProject +Input [5]: [brand_id#32, brand#33, i_manufact_id#4, i_manufact#5, ext_price#34] +Arguments: 100, [ext_price#34 DESC NULLS LAST, brand#33 ASC NULLS FIRST, brand_id#32 ASC NULLS FIRST, i_manufact_id#4 ASC NULLS FIRST, i_manufact#5 ASC 
NULLS FIRST], [brand_id#32, brand#33, i_manufact_id#4, i_manufact#5, ext_price#34] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/simplified.txt new file mode 100644 index 0000000000000..1a285ae598c44 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/simplified.txt @@ -0,0 +1,76 @@ +TakeOrderedAndProject [brand,brand_id,ext_price,i_manufact,i_manufact_id] + WholeStageCodegen (13) + HashAggregate [i_brand,i_brand_id,i_manufact,i_manufact_id,sum] [brand,brand_id,ext_price,sum,sum(UnscaledValue(ss_ext_sales_price))] + InputAdapter + Exchange [i_brand,i_brand_id,i_manufact,i_manufact_id] #1 + WholeStageCodegen (12) + HashAggregate [i_brand,i_brand_id,i_manufact,i_manufact_id,ss_ext_sales_price] [sum,sum] + Project [i_brand,i_brand_id,i_manufact,i_manufact_id,ss_ext_sales_price] + SortMergeJoin [c_customer_sk,ca_zip,s_zip,ss_customer_sk] + InputAdapter + WholeStageCodegen (5) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #2 + WholeStageCodegen (4) + Project [i_brand,i_brand_id,i_manufact,i_manufact_id,s_zip,ss_customer_sk,ss_ext_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [i_brand,i_brand_id,i_manufact,i_manufact_id,ss_customer_sk,ss_ext_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand,i_brand_id,i_manufact,i_manufact_id,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [i_brand,i_brand_id,i_item_sk,i_manufact,i_manufact_id] + Filter [i_item_sk,i_manager_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_brand_id,i_item_sk,i_manager_id,i_manufact,i_manufact_id] + Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + 
InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [s_store_sk,s_zip] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_zip] + InputAdapter + WholeStageCodegen (11) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #6 + WholeStageCodegen (10) + Project [c_customer_sk,ca_zip] + SortMergeJoin [c_current_addr_sk,ca_address_sk] + InputAdapter + WholeStageCodegen (7) + Sort [ca_address_sk] + InputAdapter + Exchange [ca_address_sk] #7 + WholeStageCodegen (6) + Filter [ca_address_sk,ca_zip] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_zip] + InputAdapter + WholeStageCodegen (9) + Sort [c_current_addr_sk] + InputAdapter + Exchange [c_current_addr_sk] #8 + WholeStageCodegen (8) + Filter [c_current_addr_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_customer_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/explain.txt new file mode 100644 index 0000000000000..1b9e0d10dfe90 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/explain.txt @@ -0,0 +1,221 @@ +== Physical Plan == +TakeOrderedAndProject (39) ++- * HashAggregate (38) + +- Exchange (37) + +- * HashAggregate (36) + +- * Project (35) + +- * BroadcastHashJoin Inner BuildRight (34) + :- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (23) + : : +- * BroadcastHashJoin Inner BuildRight (22) + : : :- * Project (17) + : : : +- * BroadcastHashJoin 
Inner BuildRight (16) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Project (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.date_dim (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Filter (7) + : : : : +- * ColumnarToRow (6) + : : : : +- Scan parquet default.store_sales (5) + : : : +- BroadcastExchange (15) + : : : +- * Project (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet default.item (11) + : : +- BroadcastExchange (21) + : : +- * Filter (20) + : : +- * ColumnarToRow (19) + : : +- Scan parquet default.customer (18) + : +- BroadcastExchange (27) + : +- * Filter (26) + : +- * ColumnarToRow (25) + : +- Scan parquet default.customer_address (24) + +- BroadcastExchange (33) + +- * Filter (32) + +- * ColumnarToRow (31) + +- Scan parquet default.store (30) + + +(1) Scan parquet default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1998), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 6] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(3) Filter [codegen id : 6] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 1998)) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 6] +Output [1]: [d_date_sk#1] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(5) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#4, ss_item_sk#5, ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [5]: [ss_sold_date_sk#4, ss_item_sk#5, ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8] + +(7) Filter [codegen id : 1] +Input [5]: [ss_sold_date_sk#4, ss_item_sk#5, ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8] +Condition : (((isnotnull(ss_sold_date_sk#4) AND isnotnull(ss_item_sk#5)) AND isnotnull(ss_customer_sk#6)) AND isnotnull(ss_store_sk#7)) + +(8) BroadcastExchange +Input [5]: [ss_sold_date_sk#4, ss_item_sk#5, ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] + +(9) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#4] +Join condition: None + +(10) Project [codegen id : 6] +Output [4]: [ss_item_sk#5, ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8] +Input [6]: [d_date_sk#1, ss_sold_date_sk#4, ss_item_sk#5, ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8] + +(11) Scan parquet default.item +Output [6]: [i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, i_manager_id#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,8), IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [6]: [i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, i_manager_id#15] + +(13) Filter [codegen id : 2] +Input [6]: [i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, i_manager_id#15] +Condition : ((isnotnull(i_manager_id#15) AND 
(i_manager_id#15 = 8)) AND isnotnull(i_item_sk#10)) + +(14) Project [codegen id : 2] +Output [5]: [i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14] +Input [6]: [i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, i_manager_id#15] + +(15) BroadcastExchange +Input [5]: [i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(16) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_item_sk#5] +Right keys [1]: [i_item_sk#10] +Join condition: None + +(17) Project [codegen id : 6] +Output [7]: [ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14] +Input [9]: [ss_item_sk#5, ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8, i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14] + +(18) Scan parquet default.customer +Output [2]: [c_customer_sk#17, c_current_addr_sk#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [c_customer_sk#17, c_current_addr_sk#18] + +(20) Filter [codegen id : 3] +Input [2]: [c_customer_sk#17, c_current_addr_sk#18] +Condition : (isnotnull(c_customer_sk#17) AND isnotnull(c_current_addr_sk#18)) + +(21) BroadcastExchange +Input [2]: [c_customer_sk#17, c_current_addr_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] + +(22) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#6] +Right keys [1]: [c_customer_sk#17] +Join condition: None + +(23) Project [codegen id : 6] +Output [7]: [ss_store_sk#7, ss_ext_sales_price#8, i_brand_id#11, i_brand#12, i_manufact_id#13, 
i_manufact#14, c_current_addr_sk#18] +Input [9]: [ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, c_customer_sk#17, c_current_addr_sk#18] + +(24) Scan parquet default.customer_address +Output [2]: [ca_address_sk#20, ca_zip#21] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_zip)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 4] +Input [2]: [ca_address_sk#20, ca_zip#21] + +(26) Filter [codegen id : 4] +Input [2]: [ca_address_sk#20, ca_zip#21] +Condition : (isnotnull(ca_address_sk#20) AND isnotnull(ca_zip#21)) + +(27) BroadcastExchange +Input [2]: [ca_address_sk#20, ca_zip#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] + +(28) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [c_current_addr_sk#18] +Right keys [1]: [ca_address_sk#20] +Join condition: None + +(29) Project [codegen id : 6] +Output [7]: [ss_store_sk#7, ss_ext_sales_price#8, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, ca_zip#21] +Input [9]: [ss_store_sk#7, ss_ext_sales_price#8, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, c_current_addr_sk#18, ca_address_sk#20, ca_zip#21] + +(30) Scan parquet default.store +Output [2]: [s_store_sk#23, s_zip#24] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_zip), IsNotNull(s_store_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 5] +Input [2]: [s_store_sk#23, s_zip#24] + +(32) Filter [codegen id : 5] +Input [2]: [s_store_sk#23, s_zip#24] +Condition : (isnotnull(s_zip#24) AND isnotnull(s_store_sk#23)) + +(33) BroadcastExchange +Input [2]: [s_store_sk#23, s_zip#24] 
+Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#25] + +(34) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_store_sk#7] +Right keys [1]: [s_store_sk#23] +Join condition: NOT (substr(ca_zip#21, 1, 5) = substr(s_zip#24, 1, 5)) + +(35) Project [codegen id : 6] +Output [5]: [ss_ext_sales_price#8, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14] +Input [9]: [ss_store_sk#7, ss_ext_sales_price#8, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, ca_zip#21, s_store_sk#23, s_zip#24] + +(36) HashAggregate [codegen id : 6] +Input [5]: [ss_ext_sales_price#8, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14] +Keys [4]: [i_brand#12, i_brand_id#11, i_manufact_id#13, i_manufact#14] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#8))] +Aggregate Attributes [1]: [sum#26] +Results [5]: [i_brand#12, i_brand_id#11, i_manufact_id#13, i_manufact#14, sum#27] + +(37) Exchange +Input [5]: [i_brand#12, i_brand_id#11, i_manufact_id#13, i_manufact#14, sum#27] +Arguments: hashpartitioning(i_brand#12, i_brand_id#11, i_manufact_id#13, i_manufact#14, 5), true, [id=#28] + +(38) HashAggregate [codegen id : 7] +Input [5]: [i_brand#12, i_brand_id#11, i_manufact_id#13, i_manufact#14, sum#27] +Keys [4]: [i_brand#12, i_brand_id#11, i_manufact_id#13, i_manufact#14] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#8))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#8))#29] +Results [5]: [i_brand_id#11 AS brand_id#30, i_brand#12 AS brand#31, i_manufact_id#13, i_manufact#14, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#8))#29,17,2) AS ext_price#32] + +(39) TakeOrderedAndProject +Input [5]: [brand_id#30, brand#31, i_manufact_id#13, i_manufact#14, ext_price#32] +Arguments: 100, [ext_price#32 DESC NULLS LAST, brand#31 ASC NULLS FIRST, brand_id#30 ASC NULLS FIRST, i_manufact_id#13 ASC NULLS FIRST, i_manufact#14 ASC NULLS FIRST], [brand_id#30, brand#31, i_manufact_id#13, i_manufact#14, 
ext_price#32] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/simplified.txt new file mode 100644 index 0000000000000..9217520556863 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/simplified.txt @@ -0,0 +1,58 @@ +TakeOrderedAndProject [brand,brand_id,ext_price,i_manufact,i_manufact_id] + WholeStageCodegen (7) + HashAggregate [i_brand,i_brand_id,i_manufact,i_manufact_id,sum] [brand,brand_id,ext_price,sum,sum(UnscaledValue(ss_ext_sales_price))] + InputAdapter + Exchange [i_brand,i_brand_id,i_manufact,i_manufact_id] #1 + WholeStageCodegen (6) + HashAggregate [i_brand,i_brand_id,i_manufact,i_manufact_id,ss_ext_sales_price] [sum,sum] + Project [i_brand,i_brand_id,i_manufact,i_manufact_id,ss_ext_sales_price] + BroadcastHashJoin [ca_zip,s_store_sk,s_zip,ss_store_sk] + Project [ca_zip,i_brand,i_brand_id,i_manufact,i_manufact_id,ss_ext_sales_price,ss_store_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_addr_sk,i_brand,i_brand_id,i_manufact,i_manufact_id,ss_ext_sales_price,ss_store_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [i_brand,i_brand_id,i_manufact,i_manufact_id,ss_customer_sk,ss_ext_sales_price,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project 
[i_brand,i_brand_id,i_item_sk,i_manufact,i_manufact_id] + Filter [i_item_sk,i_manager_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_brand_id,i_item_sk,i_manager_id,i_manufact,i_manufact_id] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [c_current_addr_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_customer_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [ca_address_sk,ca_zip] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_zip] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [s_store_sk,s_zip] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_zip] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/explain.txt new file mode 100644 index 0000000000000..a11f0125eee9b --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/explain.txt @@ -0,0 +1,233 @@ +== Physical Plan == +* Sort (42) ++- Exchange (41) + +- * Project (40) + +- * SortMergeJoin Inner (39) + :- * Sort (27) + : +- Exchange (26) + : +- * Project (25) + : +- * BroadcastHashJoin Inner BuildRight (24) + : :- * HashAggregate (18) + : : +- Exchange (17) + : : +- * HashAggregate (16) + : : +- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- Union (9) + : : : :- * Project (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.web_sales (1) + : : : +- * Project (8) + : : : +- * Filter (7) + : : : +- * ColumnarToRow (6) + : : : +- Scan parquet default.catalog_sales (5) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet default.date_dim (10) + : +- BroadcastExchange (23) + : +- * Project (22) + : +- * 
Filter (21) + : +- * ColumnarToRow (20) + : +- Scan parquet default.date_dim (19) + +- * Sort (38) + +- Exchange (37) + +- * Project (36) + +- * BroadcastHashJoin Inner BuildRight (35) + :- * HashAggregate (29) + : +- ReusedExchange (28) + +- BroadcastExchange (34) + +- * Project (33) + +- * Filter (32) + +- * ColumnarToRow (31) + +- Scan parquet default.date_dim (30) + + +(1) Scan parquet default.web_sales +Output [2]: [ws_sold_date_sk#1, ws_ext_sales_price#2] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [2]: [ws_sold_date_sk#1, ws_ext_sales_price#2] + +(3) Filter [codegen id : 1] +Input [2]: [ws_sold_date_sk#1, ws_ext_sales_price#2] +Condition : isnotnull(ws_sold_date_sk#1) + +(4) Project [codegen id : 1] +Output [2]: [ws_sold_date_sk#1 AS sold_date_sk#3, ws_ext_sales_price#2 AS sales_price#4] +Input [2]: [ws_sold_date_sk#1, ws_ext_sales_price#2] + +(5) Scan parquet default.catalog_sales +Output [2]: [cs_sold_date_sk#5, cs_ext_sales_price#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 2] +Input [2]: [cs_sold_date_sk#5, cs_ext_sales_price#6] + +(7) Filter [codegen id : 2] +Input [2]: [cs_sold_date_sk#5, cs_ext_sales_price#6] +Condition : isnotnull(cs_sold_date_sk#5) + +(8) Project [codegen id : 2] +Output [2]: [cs_sold_date_sk#5 AS sold_date_sk#7, cs_ext_sales_price#6 AS sales_price#8] +Input [2]: [cs_sold_date_sk#5, cs_ext_sales_price#6] + +(9) Union + +(10) Scan parquet default.date_dim +Output [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_week_seq)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] + +(12) Filter [codegen id : 3] +Input [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] +Condition : (isnotnull(d_date_sk#9) AND isnotnull(d_week_seq#10)) + +(13) BroadcastExchange +Input [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [sold_date_sk#3] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(15) Project [codegen id : 4] +Output [3]: [sales_price#4, d_week_seq#10, d_day_name#11] +Input [5]: [sold_date_sk#3, sales_price#4, d_date_sk#9, d_week_seq#10, d_day_name#11] + +(16) HashAggregate [codegen id : 4] +Input [3]: [sales_price#4, d_week_seq#10, d_day_name#11] +Keys [1]: [d_week_seq#10] +Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday) THEN sales_price#4 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday) THEN sales_price#4 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday) THEN sales_price#4 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday) THEN sales_price#4 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday) THEN sales_price#4 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday) THEN sales_price#4 ELSE null END))] +Aggregate Attributes [7]: [sum#13, sum#14, sum#15, sum#16, sum#17, sum#18, sum#19] +Results [8]: [d_week_seq#10, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] + +(17) Exchange +Input [8]: [d_week_seq#10, 
sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] +Arguments: hashpartitioning(d_week_seq#10, 5), true, [id=#27] + +(18) HashAggregate [codegen id : 6] +Input [8]: [d_week_seq#10, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] +Keys [1]: [d_week_seq#10] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday) THEN sales_price#4 ELSE null END))] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday) THEN sales_price#4 ELSE null END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday) THEN sales_price#4 ELSE null END))#29, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday) THEN sales_price#4 ELSE null END))#30, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END))#31, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday) THEN sales_price#4 ELSE null END))#32, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday) THEN sales_price#4 ELSE null END))#33, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday) THEN sales_price#4 ELSE null END))#34] +Results [8]: [d_week_seq#10, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday) THEN sales_price#4 ELSE null END))#28,17,2) AS sun_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday) THEN sales_price#4 ELSE null END))#29,17,2) AS mon_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday) THEN sales_price#4 ELSE null END))#30,17,2) AS 
tue_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END))#31,17,2) AS wed_sales#38, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday) THEN sales_price#4 ELSE null END))#32,17,2) AS thu_sales#39, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday) THEN sales_price#4 ELSE null END))#33,17,2) AS fri_sales#40, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday) THEN sales_price#4 ELSE null END))#34,17,2) AS sat_sales#41] + +(19) Scan parquet default.date_dim +Output [2]: [d_week_seq#42, d_year#43] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_week_seq)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 5] +Input [2]: [d_week_seq#42, d_year#43] + +(21) Filter [codegen id : 5] +Input [2]: [d_week_seq#42, d_year#43] +Condition : ((isnotnull(d_year#43) AND (d_year#43 = 2001)) AND isnotnull(d_week_seq#42)) + +(22) Project [codegen id : 5] +Output [1]: [d_week_seq#42] +Input [2]: [d_week_seq#42, d_year#43] + +(23) BroadcastExchange +Input [1]: [d_week_seq#42] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#44] + +(24) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [d_week_seq#10] +Right keys [1]: [d_week_seq#42] +Join condition: None + +(25) Project [codegen id : 6] +Output [8]: [d_week_seq#10 AS d_week_seq1#45, sun_sales#35 AS sun_sales1#46, mon_sales#36 AS mon_sales1#47, tue_sales#37 AS tue_sales1#48, wed_sales#38 AS wed_sales1#49, thu_sales#39 AS thu_sales1#50, fri_sales#40 AS fri_sales1#51, sat_sales#41 AS sat_sales1#52] +Input [9]: [d_week_seq#10, sun_sales#35, mon_sales#36, tue_sales#37, wed_sales#38, thu_sales#39, fri_sales#40, sat_sales#41, d_week_seq#42] + +(26) Exchange +Input [8]: [d_week_seq1#45, 
sun_sales1#46, mon_sales1#47, tue_sales1#48, wed_sales1#49, thu_sales1#50, fri_sales1#51, sat_sales1#52] +Arguments: hashpartitioning(d_week_seq1#45, 5), true, [id=#53] + +(27) Sort [codegen id : 7] +Input [8]: [d_week_seq1#45, sun_sales1#46, mon_sales1#47, tue_sales1#48, wed_sales1#49, thu_sales1#50, fri_sales1#51, sat_sales1#52] +Arguments: [d_week_seq1#45 ASC NULLS FIRST], false, 0 + +(28) ReusedExchange [Reuses operator id: 17] +Output [8]: [d_week_seq#10, sum#54, sum#55, sum#56, sum#57, sum#58, sum#59, sum#60] + +(29) HashAggregate [codegen id : 13] +Input [8]: [d_week_seq#10, sum#54, sum#55, sum#56, sum#57, sum#58, sum#59, sum#60] +Keys [1]: [d_week_seq#10] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday) THEN sales_price#4 ELSE null END))] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday) THEN sales_price#4 ELSE null END))#61, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday) THEN sales_price#4 ELSE null END))#62, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday) THEN sales_price#4 ELSE null END))#63, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END))#64, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday) THEN sales_price#4 ELSE null END))#65, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday) THEN sales_price#4 ELSE null END))#66, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday) THEN sales_price#4 ELSE null END))#67] 
+Results [8]: [d_week_seq#10, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday) THEN sales_price#4 ELSE null END))#61,17,2) AS sun_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday) THEN sales_price#4 ELSE null END))#62,17,2) AS mon_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday) THEN sales_price#4 ELSE null END))#63,17,2) AS tue_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END))#64,17,2) AS wed_sales#38, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday) THEN sales_price#4 ELSE null END))#65,17,2) AS thu_sales#39, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday) THEN sales_price#4 ELSE null END))#66,17,2) AS fri_sales#40, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday) THEN sales_price#4 ELSE null END))#67,17,2) AS sat_sales#41] + +(30) Scan parquet default.date_dim +Output [2]: [d_week_seq#68, d_year#69] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_week_seq)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 12] +Input [2]: [d_week_seq#68, d_year#69] + +(32) Filter [codegen id : 12] +Input [2]: [d_week_seq#68, d_year#69] +Condition : ((isnotnull(d_year#69) AND (d_year#69 = 2002)) AND isnotnull(d_week_seq#68)) + +(33) Project [codegen id : 12] +Output [1]: [d_week_seq#68] +Input [2]: [d_week_seq#68, d_year#69] + +(34) BroadcastExchange +Input [1]: [d_week_seq#68] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#70] + +(35) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [d_week_seq#10] +Right keys [1]: [d_week_seq#68] +Join condition: None + +(36) Project [codegen id : 13] +Output [8]: [d_week_seq#10 AS d_week_seq2#71, sun_sales#35 AS 
sun_sales2#72, mon_sales#36 AS mon_sales2#73, tue_sales#37 AS tue_sales2#74, wed_sales#38 AS wed_sales2#75, thu_sales#39 AS thu_sales2#76, fri_sales#40 AS fri_sales2#77, sat_sales#41 AS sat_sales2#78] +Input [9]: [d_week_seq#10, sun_sales#35, mon_sales#36, tue_sales#37, wed_sales#38, thu_sales#39, fri_sales#40, sat_sales#41, d_week_seq#68] + +(37) Exchange +Input [8]: [d_week_seq2#71, sun_sales2#72, mon_sales2#73, tue_sales2#74, wed_sales2#75, thu_sales2#76, fri_sales2#77, sat_sales2#78] +Arguments: hashpartitioning((d_week_seq2#71 - 53), 5), true, [id=#79] + +(38) Sort [codegen id : 14] +Input [8]: [d_week_seq2#71, sun_sales2#72, mon_sales2#73, tue_sales2#74, wed_sales2#75, thu_sales2#76, fri_sales2#77, sat_sales2#78] +Arguments: [(d_week_seq2#71 - 53) ASC NULLS FIRST], false, 0 + +(39) SortMergeJoin [codegen id : 15] +Left keys [1]: [d_week_seq1#45] +Right keys [1]: [(d_week_seq2#71 - 53)] +Join condition: None + +(40) Project [codegen id : 15] +Output [8]: [d_week_seq1#45, round(CheckOverflow((promote_precision(sun_sales1#46) / promote_precision(sun_sales2#72)), DecimalType(37,20), true), 2) AS round((sun_sales1 / sun_sales2), 2)#80, round(CheckOverflow((promote_precision(mon_sales1#47) / promote_precision(mon_sales2#73)), DecimalType(37,20), true), 2) AS round((mon_sales1 / mon_sales2), 2)#81, round(CheckOverflow((promote_precision(tue_sales1#48) / promote_precision(tue_sales2#74)), DecimalType(37,20), true), 2) AS round((tue_sales1 / tue_sales2), 2)#82, round(CheckOverflow((promote_precision(wed_sales1#49) / promote_precision(wed_sales2#75)), DecimalType(37,20), true), 2) AS round((wed_sales1 / wed_sales2), 2)#83, round(CheckOverflow((promote_precision(thu_sales1#50) / promote_precision(thu_sales2#76)), DecimalType(37,20), true), 2) AS round((thu_sales1 / thu_sales2), 2)#84, round(CheckOverflow((promote_precision(fri_sales1#51) / promote_precision(fri_sales2#77)), DecimalType(37,20), true), 2) AS round((fri_sales1 / fri_sales2), 2)#85, 
round(CheckOverflow((promote_precision(sat_sales1#52) / promote_precision(sat_sales2#78)), DecimalType(37,20), true), 2) AS round((sat_sales1 / sat_sales2), 2)#86] +Input [16]: [d_week_seq1#45, sun_sales1#46, mon_sales1#47, tue_sales1#48, wed_sales1#49, thu_sales1#50, fri_sales1#51, sat_sales1#52, d_week_seq2#71, sun_sales2#72, mon_sales2#73, tue_sales2#74, wed_sales2#75, thu_sales2#76, fri_sales2#77, sat_sales2#78] + +(41) Exchange +Input [8]: [d_week_seq1#45, round((sun_sales1 / sun_sales2), 2)#80, round((mon_sales1 / mon_sales2), 2)#81, round((tue_sales1 / tue_sales2), 2)#82, round((wed_sales1 / wed_sales2), 2)#83, round((thu_sales1 / thu_sales2), 2)#84, round((fri_sales1 / fri_sales2), 2)#85, round((sat_sales1 / sat_sales2), 2)#86] +Arguments: rangepartitioning(d_week_seq1#45 ASC NULLS FIRST, 5), true, [id=#87] + +(42) Sort [codegen id : 16] +Input [8]: [d_week_seq1#45, round((sun_sales1 / sun_sales2), 2)#80, round((mon_sales1 / mon_sales2), 2)#81, round((tue_sales1 / tue_sales2), 2)#82, round((wed_sales1 / wed_sales2), 2)#83, round((thu_sales1 / thu_sales2), 2)#84, round((fri_sales1 / fri_sales2), 2)#85, round((sat_sales1 / sat_sales2), 2)#86] +Arguments: [d_week_seq1#45 ASC NULLS FIRST], true, 0 + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/simplified.txt new file mode 100644 index 0000000000000..f7d6dce3fbf97 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/simplified.txt @@ -0,0 +1,70 @@ +WholeStageCodegen (16) + Sort [d_week_seq1] + InputAdapter + Exchange [d_week_seq1] #1 + WholeStageCodegen (15) + Project [d_week_seq1,fri_sales1,fri_sales2,mon_sales1,mon_sales2,sat_sales1,sat_sales2,sun_sales1,sun_sales2,thu_sales1,thu_sales2,tue_sales1,tue_sales2,wed_sales1,wed_sales2] + SortMergeJoin [d_week_seq1,d_week_seq2] + InputAdapter + WholeStageCodegen (7) + Sort 
[d_week_seq1] + InputAdapter + Exchange [d_week_seq1] #2 + WholeStageCodegen (6) + Project [d_week_seq,fri_sales,mon_sales,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] + BroadcastHashJoin [d_week_seq,d_week_seq] + HashAggregate [d_week_seq,sum,sum,sum,sum,sum,sum,sum] [fri_sales,mon_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sun_sales,thu_sales,tue_sales,wed_sales] + InputAdapter + Exchange [d_week_seq] #3 + WholeStageCodegen (4) + HashAggregate [d_day_name,d_week_seq,sales_price] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [d_day_name,d_week_seq,sales_price] + BroadcastHashJoin [d_date_sk,sold_date_sk] + InputAdapter + Union + WholeStageCodegen (1) + Project [ws_ext_sales_price,ws_sold_date_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_sales_price,ws_sold_date_sk] + WholeStageCodegen (2) + Project [cs_ext_sales_price,cs_sold_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ext_sales_price,cs_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [d_date_sk,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_day_name,d_week_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (5) + Project [d_week_seq] + Filter [d_week_seq,d_year] + ColumnarToRow + InputAdapter + Scan 
parquet default.date_dim [d_week_seq,d_year] + InputAdapter + WholeStageCodegen (14) + Sort [d_week_seq2] + InputAdapter + Exchange [d_week_seq2] #6 + WholeStageCodegen (13) + Project [d_week_seq,fri_sales,mon_sales,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] + BroadcastHashJoin [d_week_seq,d_week_seq] + HashAggregate [d_week_seq,sum,sum,sum,sum,sum,sum,sum] [fri_sales,mon_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sun_sales,thu_sales,tue_sales,wed_sales] + InputAdapter + ReusedExchange [d_week_seq,sum,sum,sum,sum,sum,sum,sum] #3 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (12) + Project [d_week_seq] + Filter [d_week_seq,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_week_seq,d_year] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/explain.txt new file mode 100644 index 0000000000000..d944c21c2efb6 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/explain.txt @@ -0,0 +1,218 @@ +== Physical Plan == +* Sort (39) ++- Exchange (38) + +- * Project (37) + +- * BroadcastHashJoin Inner BuildRight (36) + :- * Project (25) + : +- * BroadcastHashJoin Inner BuildRight (24) + : :- * HashAggregate (18) + : : +- Exchange (17) + : : +- * HashAggregate (16) + : : +- * Project (15) + : : +- * 
BroadcastHashJoin Inner BuildRight (14) + : : :- Union (9) + : : : :- * Project (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.web_sales (1) + : : : +- * Project (8) + : : : +- * Filter (7) + : : : +- * ColumnarToRow (6) + : : : +- Scan parquet default.catalog_sales (5) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet default.date_dim (10) + : +- BroadcastExchange (23) + : +- * Project (22) + : +- * Filter (21) + : +- * ColumnarToRow (20) + : +- Scan parquet default.date_dim (19) + +- BroadcastExchange (35) + +- * Project (34) + +- * BroadcastHashJoin Inner BuildRight (33) + :- * HashAggregate (27) + : +- ReusedExchange (26) + +- BroadcastExchange (32) + +- * Project (31) + +- * Filter (30) + +- * ColumnarToRow (29) + +- Scan parquet default.date_dim (28) + + +(1) Scan parquet default.web_sales +Output [2]: [ws_sold_date_sk#1, ws_ext_sales_price#2] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [2]: [ws_sold_date_sk#1, ws_ext_sales_price#2] + +(3) Filter [codegen id : 1] +Input [2]: [ws_sold_date_sk#1, ws_ext_sales_price#2] +Condition : isnotnull(ws_sold_date_sk#1) + +(4) Project [codegen id : 1] +Output [2]: [ws_sold_date_sk#1 AS sold_date_sk#3, ws_ext_sales_price#2 AS sales_price#4] +Input [2]: [ws_sold_date_sk#1, ws_ext_sales_price#2] + +(5) Scan parquet default.catalog_sales +Output [2]: [cs_sold_date_sk#5, cs_ext_sales_price#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 2] +Input [2]: 
[cs_sold_date_sk#5, cs_ext_sales_price#6] + +(7) Filter [codegen id : 2] +Input [2]: [cs_sold_date_sk#5, cs_ext_sales_price#6] +Condition : isnotnull(cs_sold_date_sk#5) + +(8) Project [codegen id : 2] +Output [2]: [cs_sold_date_sk#5 AS sold_date_sk#7, cs_ext_sales_price#6 AS sales_price#8] +Input [2]: [cs_sold_date_sk#5, cs_ext_sales_price#6] + +(9) Union + +(10) Scan parquet default.date_dim +Output [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_week_seq)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] + +(12) Filter [codegen id : 3] +Input [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] +Condition : (isnotnull(d_date_sk#9) AND isnotnull(d_week_seq#10)) + +(13) BroadcastExchange +Input [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [sold_date_sk#3] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(15) Project [codegen id : 4] +Output [3]: [sales_price#4, d_week_seq#10, d_day_name#11] +Input [5]: [sold_date_sk#3, sales_price#4, d_date_sk#9, d_week_seq#10, d_day_name#11] + +(16) HashAggregate [codegen id : 4] +Input [3]: [sales_price#4, d_week_seq#10, d_day_name#11] +Keys [1]: [d_week_seq#10] +Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday) THEN sales_price#4 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday) THEN sales_price#4 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday) THEN sales_price#4 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END)), 
partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday) THEN sales_price#4 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday) THEN sales_price#4 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday) THEN sales_price#4 ELSE null END))] +Aggregate Attributes [7]: [sum#13, sum#14, sum#15, sum#16, sum#17, sum#18, sum#19] +Results [8]: [d_week_seq#10, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] + +(17) Exchange +Input [8]: [d_week_seq#10, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] +Arguments: hashpartitioning(d_week_seq#10, 5), true, [id=#27] + +(18) HashAggregate [codegen id : 12] +Input [8]: [d_week_seq#10, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] +Keys [1]: [d_week_seq#10] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday) THEN sales_price#4 ELSE null END))] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday) THEN sales_price#4 ELSE null END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday) THEN sales_price#4 ELSE null END))#29, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday) THEN sales_price#4 ELSE null END))#30, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END))#31, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday) THEN sales_price#4 ELSE null END))#32, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday) THEN sales_price#4 ELSE 
null END))#33, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday) THEN sales_price#4 ELSE null END))#34] +Results [8]: [d_week_seq#10, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday) THEN sales_price#4 ELSE null END))#28,17,2) AS sun_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday) THEN sales_price#4 ELSE null END))#29,17,2) AS mon_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday) THEN sales_price#4 ELSE null END))#30,17,2) AS tue_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END))#31,17,2) AS wed_sales#38, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday) THEN sales_price#4 ELSE null END))#32,17,2) AS thu_sales#39, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday) THEN sales_price#4 ELSE null END))#33,17,2) AS fri_sales#40, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday) THEN sales_price#4 ELSE null END))#34,17,2) AS sat_sales#41] + +(19) Scan parquet default.date_dim +Output [2]: [d_week_seq#42, d_year#43] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_week_seq)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 5] +Input [2]: [d_week_seq#42, d_year#43] + +(21) Filter [codegen id : 5] +Input [2]: [d_week_seq#42, d_year#43] +Condition : ((isnotnull(d_year#43) AND (d_year#43 = 2001)) AND isnotnull(d_week_seq#42)) + +(22) Project [codegen id : 5] +Output [1]: [d_week_seq#42] +Input [2]: [d_week_seq#42, d_year#43] + +(23) BroadcastExchange +Input [1]: [d_week_seq#42] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#44] + +(24) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [d_week_seq#10] +Right keys [1]: [d_week_seq#42] +Join condition: None 
+ +(25) Project [codegen id : 12] +Output [8]: [d_week_seq#10 AS d_week_seq1#45, sun_sales#35 AS sun_sales1#46, mon_sales#36 AS mon_sales1#47, tue_sales#37 AS tue_sales1#48, wed_sales#38 AS wed_sales1#49, thu_sales#39 AS thu_sales1#50, fri_sales#40 AS fri_sales1#51, sat_sales#41 AS sat_sales1#52] +Input [9]: [d_week_seq#10, sun_sales#35, mon_sales#36, tue_sales#37, wed_sales#38, thu_sales#39, fri_sales#40, sat_sales#41, d_week_seq#42] + +(26) ReusedExchange [Reuses operator id: 17] +Output [8]: [d_week_seq#10, sum#53, sum#54, sum#55, sum#56, sum#57, sum#58, sum#59] + +(27) HashAggregate [codegen id : 11] +Input [8]: [d_week_seq#10, sum#53, sum#54, sum#55, sum#56, sum#57, sum#58, sum#59] +Keys [1]: [d_week_seq#10] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday) THEN sales_price#4 ELSE null END))] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday) THEN sales_price#4 ELSE null END))#60, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday) THEN sales_price#4 ELSE null END))#61, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday) THEN sales_price#4 ELSE null END))#62, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END))#63, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday) THEN sales_price#4 ELSE null END))#64, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday) THEN sales_price#4 ELSE null END))#65, sum(UnscaledValue(CASE WHEN (d_day_name#11 
= Saturday) THEN sales_price#4 ELSE null END))#66] +Results [8]: [d_week_seq#10, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday) THEN sales_price#4 ELSE null END))#60,17,2) AS sun_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday) THEN sales_price#4 ELSE null END))#61,17,2) AS mon_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday) THEN sales_price#4 ELSE null END))#62,17,2) AS tue_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END))#63,17,2) AS wed_sales#38, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday) THEN sales_price#4 ELSE null END))#64,17,2) AS thu_sales#39, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday) THEN sales_price#4 ELSE null END))#65,17,2) AS fri_sales#40, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday) THEN sales_price#4 ELSE null END))#66,17,2) AS sat_sales#41] + +(28) Scan parquet default.date_dim +Output [2]: [d_week_seq#67, d_year#68] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_week_seq)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 10] +Input [2]: [d_week_seq#67, d_year#68] + +(30) Filter [codegen id : 10] +Input [2]: [d_week_seq#67, d_year#68] +Condition : ((isnotnull(d_year#68) AND (d_year#68 = 2002)) AND isnotnull(d_week_seq#67)) + +(31) Project [codegen id : 10] +Output [1]: [d_week_seq#67] +Input [2]: [d_week_seq#67, d_year#68] + +(32) BroadcastExchange +Input [1]: [d_week_seq#67] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#69] + +(33) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [d_week_seq#10] +Right keys [1]: [d_week_seq#67] +Join condition: None + +(34) Project [codegen id : 11] +Output [8]: 
[d_week_seq#10 AS d_week_seq2#70, sun_sales#35 AS sun_sales2#71, mon_sales#36 AS mon_sales2#72, tue_sales#37 AS tue_sales2#73, wed_sales#38 AS wed_sales2#74, thu_sales#39 AS thu_sales2#75, fri_sales#40 AS fri_sales2#76, sat_sales#41 AS sat_sales2#77] +Input [9]: [d_week_seq#10, sun_sales#35, mon_sales#36, tue_sales#37, wed_sales#38, thu_sales#39, fri_sales#40, sat_sales#41, d_week_seq#67] + +(35) BroadcastExchange +Input [8]: [d_week_seq2#70, sun_sales2#71, mon_sales2#72, tue_sales2#73, wed_sales2#74, thu_sales2#75, fri_sales2#76, sat_sales2#77] +Arguments: HashedRelationBroadcastMode(List(cast((input[0, int, true] - 53) as bigint)),false), [id=#78] + +(36) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [d_week_seq1#45] +Right keys [1]: [(d_week_seq2#70 - 53)] +Join condition: None + +(37) Project [codegen id : 12] +Output [8]: [d_week_seq1#45, round(CheckOverflow((promote_precision(sun_sales1#46) / promote_precision(sun_sales2#71)), DecimalType(37,20), true), 2) AS round((sun_sales1 / sun_sales2), 2)#79, round(CheckOverflow((promote_precision(mon_sales1#47) / promote_precision(mon_sales2#72)), DecimalType(37,20), true), 2) AS round((mon_sales1 / mon_sales2), 2)#80, round(CheckOverflow((promote_precision(tue_sales1#48) / promote_precision(tue_sales2#73)), DecimalType(37,20), true), 2) AS round((tue_sales1 / tue_sales2), 2)#81, round(CheckOverflow((promote_precision(wed_sales1#49) / promote_precision(wed_sales2#74)), DecimalType(37,20), true), 2) AS round((wed_sales1 / wed_sales2), 2)#82, round(CheckOverflow((promote_precision(thu_sales1#50) / promote_precision(thu_sales2#75)), DecimalType(37,20), true), 2) AS round((thu_sales1 / thu_sales2), 2)#83, round(CheckOverflow((promote_precision(fri_sales1#51) / promote_precision(fri_sales2#76)), DecimalType(37,20), true), 2) AS round((fri_sales1 / fri_sales2), 2)#84, round(CheckOverflow((promote_precision(sat_sales1#52) / promote_precision(sat_sales2#77)), DecimalType(37,20), true), 2) AS round((sat_sales1 / 
sat_sales2), 2)#85] +Input [16]: [d_week_seq1#45, sun_sales1#46, mon_sales1#47, tue_sales1#48, wed_sales1#49, thu_sales1#50, fri_sales1#51, sat_sales1#52, d_week_seq2#70, sun_sales2#71, mon_sales2#72, tue_sales2#73, wed_sales2#74, thu_sales2#75, fri_sales2#76, sat_sales2#77] + +(38) Exchange +Input [8]: [d_week_seq1#45, round((sun_sales1 / sun_sales2), 2)#79, round((mon_sales1 / mon_sales2), 2)#80, round((tue_sales1 / tue_sales2), 2)#81, round((wed_sales1 / wed_sales2), 2)#82, round((thu_sales1 / thu_sales2), 2)#83, round((fri_sales1 / fri_sales2), 2)#84, round((sat_sales1 / sat_sales2), 2)#85] +Arguments: rangepartitioning(d_week_seq1#45 ASC NULLS FIRST, 5), true, [id=#86] + +(39) Sort [codegen id : 13] +Input [8]: [d_week_seq1#45, round((sun_sales1 / sun_sales2), 2)#79, round((mon_sales1 / mon_sales2), 2)#80, round((tue_sales1 / tue_sales2), 2)#81, round((wed_sales1 / wed_sales2), 2)#82, round((thu_sales1 / thu_sales2), 2)#83, round((fri_sales1 / fri_sales2), 2)#84, round((sat_sales1 / sat_sales2), 2)#85] +Arguments: [d_week_seq1#45 ASC NULLS FIRST], true, 0 + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/simplified.txt new file mode 100644 index 0000000000000..2e2d1e53fda09 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/simplified.txt @@ -0,0 +1,61 @@ +WholeStageCodegen (13) + Sort [d_week_seq1] + InputAdapter + Exchange [d_week_seq1] #1 + WholeStageCodegen (12) + Project [d_week_seq1,fri_sales1,fri_sales2,mon_sales1,mon_sales2,sat_sales1,sat_sales2,sun_sales1,sun_sales2,thu_sales1,thu_sales2,tue_sales1,tue_sales2,wed_sales1,wed_sales2] + BroadcastHashJoin [d_week_seq1,d_week_seq2] + Project [d_week_seq,fri_sales,mon_sales,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] + BroadcastHashJoin [d_week_seq,d_week_seq] + HashAggregate [d_week_seq,sum,sum,sum,sum,sum,sum,sum] 
[fri_sales,mon_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sun_sales,thu_sales,tue_sales,wed_sales] + InputAdapter + Exchange [d_week_seq] #2 + WholeStageCodegen (4) + HashAggregate [d_day_name,d_week_seq,sales_price] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [d_day_name,d_week_seq,sales_price] + BroadcastHashJoin [d_date_sk,sold_date_sk] + InputAdapter + Union + WholeStageCodegen (1) + Project [ws_ext_sales_price,ws_sold_date_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_sales_price,ws_sold_date_sk] + WholeStageCodegen (2) + Project [cs_ext_sales_price,cs_sold_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ext_sales_price,cs_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (3) + Filter [d_date_sk,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_day_name,d_week_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (5) + Project [d_week_seq] + Filter [d_week_seq,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_week_seq,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (11) + Project [d_week_seq,fri_sales,mon_sales,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] + BroadcastHashJoin [d_week_seq,d_week_seq] + HashAggregate 
[d_week_seq,sum,sum,sum,sum,sum,sum,sum] [fri_sales,mon_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sun_sales,thu_sales,tue_sales,wed_sales] + InputAdapter + ReusedExchange [d_week_seq,sum,sum,sum,sum,sum,sum,sum] #2 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (10) + Project [d_week_seq] + Filter [d_week_seq,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_week_seq,d_year] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20.sf100/explain.txt new file mode 100644 index 0000000000000..92ac79f525fb5 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20.sf100/explain.txt @@ -0,0 +1,152 @@ +== Physical Plan == +TakeOrderedAndProject (27) ++- * Project (26) + +- Window (25) + +- * Sort (24) + +- Exchange (23) + +- * HashAggregate (22) + +- Exchange (21) + +- * HashAggregate (20) + +- * Project (19) + +- * SortMergeJoin Inner (18) + :- * Sort (12) + : +- Exchange (11) + : +- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.catalog_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.date_dim (4) + +- * Sort (17) + +- 
Exchange (16) + +- * Filter (15) + +- * ColumnarToRow (14) + +- Scan parquet default.item (13) + + +(1) Scan parquet default.catalog_sales +Output [3]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_sales_price#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [3]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_sales_price#3] + +(3) Filter [codegen id : 2] +Input [3]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_sales_price#3] +Condition : (isnotnull(cs_item_sk#2) AND isnotnull(cs_sold_date_sk#1)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#4, d_date#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#4, d_date#5] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#4, d_date#5] +Condition : (((isnotnull(d_date#5) AND (d_date#5 >= 10644)) AND (d_date#5 <= 10674)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_date#5] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(9) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(10) Project [codegen id : 2] +Output [2]: [cs_item_sk#2, cs_ext_sales_price#3] +Input [4]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_sales_price#3, d_date_sk#4] + +(11) Exchange +Input [2]: [cs_item_sk#2, 
cs_ext_sales_price#3] +Arguments: hashpartitioning(cs_item_sk#2, 5), true, [id=#7] + +(12) Sort [codegen id : 3] +Input [2]: [cs_item_sk#2, cs_ext_sales_price#3] +Arguments: [cs_item_sk#2 ASC NULLS FIRST], false, 0 + +(13) Scan parquet default.item +Output [6]: [i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [In(i_category, [Sports,Books,Home]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 4] +Input [6]: [i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] + +(15) Filter [codegen id : 4] +Input [6]: [i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Condition : (i_category#13 IN (Sports,Books,Home) AND isnotnull(i_item_sk#8)) + +(16) Exchange +Input [6]: [i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Arguments: hashpartitioning(i_item_sk#8, 5), true, [id=#14] + +(17) Sort [codegen id : 5] +Input [6]: [i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Arguments: [i_item_sk#8 ASC NULLS FIRST], false, 0 + +(18) SortMergeJoin [codegen id : 6] +Left keys [1]: [cs_item_sk#2] +Right keys [1]: [i_item_sk#8] +Join condition: None + +(19) Project [codegen id : 6] +Output [6]: [cs_ext_sales_price#3, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Input [8]: [cs_item_sk#2, cs_ext_sales_price#3, i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] + +(20) HashAggregate [codegen id : 6] +Input [6]: [cs_ext_sales_price#3, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Keys [5]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11] +Functions [1]: 
[partial_sum(UnscaledValue(cs_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#15] +Results [6]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, sum#16] + +(21) Exchange +Input [6]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, sum#16] +Arguments: hashpartitioning(i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, 5), true, [id=#17] + +(22) HashAggregate [codegen id : 7] +Input [6]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, sum#16] +Keys [5]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#3))#18] +Results [8]: [i_item_desc#10, i_category#13, i_class#12, i_current_price#11, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#3))#18,17,2) AS itemrevenue#19, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#3))#18,17,2) AS _w0#20, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#3))#18,17,2) AS _w1#21, i_item_id#9] + +(23) Exchange +Input [8]: [i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, _w0#20, _w1#21, i_item_id#9] +Arguments: hashpartitioning(i_class#12, 5), true, [id=#22] + +(24) Sort [codegen id : 8] +Input [8]: [i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, _w0#20, _w1#21, i_item_id#9] +Arguments: [i_class#12 ASC NULLS FIRST], false, 0 + +(25) Window +Input [8]: [i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, _w0#20, _w1#21, i_item_id#9] +Arguments: [sum(_w1#21) windowspecdefinition(i_class#12, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#23], [i_class#12] + +(26) Project [codegen id : 9] +Output [7]: [i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, 
CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#20) * 100.00), DecimalType(21,2), true) as decimal(27,2))) / promote_precision(_we0#23)), DecimalType(38,17), true) AS revenueratio#24, i_item_id#9] +Input [9]: [i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, _w0#20, _w1#21, i_item_id#9, _we0#23] + +(27) TakeOrderedAndProject +Input [7]: [i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, revenueratio#24, i_item_id#9] +Arguments: 100, [i_category#13 ASC NULLS FIRST, i_class#12 ASC NULLS FIRST, i_item_id#9 ASC NULLS FIRST, i_item_desc#10 ASC NULLS FIRST, revenueratio#24 ASC NULLS FIRST], [i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, revenueratio#24] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20.sf100/simplified.txt new file mode 100644 index 0000000000000..b7d7a77003325 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20.sf100/simplified.txt @@ -0,0 +1,47 @@ +TakeOrderedAndProject [i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue,revenueratio] + WholeStageCodegen (9) + Project [_w0,_we0,i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue] + InputAdapter + Window [_w1,i_class] + WholeStageCodegen (8) + Sort [i_class] + InputAdapter + Exchange [i_class] #1 + WholeStageCodegen (7) + HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,sum] [_w0,_w1,itemrevenue,sum,sum(UnscaledValue(cs_ext_sales_price))] + InputAdapter + Exchange [i_category,i_class,i_current_price,i_item_desc,i_item_id] #2 + WholeStageCodegen (6) + HashAggregate [cs_ext_sales_price,i_category,i_class,i_current_price,i_item_desc,i_item_id] [sum,sum] + Project [cs_ext_sales_price,i_category,i_class,i_current_price,i_item_desc,i_item_id] + SortMergeJoin 
[cs_item_sk,i_item_sk] + InputAdapter + WholeStageCodegen (3) + Sort [cs_item_sk] + InputAdapter + Exchange [cs_item_sk] #3 + WholeStageCodegen (2) + Project [cs_ext_sales_price,cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + WholeStageCodegen (5) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #5 + WholeStageCodegen (4) + Filter [i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/explain.txt new file mode 100644 index 0000000000000..3260eafea1b04 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/explain.txt @@ -0,0 +1,137 @@ +== Physical Plan == +TakeOrderedAndProject (24) ++- * Project (23) + +- Window (22) + +- * Sort (21) + +- Exchange (20) + +- * HashAggregate (19) + +- Exchange (18) + +- * HashAggregate (17) + +- * Project (16) + +- * BroadcastHashJoin Inner BuildRight (15) + :- * Project (9) + : +- * BroadcastHashJoin Inner BuildRight (8) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.catalog_sales (1) + : +- BroadcastExchange (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.item (4) + +- BroadcastExchange (14) + +- * Project (13) + +- * Filter (12) + +- * ColumnarToRow (11) + +- Scan parquet default.date_dim (10) + + +(1) Scan parquet default.catalog_sales +Output [3]: [cs_sold_date_sk#1, cs_item_sk#2, 
cs_ext_sales_price#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_sales_price#3] + +(3) Filter [codegen id : 3] +Input [3]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_sales_price#3] +Condition : (isnotnull(cs_item_sk#2) AND isnotnull(cs_sold_date_sk#1)) + +(4) Scan parquet default.item +Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [In(i_category, [Sports,Books,Home]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(6) Filter [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Condition : (i_category#9 IN (Sports,Books,Home) AND isnotnull(i_item_sk#4)) + +(7) BroadcastExchange +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_item_sk#2] +Right keys [1]: [i_item_sk#4] +Join condition: None + +(9) Project [codegen id : 3] +Output [7]: [cs_sold_date_sk#1, cs_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [9]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_sales_price#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(10) Scan parquet default.date_dim +Output 
[2]: [d_date_sk#11, d_date#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#11, d_date#12] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#11, d_date#12] +Condition : (((isnotnull(d_date#12) AND (d_date#12 >= 10644)) AND (d_date#12 <= 10674)) AND isnotnull(d_date_sk#11)) + +(13) Project [codegen id : 2] +Output [1]: [d_date_sk#11] +Input [2]: [d_date_sk#11, d_date#12] + +(14) BroadcastExchange +Input [1]: [d_date_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#11] +Join condition: None + +(16) Project [codegen id : 3] +Output [6]: [cs_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [8]: [cs_sold_date_sk#1, cs_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#11] + +(17) HashAggregate [codegen id : 3] +Input [6]: [cs_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#14] +Results [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#15] + +(18) Exchange +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#15] +Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), true, [id=#16] + +(19) HashAggregate [codegen id : 4] +Input [6]: 
[i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#15] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#3))#17] +Results [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#3))#17,17,2) AS itemrevenue#18, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#3))#17,17,2) AS _w0#19, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#3))#17,17,2) AS _w1#20, i_item_id#5] + +(20) Exchange +Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5] +Arguments: hashpartitioning(i_class#8, 5), true, [id=#21] + +(21) Sort [codegen id : 5] +Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5] +Arguments: [i_class#8 ASC NULLS FIRST], false, 0 + +(22) Window +Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5] +Arguments: [sum(_w1#20) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#22], [i_class#8] + +(23) Project [codegen id : 6] +Output [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#19) * 100.00), DecimalType(21,2), true) as decimal(27,2))) / promote_precision(_we0#22)), DecimalType(38,17), true) AS revenueratio#23, i_item_id#5] +Input [9]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5, _we0#22] + +(24) TakeOrderedAndProject +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, revenueratio#23, i_item_id#5] +Arguments: 100, [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 
ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#23 ASC NULLS FIRST], [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, revenueratio#23] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/simplified.txt new file mode 100644 index 0000000000000..b462752d01701 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/simplified.txt @@ -0,0 +1,38 @@ +TakeOrderedAndProject [i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue,revenueratio] + WholeStageCodegen (6) + Project [_w0,_we0,i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue] + InputAdapter + Window [_w1,i_class] + WholeStageCodegen (5) + Sort [i_class] + InputAdapter + Exchange [i_class] #1 + WholeStageCodegen (4) + HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,sum] [_w0,_w1,itemrevenue,sum,sum(UnscaledValue(cs_ext_sales_price))] + InputAdapter + Exchange [i_category,i_class,i_current_price,i_item_desc,i_item_id] #2 + WholeStageCodegen (3) + HashAggregate [cs_ext_sales_price,i_category,i_class,i_current_price,i_item_desc,i_item_id] [sum,sum] + Project [cs_ext_sales_price,i_category,i_class,i_current_price,i_item_desc,i_item_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_ext_sales_price,cs_sold_date_sk,i_category,i_class,i_current_price,i_item_desc,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + 
Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.sf100/explain.txt new file mode 100644 index 0000000000000..2a540094ddcd5 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.sf100/explain.txt @@ -0,0 +1,155 @@ +== Physical Plan == +TakeOrderedAndProject (27) ++- * Filter (26) + +- * HashAggregate (25) + +- Exchange (24) + +- * HashAggregate (23) + +- * Project (22) + +- * BroadcastHashJoin Inner BuildRight (21) + :- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.inventory (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.item (4) + : +- BroadcastExchange (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet default.date_dim (11) + +- BroadcastExchange (20) + +- * Filter (19) + +- * ColumnarToRow (18) + +- Scan parquet default.warehouse (17) + + +(1) Scan parquet default.inventory +Output [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/inventory] +PushedFilters: [IsNotNull(inv_warehouse_sk), IsNotNull(inv_item_sk), IsNotNull(inv_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] + +(3) Filter [codegen id : 4] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] 
+Condition : ((isnotnull(inv_warehouse_sk#3) AND isnotnull(inv_item_sk#2)) AND isnotnull(inv_date_sk#1)) + +(4) Scan parquet default.item +Output [3]: [i_item_sk#5, i_item_id#6, i_current_price#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,0.99), LessThanOrEqual(i_current_price,1.49), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [i_item_sk#5, i_item_id#6, i_current_price#7] + +(6) Filter [codegen id : 1] +Input [3]: [i_item_sk#5, i_item_id#6, i_current_price#7] +Condition : (((isnotnull(i_current_price#7) AND (i_current_price#7 >= 0.99)) AND (i_current_price#7 <= 1.49)) AND isnotnull(i_item_sk#5)) + +(7) Project [codegen id : 1] +Output [2]: [i_item_sk#5, i_item_id#6] +Input [3]: [i_item_sk#5, i_item_id#6, i_current_price#7] + +(8) BroadcastExchange +Input [2]: [i_item_sk#5, i_item_id#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_item_sk#2] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [inv_date_sk#1, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_id#6] +Input [6]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#5, i_item_id#6] + +(11) Scan parquet default.date_dim +Output [2]: [d_date_sk#9, d_date#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-10), LessThanOrEqual(d_date,2000-04-10), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#9, d_date#10] + 
+(13) Filter [codegen id : 2] +Input [2]: [d_date_sk#9, d_date#10] +Condition : (((isnotnull(d_date#10) AND (d_date#10 >= 10997)) AND (d_date#10 <= 11057)) AND isnotnull(d_date_sk#9)) + +(14) BroadcastExchange +Input [2]: [d_date_sk#9, d_date#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_date_sk#1] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(16) Project [codegen id : 4] +Output [4]: [inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_id#6, d_date#10] +Input [6]: [inv_date_sk#1, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_id#6, d_date_sk#9, d_date#10] + +(17) Scan parquet default.warehouse +Output [2]: [w_warehouse_sk#12, w_warehouse_name#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [2]: [w_warehouse_sk#12, w_warehouse_name#13] + +(19) Filter [codegen id : 3] +Input [2]: [w_warehouse_sk#12, w_warehouse_name#13] +Condition : isnotnull(w_warehouse_sk#12) + +(20) BroadcastExchange +Input [2]: [w_warehouse_sk#12, w_warehouse_name#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_warehouse_sk#3] +Right keys [1]: [w_warehouse_sk#12] +Join condition: None + +(22) Project [codegen id : 4] +Output [4]: [inv_quantity_on_hand#4, w_warehouse_name#13, i_item_id#6, d_date#10] +Input [6]: [inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_id#6, d_date#10, w_warehouse_sk#12, w_warehouse_name#13] + +(23) HashAggregate [codegen id : 4] +Input [4]: [inv_quantity_on_hand#4, w_warehouse_name#13, i_item_id#6, d_date#10] +Keys [2]: [w_warehouse_name#13, i_item_id#6] +Functions [2]: 
[partial_sum(cast(CASE WHEN (d_date#10 < 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (d_date#10 >= 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))] +Aggregate Attributes [2]: [sum#15, sum#16] +Results [4]: [w_warehouse_name#13, i_item_id#6, sum#17, sum#18] + +(24) Exchange +Input [4]: [w_warehouse_name#13, i_item_id#6, sum#17, sum#18] +Arguments: hashpartitioning(w_warehouse_name#13, i_item_id#6, 5), true, [id=#19] + +(25) HashAggregate [codegen id : 5] +Input [4]: [w_warehouse_name#13, i_item_id#6, sum#17, sum#18] +Keys [2]: [w_warehouse_name#13, i_item_id#6] +Functions [2]: [sum(cast(CASE WHEN (d_date#10 < 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint)), sum(cast(CASE WHEN (d_date#10 >= 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))] +Aggregate Attributes [2]: [sum(cast(CASE WHEN (d_date#10 < 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))#20, sum(cast(CASE WHEN (d_date#10 >= 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))#21] +Results [4]: [w_warehouse_name#13, i_item_id#6, sum(cast(CASE WHEN (d_date#10 < 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))#20 AS inv_before#22, sum(cast(CASE WHEN (d_date#10 >= 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))#21 AS inv_after#23] + +(26) Filter [codegen id : 5] +Input [4]: [w_warehouse_name#13, i_item_id#6, inv_before#22, inv_after#23] +Condition : ((CASE WHEN (inv_before#22 > 0) THEN (cast(inv_after#23 as double) / cast(inv_before#22 as double)) ELSE null END >= 0.666667) AND (CASE WHEN (inv_before#22 > 0) THEN (cast(inv_after#23 as double) / cast(inv_before#22 as double)) ELSE null END <= 1.5)) + +(27) TakeOrderedAndProject +Input [4]: [w_warehouse_name#13, i_item_id#6, inv_before#22, inv_after#23] +Arguments: 100, [w_warehouse_name#13 ASC NULLS FIRST, i_item_id#6 ASC NULLS FIRST], [w_warehouse_name#13, i_item_id#6, inv_before#22, inv_after#23] + diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.sf100/simplified.txt new file mode 100644 index 0000000000000..f862d4135937d --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.sf100/simplified.txt @@ -0,0 +1,40 @@ +TakeOrderedAndProject [i_item_id,inv_after,inv_before,w_warehouse_name] + WholeStageCodegen (5) + Filter [inv_after,inv_before] + HashAggregate [i_item_id,sum,sum,w_warehouse_name] [inv_after,inv_before,sum,sum,sum(cast(CASE WHEN (d_date < 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum(cast(CASE WHEN (d_date >= 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint))] + InputAdapter + Exchange [i_item_id,w_warehouse_name] #1 + WholeStageCodegen (4) + HashAggregate [d_date,i_item_id,inv_quantity_on_hand,w_warehouse_name] [sum,sum,sum,sum] + Project [d_date,i_item_id,inv_quantity_on_hand,w_warehouse_name] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [d_date,i_item_id,inv_quantity_on_hand,inv_warehouse_sk] + BroadcastHashJoin [d_date_sk,inv_date_sk] + Project [i_item_id,inv_date_sk,inv_quantity_on_hand,inv_warehouse_sk] + BroadcastHashJoin [i_item_sk,inv_item_sk] + Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [i_item_id,i_item_sk] + Filter [i_current_price,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_current_price,i_item_id,i_item_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet 
default.warehouse [w_warehouse_name,w_warehouse_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/explain.txt new file mode 100644 index 0000000000000..67d479c0d6a33 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/explain.txt @@ -0,0 +1,155 @@ +== Physical Plan == +TakeOrderedAndProject (27) ++- * Filter (26) + +- * HashAggregate (25) + +- Exchange (24) + +- * HashAggregate (23) + +- * Project (22) + +- * BroadcastHashJoin Inner BuildRight (21) + :- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (9) + : : +- * BroadcastHashJoin Inner BuildRight (8) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.inventory (1) + : : +- BroadcastExchange (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.warehouse (4) + : +- BroadcastExchange (14) + : +- * Project (13) + : +- * Filter (12) + : +- * ColumnarToRow (11) + : +- Scan parquet default.item (10) + +- BroadcastExchange (20) + +- * Filter (19) + +- * ColumnarToRow (18) + +- Scan parquet default.date_dim (17) + + +(1) Scan parquet default.inventory +Output [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/inventory] +PushedFilters: [IsNotNull(inv_warehouse_sk), IsNotNull(inv_item_sk), IsNotNull(inv_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] + +(3) Filter [codegen id : 4] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Condition : ((isnotnull(inv_warehouse_sk#3) AND isnotnull(inv_item_sk#2)) AND isnotnull(inv_date_sk#1)) + 
+(4) Scan parquet default.warehouse +Output [2]: [w_warehouse_sk#5, w_warehouse_name#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [w_warehouse_sk#5, w_warehouse_name#6] + +(6) Filter [codegen id : 1] +Input [2]: [w_warehouse_sk#5, w_warehouse_name#6] +Condition : isnotnull(w_warehouse_sk#5) + +(7) BroadcastExchange +Input [2]: [w_warehouse_sk#5, w_warehouse_name#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_warehouse_sk#3] +Right keys [1]: [w_warehouse_sk#5] +Join condition: None + +(9) Project [codegen id : 4] +Output [4]: [inv_date_sk#1, inv_item_sk#2, inv_quantity_on_hand#4, w_warehouse_name#6] +Input [6]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, w_warehouse_sk#5, w_warehouse_name#6] + +(10) Scan parquet default.item +Output [3]: [i_item_sk#8, i_item_id#9, i_current_price#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,0.99), LessThanOrEqual(i_current_price,1.49), IsNotNull(i_item_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [i_item_sk#8, i_item_id#9, i_current_price#10] + +(12) Filter [codegen id : 2] +Input [3]: [i_item_sk#8, i_item_id#9, i_current_price#10] +Condition : (((isnotnull(i_current_price#10) AND (i_current_price#10 >= 0.99)) AND (i_current_price#10 <= 1.49)) AND isnotnull(i_item_sk#8)) + +(13) Project [codegen id : 2] +Output [2]: [i_item_sk#8, i_item_id#9] +Input [3]: [i_item_sk#8, i_item_id#9, i_current_price#10] + +(14) 
BroadcastExchange +Input [2]: [i_item_sk#8, i_item_id#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_item_sk#2] +Right keys [1]: [i_item_sk#8] +Join condition: None + +(16) Project [codegen id : 4] +Output [4]: [inv_date_sk#1, inv_quantity_on_hand#4, w_warehouse_name#6, i_item_id#9] +Input [6]: [inv_date_sk#1, inv_item_sk#2, inv_quantity_on_hand#4, w_warehouse_name#6, i_item_sk#8, i_item_id#9] + +(17) Scan parquet default.date_dim +Output [2]: [d_date_sk#12, d_date#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-10), LessThanOrEqual(d_date,2000-04-10), IsNotNull(d_date_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#12, d_date#13] + +(19) Filter [codegen id : 3] +Input [2]: [d_date_sk#12, d_date#13] +Condition : (((isnotnull(d_date#13) AND (d_date#13 >= 10997)) AND (d_date#13 <= 11057)) AND isnotnull(d_date_sk#12)) + +(20) BroadcastExchange +Input [2]: [d_date_sk#12, d_date#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_date_sk#1] +Right keys [1]: [d_date_sk#12] +Join condition: None + +(22) Project [codegen id : 4] +Output [4]: [inv_quantity_on_hand#4, w_warehouse_name#6, i_item_id#9, d_date#13] +Input [6]: [inv_date_sk#1, inv_quantity_on_hand#4, w_warehouse_name#6, i_item_id#9, d_date_sk#12, d_date#13] + +(23) HashAggregate [codegen id : 4] +Input [4]: [inv_quantity_on_hand#4, w_warehouse_name#6, i_item_id#9, d_date#13] +Keys [2]: [w_warehouse_name#6, i_item_id#9] +Functions [2]: [partial_sum(cast(CASE WHEN (d_date#13 < 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint)), partial_sum(cast(CASE 
WHEN (d_date#13 >= 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))] +Aggregate Attributes [2]: [sum#15, sum#16] +Results [4]: [w_warehouse_name#6, i_item_id#9, sum#17, sum#18] + +(24) Exchange +Input [4]: [w_warehouse_name#6, i_item_id#9, sum#17, sum#18] +Arguments: hashpartitioning(w_warehouse_name#6, i_item_id#9, 5), true, [id=#19] + +(25) HashAggregate [codegen id : 5] +Input [4]: [w_warehouse_name#6, i_item_id#9, sum#17, sum#18] +Keys [2]: [w_warehouse_name#6, i_item_id#9] +Functions [2]: [sum(cast(CASE WHEN (d_date#13 < 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint)), sum(cast(CASE WHEN (d_date#13 >= 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))] +Aggregate Attributes [2]: [sum(cast(CASE WHEN (d_date#13 < 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))#20, sum(cast(CASE WHEN (d_date#13 >= 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))#21] +Results [4]: [w_warehouse_name#6, i_item_id#9, sum(cast(CASE WHEN (d_date#13 < 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))#20 AS inv_before#22, sum(cast(CASE WHEN (d_date#13 >= 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))#21 AS inv_after#23] + +(26) Filter [codegen id : 5] +Input [4]: [w_warehouse_name#6, i_item_id#9, inv_before#22, inv_after#23] +Condition : ((CASE WHEN (inv_before#22 > 0) THEN (cast(inv_after#23 as double) / cast(inv_before#22 as double)) ELSE null END >= 0.666667) AND (CASE WHEN (inv_before#22 > 0) THEN (cast(inv_after#23 as double) / cast(inv_before#22 as double)) ELSE null END <= 1.5)) + +(27) TakeOrderedAndProject +Input [4]: [w_warehouse_name#6, i_item_id#9, inv_before#22, inv_after#23] +Arguments: 100, [w_warehouse_name#6 ASC NULLS FIRST, i_item_id#9 ASC NULLS FIRST], [w_warehouse_name#6, i_item_id#9, inv_before#22, inv_after#23] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/simplified.txt 
new file mode 100644 index 0000000000000..d814563539bad --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/simplified.txt @@ -0,0 +1,40 @@ +TakeOrderedAndProject [i_item_id,inv_after,inv_before,w_warehouse_name] + WholeStageCodegen (5) + Filter [inv_after,inv_before] + HashAggregate [i_item_id,sum,sum,w_warehouse_name] [inv_after,inv_before,sum,sum,sum(cast(CASE WHEN (d_date < 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum(cast(CASE WHEN (d_date >= 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint))] + InputAdapter + Exchange [i_item_id,w_warehouse_name] #1 + WholeStageCodegen (4) + HashAggregate [d_date,i_item_id,inv_quantity_on_hand,w_warehouse_name] [sum,sum,sum,sum] + Project [d_date,i_item_id,inv_quantity_on_hand,w_warehouse_name] + BroadcastHashJoin [d_date_sk,inv_date_sk] + Project [i_item_id,inv_date_sk,inv_quantity_on_hand,w_warehouse_name] + BroadcastHashJoin [i_item_sk,inv_item_sk] + Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,w_warehouse_name] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.warehouse [w_warehouse_name,w_warehouse_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [i_item_id,i_item_sk] + Filter [i_current_price,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_current_price,i_item_id,i_item_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22.sf100/explain.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22.sf100/explain.txt new file mode 100644 index 0000000000000..e0c0319ef8ce5 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22.sf100/explain.txt @@ -0,0 +1,170 @@ +== Physical Plan == +TakeOrderedAndProject (30) ++- * HashAggregate (29) + +- Exchange (28) + +- * HashAggregate (27) + +- * Expand (26) + +- * Project (25) + +- * SortMergeJoin Inner (24) + :- * Sort (18) + : +- Exchange (17) + : +- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (9) + : : +- * BroadcastHashJoin Inner BuildRight (8) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.inventory (1) + : : +- BroadcastExchange (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.warehouse (4) + : +- BroadcastExchange (14) + : +- * Project (13) + : +- * Filter (12) + : +- * ColumnarToRow (11) + : +- Scan parquet default.date_dim (10) + +- * Sort (23) + +- Exchange (22) + +- * Filter (21) + +- * ColumnarToRow (20) + +- Scan parquet default.item (19) + + +(1) Scan parquet default.inventory +Output [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/inventory] +PushedFilters: [IsNotNull(inv_date_sk), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] + +(3) Filter [codegen id : 3] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Condition : ((isnotnull(inv_date_sk#1) AND isnotnull(inv_item_sk#2)) AND isnotnull(inv_warehouse_sk#3)) + +(4) Scan parquet default.warehouse +Output [1]: [w_warehouse_sk#5] +Batched: true +Location: 
InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [1]: [w_warehouse_sk#5] + +(6) Filter [codegen id : 1] +Input [1]: [w_warehouse_sk#5] +Condition : isnotnull(w_warehouse_sk#5) + +(7) BroadcastExchange +Input [1]: [w_warehouse_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#6] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [inv_warehouse_sk#3] +Right keys [1]: [w_warehouse_sk#5] +Join condition: None + +(9) Project [codegen id : 3] +Output [3]: [inv_date_sk#1, inv_item_sk#2, inv_quantity_on_hand#4] +Input [5]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, w_warehouse_sk#5] + +(10) Scan parquet default.date_dim +Output [2]: [d_date_sk#7, d_month_seq#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#7, d_month_seq#8] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#7, d_month_seq#8] +Condition : (((isnotnull(d_month_seq#8) AND (d_month_seq#8 >= 1200)) AND (d_month_seq#8 <= 1211)) AND isnotnull(d_date_sk#7)) + +(13) Project [codegen id : 2] +Output [1]: [d_date_sk#7] +Input [2]: [d_date_sk#7, d_month_seq#8] + +(14) BroadcastExchange +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [inv_date_sk#1] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(16) Project [codegen id : 3] +Output 
[2]: [inv_item_sk#2, inv_quantity_on_hand#4] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_quantity_on_hand#4, d_date_sk#7] + +(17) Exchange +Input [2]: [inv_item_sk#2, inv_quantity_on_hand#4] +Arguments: hashpartitioning(inv_item_sk#2, 5), true, [id=#10] + +(18) Sort [codegen id : 4] +Input [2]: [inv_item_sk#2, inv_quantity_on_hand#4] +Arguments: [inv_item_sk#2 ASC NULLS FIRST], false, 0 + +(19) Scan parquet default.item +Output [5]: [i_item_sk#11, i_brand#12, i_class#13, i_category#14, i_product_name#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 5] +Input [5]: [i_item_sk#11, i_brand#12, i_class#13, i_category#14, i_product_name#15] + +(21) Filter [codegen id : 5] +Input [5]: [i_item_sk#11, i_brand#12, i_class#13, i_category#14, i_product_name#15] +Condition : isnotnull(i_item_sk#11) + +(22) Exchange +Input [5]: [i_item_sk#11, i_brand#12, i_class#13, i_category#14, i_product_name#15] +Arguments: hashpartitioning(i_item_sk#11, 5), true, [id=#16] + +(23) Sort [codegen id : 6] +Input [5]: [i_item_sk#11, i_brand#12, i_class#13, i_category#14, i_product_name#15] +Arguments: [i_item_sk#11 ASC NULLS FIRST], false, 0 + +(24) SortMergeJoin [codegen id : 7] +Left keys [1]: [inv_item_sk#2] +Right keys [1]: [i_item_sk#11] +Join condition: None + +(25) Project [codegen id : 7] +Output [5]: [inv_quantity_on_hand#4, i_product_name#15, i_brand#12, i_class#13, i_category#14] +Input [7]: [inv_item_sk#2, inv_quantity_on_hand#4, i_item_sk#11, i_brand#12, i_class#13, i_category#14, i_product_name#15] + +(26) Expand [codegen id : 7] +Input [5]: [inv_quantity_on_hand#4, i_product_name#15, i_brand#12, i_class#13, i_category#14] +Arguments: [List(inv_quantity_on_hand#4, i_product_name#15, i_brand#12, i_class#13, i_category#14, 0), List(inv_quantity_on_hand#4, 
i_product_name#15, i_brand#12, i_class#13, null, 1), List(inv_quantity_on_hand#4, i_product_name#15, i_brand#12, null, null, 3), List(inv_quantity_on_hand#4, i_product_name#15, null, null, null, 7), List(inv_quantity_on_hand#4, null, null, null, null, 15)], [inv_quantity_on_hand#4, i_product_name#17, i_brand#18, i_class#19, i_category#20, spark_grouping_id#21] + +(27) HashAggregate [codegen id : 7] +Input [6]: [inv_quantity_on_hand#4, i_product_name#17, i_brand#18, i_class#19, i_category#20, spark_grouping_id#21] +Keys [5]: [i_product_name#17, i_brand#18, i_class#19, i_category#20, spark_grouping_id#21] +Functions [1]: [partial_avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [2]: [sum#22, count#23] +Results [7]: [i_product_name#17, i_brand#18, i_class#19, i_category#20, spark_grouping_id#21, sum#24, count#25] + +(28) Exchange +Input [7]: [i_product_name#17, i_brand#18, i_class#19, i_category#20, spark_grouping_id#21, sum#24, count#25] +Arguments: hashpartitioning(i_product_name#17, i_brand#18, i_class#19, i_category#20, spark_grouping_id#21, 5), true, [id=#26] + +(29) HashAggregate [codegen id : 8] +Input [7]: [i_product_name#17, i_brand#18, i_class#19, i_category#20, spark_grouping_id#21, sum#24, count#25] +Keys [5]: [i_product_name#17, i_brand#18, i_class#19, i_category#20, spark_grouping_id#21] +Functions [1]: [avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [1]: [avg(cast(inv_quantity_on_hand#4 as bigint))#27] +Results [5]: [i_product_name#17, i_brand#18, i_class#19, i_category#20, avg(cast(inv_quantity_on_hand#4 as bigint))#27 AS qoh#28] + +(30) TakeOrderedAndProject +Input [5]: [i_product_name#17, i_brand#18, i_class#19, i_category#20, qoh#28] +Arguments: 100, [qoh#28 ASC NULLS FIRST, i_product_name#17 ASC NULLS FIRST, i_brand#18 ASC NULLS FIRST, i_class#19 ASC NULLS FIRST, i_category#20 ASC NULLS FIRST], [i_product_name#17, i_brand#18, i_class#19, i_category#20, qoh#28] + diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22.sf100/simplified.txt new file mode 100644 index 0000000000000..784510aca5a2f --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22.sf100/simplified.txt @@ -0,0 +1,49 @@ +TakeOrderedAndProject [i_brand,i_category,i_class,i_product_name,qoh] + WholeStageCodegen (8) + HashAggregate [count,i_brand,i_category,i_class,i_product_name,spark_grouping_id,sum] [avg(cast(inv_quantity_on_hand as bigint)),count,qoh,sum] + InputAdapter + Exchange [i_brand,i_category,i_class,i_product_name,spark_grouping_id] #1 + WholeStageCodegen (7) + HashAggregate [i_brand,i_category,i_class,i_product_name,inv_quantity_on_hand,spark_grouping_id] [count,count,sum,sum] + Expand [i_brand,i_category,i_class,i_product_name,inv_quantity_on_hand] + Project [i_brand,i_category,i_class,i_product_name,inv_quantity_on_hand] + SortMergeJoin [i_item_sk,inv_item_sk] + InputAdapter + WholeStageCodegen (4) + Sort [inv_item_sk] + InputAdapter + Exchange [inv_item_sk] #2 + WholeStageCodegen (3) + Project [inv_item_sk,inv_quantity_on_hand] + BroadcastHashJoin [d_date_sk,inv_date_sk] + Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.warehouse [w_warehouse_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] + InputAdapter + WholeStageCodegen (6) + Sort [i_item_sk] + InputAdapter + 
Exchange [i_item_sk] #5 + WholeStageCodegen (5) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_category,i_class,i_item_sk,i_product_name] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/explain.txt new file mode 100644 index 0000000000000..7ebdeb2615751 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/explain.txt @@ -0,0 +1,155 @@ +== Physical Plan == +TakeOrderedAndProject (27) ++- * HashAggregate (26) + +- Exchange (25) + +- * HashAggregate (24) + +- * Expand (23) + +- * Project (22) + +- * BroadcastHashJoin Inner BuildRight (21) + :- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.inventory (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.date_dim (4) + : +- BroadcastExchange (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet default.item (11) + +- BroadcastExchange (20) + +- * Filter (19) + +- * ColumnarToRow (18) + +- Scan parquet default.warehouse (17) + + +(1) Scan parquet default.inventory +Output [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/inventory] +PushedFilters: [IsNotNull(inv_date_sk), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] + +(3) Filter [codegen id : 4] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, 
inv_quantity_on_hand#4] +Condition : ((isnotnull(inv_date_sk#1) AND isnotnull(inv_item_sk#2)) AND isnotnull(inv_warehouse_sk#3)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#5, d_month_seq#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#5, d_month_seq#6] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#5, d_month_seq#6] +Condition : (((isnotnull(d_month_seq#6) AND (d_month_seq#6 >= 1200)) AND (d_month_seq#6 <= 1211)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [2]: [d_date_sk#5, d_month_seq#6] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_date_sk#1] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(10) Project [codegen id : 4] +Output [3]: [inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Input [5]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, d_date_sk#5] + +(11) Scan parquet default.item +Output [5]: [i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [5]: [i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] + +(13) Filter [codegen id : 2] +Input [5]: [i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] +Condition : 
isnotnull(i_item_sk#8) + +(14) BroadcastExchange +Input [5]: [i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#13] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_item_sk#2] +Right keys [1]: [i_item_sk#8] +Join condition: None + +(16) Project [codegen id : 4] +Output [6]: [inv_warehouse_sk#3, inv_quantity_on_hand#4, i_brand#9, i_class#10, i_category#11, i_product_name#12] +Input [8]: [inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] + +(17) Scan parquet default.warehouse +Output [1]: [w_warehouse_sk#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [1]: [w_warehouse_sk#14] + +(19) Filter [codegen id : 3] +Input [1]: [w_warehouse_sk#14] +Condition : isnotnull(w_warehouse_sk#14) + +(20) BroadcastExchange +Input [1]: [w_warehouse_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#15] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_warehouse_sk#3] +Right keys [1]: [w_warehouse_sk#14] +Join condition: None + +(22) Project [codegen id : 4] +Output [5]: [inv_quantity_on_hand#4, i_product_name#12, i_brand#9, i_class#10, i_category#11] +Input [7]: [inv_warehouse_sk#3, inv_quantity_on_hand#4, i_brand#9, i_class#10, i_category#11, i_product_name#12, w_warehouse_sk#14] + +(23) Expand [codegen id : 4] +Input [5]: [inv_quantity_on_hand#4, i_product_name#12, i_brand#9, i_class#10, i_category#11] +Arguments: [List(inv_quantity_on_hand#4, i_product_name#12, i_brand#9, i_class#10, i_category#11, 0), List(inv_quantity_on_hand#4, i_product_name#12, i_brand#9, i_class#10, null, 1), 
List(inv_quantity_on_hand#4, i_product_name#12, i_brand#9, null, null, 3), List(inv_quantity_on_hand#4, i_product_name#12, null, null, null, 7), List(inv_quantity_on_hand#4, null, null, null, null, 15)], [inv_quantity_on_hand#4, i_product_name#16, i_brand#17, i_class#18, i_category#19, spark_grouping_id#20] + +(24) HashAggregate [codegen id : 4] +Input [6]: [inv_quantity_on_hand#4, i_product_name#16, i_brand#17, i_class#18, i_category#19, spark_grouping_id#20] +Keys [5]: [i_product_name#16, i_brand#17, i_class#18, i_category#19, spark_grouping_id#20] +Functions [1]: [partial_avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [2]: [sum#21, count#22] +Results [7]: [i_product_name#16, i_brand#17, i_class#18, i_category#19, spark_grouping_id#20, sum#23, count#24] + +(25) Exchange +Input [7]: [i_product_name#16, i_brand#17, i_class#18, i_category#19, spark_grouping_id#20, sum#23, count#24] +Arguments: hashpartitioning(i_product_name#16, i_brand#17, i_class#18, i_category#19, spark_grouping_id#20, 5), true, [id=#25] + +(26) HashAggregate [codegen id : 5] +Input [7]: [i_product_name#16, i_brand#17, i_class#18, i_category#19, spark_grouping_id#20, sum#23, count#24] +Keys [5]: [i_product_name#16, i_brand#17, i_class#18, i_category#19, spark_grouping_id#20] +Functions [1]: [avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [1]: [avg(cast(inv_quantity_on_hand#4 as bigint))#26] +Results [5]: [i_product_name#16, i_brand#17, i_class#18, i_category#19, avg(cast(inv_quantity_on_hand#4 as bigint))#26 AS qoh#27] + +(27) TakeOrderedAndProject +Input [5]: [i_product_name#16, i_brand#17, i_class#18, i_category#19, qoh#27] +Arguments: 100, [qoh#27 ASC NULLS FIRST, i_product_name#16 ASC NULLS FIRST, i_brand#17 ASC NULLS FIRST, i_class#18 ASC NULLS FIRST, i_category#19 ASC NULLS FIRST], [i_product_name#16, i_brand#17, i_class#18, i_category#19, qoh#27] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/simplified.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/simplified.txt new file mode 100644 index 0000000000000..cf0e275812deb --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/simplified.txt @@ -0,0 +1,40 @@ +TakeOrderedAndProject [i_brand,i_category,i_class,i_product_name,qoh] + WholeStageCodegen (5) + HashAggregate [count,i_brand,i_category,i_class,i_product_name,spark_grouping_id,sum] [avg(cast(inv_quantity_on_hand as bigint)),count,qoh,sum] + InputAdapter + Exchange [i_brand,i_category,i_class,i_product_name,spark_grouping_id] #1 + WholeStageCodegen (4) + HashAggregate [i_brand,i_category,i_class,i_product_name,inv_quantity_on_hand,spark_grouping_id] [count,count,sum,sum] + Expand [i_brand,i_category,i_class,i_product_name,inv_quantity_on_hand] + Project [i_brand,i_category,i_class,i_product_name,inv_quantity_on_hand] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [i_brand,i_category,i_class,i_product_name,inv_quantity_on_hand,inv_warehouse_sk] + BroadcastHashJoin [i_item_sk,inv_item_sk] + Project [inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + BroadcastHashJoin [d_date_sk,inv_date_sk] + Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_category,i_class,i_item_sk,i_product_name] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.warehouse [w_warehouse_sk] diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt new file mode 100644 index 0000000000000..702901b354289 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt @@ -0,0 +1,655 @@ +== Physical Plan == +CollectLimit (92) ++- * HashAggregate (91) + +- Exchange (90) + +- * HashAggregate (89) + +- Union (88) + :- * Project (60) + : +- * BroadcastHashJoin Inner BuildRight (59) + : :- * Project (53) + : : +- SortMergeJoin LeftSemi (52) + : : :- * Sort (34) + : : : +- Exchange (33) + : : : +- * Project (32) + : : : +- SortMergeJoin LeftSemi (31) + : : : :- * Sort (5) + : : : : +- Exchange (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.catalog_sales (1) + : : : +- * Sort (30) + : : : +- Exchange (29) + : : : +- * Project (28) + : : : +- * Filter (27) + : : : +- * HashAggregate (26) + : : : +- * HashAggregate (25) + : : : +- * Project (24) + : : : +- * SortMergeJoin Inner (23) + : : : :- * Sort (17) + : : : : +- Exchange (16) + : : : : +- * Project (15) + : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : :- * Filter (8) + : : : : : +- * ColumnarToRow (7) + : : : : : +- Scan parquet default.store_sales (6) + : : : : +- BroadcastExchange (13) + : : : : +- * Project (12) + : : : : +- * Filter (11) + : : : : +- * ColumnarToRow (10) + : : : : +- Scan parquet default.date_dim (9) + : : : +- * Sort (22) + : : : +- Exchange (21) + : : : +- * Filter (20) + : : : +- * ColumnarToRow (19) + : : : +- Scan parquet default.item (18) + : : +- * Sort (51) + : : +- * Project (50) + : : +- * Filter (49) + : : +- * HashAggregate (48) + : : +- * HashAggregate (47) + : : +- * Project (46) + : : +- * SortMergeJoin Inner (45) + : : :- * Sort (39) + : : : +- Exchange (38) + : : : +- * Filter (37) + : : : +- * ColumnarToRow (36) + : : : +- Scan 
parquet default.store_sales (35) + : : +- * Sort (44) + : : +- Exchange (43) + : : +- * Filter (42) + : : +- * ColumnarToRow (41) + : : +- Scan parquet default.customer (40) + : +- BroadcastExchange (58) + : +- * Project (57) + : +- * Filter (56) + : +- * ColumnarToRow (55) + : +- Scan parquet default.date_dim (54) + +- * Project (87) + +- * BroadcastHashJoin Inner BuildRight (86) + :- * Project (84) + : +- SortMergeJoin LeftSemi (83) + : :- * Sort (71) + : : +- Exchange (70) + : : +- * Project (69) + : : +- SortMergeJoin LeftSemi (68) + : : :- * Sort (65) + : : : +- Exchange (64) + : : : +- * Filter (63) + : : : +- * ColumnarToRow (62) + : : : +- Scan parquet default.web_sales (61) + : : +- * Sort (67) + : : +- ReusedExchange (66) + : +- * Sort (82) + : +- * Project (81) + : +- * Filter (80) + : +- * HashAggregate (79) + : +- * HashAggregate (78) + : +- * Project (77) + : +- * SortMergeJoin Inner (76) + : :- * Sort (73) + : : +- ReusedExchange (72) + : +- * Sort (75) + : +- ReusedExchange (74) + +- ReusedExchange (85) + + +(1) Scan parquet default.catalog_sales +Output [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5] + +(3) Filter [codegen id : 1] +Input [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5] +Condition : isnotnull(cs_sold_date_sk#1) + +(4) Exchange +Input [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5] +Arguments: hashpartitioning(cs_item_sk#3, 5), true, [id=#6] + +(5) Sort [codegen id : 2] +Input [5]: [cs_sold_date_sk#1, 
cs_bill_customer_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5] +Arguments: [cs_item_sk#3 ASC NULLS FIRST], false, 0 + +(6) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#7, ss_item_sk#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 4] +Input [2]: [ss_sold_date_sk#7, ss_item_sk#8] + +(8) Filter [codegen id : 4] +Input [2]: [ss_sold_date_sk#7, ss_item_sk#8] +Condition : (isnotnull(ss_sold_date_sk#7) AND isnotnull(ss_item_sk#8)) + +(9) Scan parquet default.date_dim +Output [3]: [d_date_sk#9, d_date#10, d_year#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(10) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#9, d_date#10, d_year#11] + +(11) Filter [codegen id : 3] +Input [3]: [d_date_sk#9, d_date#10, d_year#11] +Condition : (d_year#11 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#9)) + +(12) Project [codegen id : 3] +Output [2]: [d_date_sk#9, d_date#10] +Input [3]: [d_date_sk#9, d_date#10, d_year#11] + +(13) BroadcastExchange +Input [2]: [d_date_sk#9, d_date#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(15) Project [codegen id : 4] +Output [2]: [ss_item_sk#8, d_date#10] +Input [4]: [ss_sold_date_sk#7, ss_item_sk#8, d_date_sk#9, d_date#10] + +(16) Exchange +Input [2]: [ss_item_sk#8, d_date#10] +Arguments: hashpartitioning(ss_item_sk#8, 5), true, [id=#13] + +(17) Sort 
[codegen id : 5] +Input [2]: [ss_item_sk#8, d_date#10] +Arguments: [ss_item_sk#8 ASC NULLS FIRST], false, 0 + +(18) Scan parquet default.item +Output [2]: [i_item_sk#14, i_item_desc#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 6] +Input [2]: [i_item_sk#14, i_item_desc#15] + +(20) Filter [codegen id : 6] +Input [2]: [i_item_sk#14, i_item_desc#15] +Condition : isnotnull(i_item_sk#14) + +(21) Exchange +Input [2]: [i_item_sk#14, i_item_desc#15] +Arguments: hashpartitioning(i_item_sk#14, 5), true, [id=#16] + +(22) Sort [codegen id : 7] +Input [2]: [i_item_sk#14, i_item_desc#15] +Arguments: [i_item_sk#14 ASC NULLS FIRST], false, 0 + +(23) SortMergeJoin [codegen id : 8] +Left keys [1]: [ss_item_sk#8] +Right keys [1]: [i_item_sk#14] +Join condition: None + +(24) Project [codegen id : 8] +Output [3]: [d_date#10, i_item_sk#14, i_item_desc#15] +Input [4]: [ss_item_sk#8, d_date#10, i_item_sk#14, i_item_desc#15] + +(25) HashAggregate [codegen id : 8] +Input [3]: [d_date#10, i_item_sk#14, i_item_desc#15] +Keys [3]: [substr(i_item_desc#15, 1, 30) AS substr(i_item_desc#15, 1, 30)#17, i_item_sk#14, d_date#10] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#18] +Results [4]: [substr(i_item_desc#15, 1, 30)#17, i_item_sk#14, d_date#10, count#19] + +(26) HashAggregate [codegen id : 8] +Input [4]: [substr(i_item_desc#15, 1, 30)#17, i_item_sk#14, d_date#10, count#19] +Keys [3]: [substr(i_item_desc#15, 1, 30)#17, i_item_sk#14, d_date#10] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#20] +Results [2]: [i_item_sk#14 AS item_sk#21, count(1)#20 AS count(1)#22] + +(27) Filter [codegen id : 8] +Input [2]: [item_sk#21, count(1)#22] +Condition : (count(1)#22 > 4) + +(28) Project [codegen id : 8] +Output [1]: [item_sk#21] +Input [2]: 
[item_sk#21, count(1)#22] + +(29) Exchange +Input [1]: [item_sk#21] +Arguments: hashpartitioning(item_sk#21, 5), true, [id=#23] + +(30) Sort [codegen id : 9] +Input [1]: [item_sk#21] +Arguments: [item_sk#21 ASC NULLS FIRST], false, 0 + +(31) SortMergeJoin +Left keys [1]: [cs_item_sk#3] +Right keys [1]: [item_sk#21] +Join condition: None + +(32) Project [codegen id : 10] +Output [4]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_quantity#4, cs_list_price#5] +Input [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5] + +(33) Exchange +Input [4]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_quantity#4, cs_list_price#5] +Arguments: hashpartitioning(cs_bill_customer_sk#2, 5), true, [id=#24] + +(34) Sort [codegen id : 11] +Input [4]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_quantity#4, cs_list_price#5] +Arguments: [cs_bill_customer_sk#2 ASC NULLS FIRST], false, 0 + +(35) Scan parquet default.store_sales +Output [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(36) ColumnarToRow [codegen id : 12] +Input [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] + +(37) Filter [codegen id : 12] +Input [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] +Condition : isnotnull(ss_customer_sk#25) + +(38) Exchange +Input [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] +Arguments: hashpartitioning(ss_customer_sk#25, 5), true, [id=#28] + +(39) Sort [codegen id : 13] +Input [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] +Arguments: [ss_customer_sk#25 ASC NULLS FIRST], false, 0 + +(40) Scan parquet default.customer +Output [1]: [c_customer_sk#29] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 14] +Input [1]: [c_customer_sk#29] + +(42) Filter [codegen id : 14] +Input [1]: [c_customer_sk#29] +Condition : isnotnull(c_customer_sk#29) + +(43) Exchange +Input [1]: [c_customer_sk#29] +Arguments: hashpartitioning(c_customer_sk#29, 5), true, [id=#30] + +(44) Sort [codegen id : 15] +Input [1]: [c_customer_sk#29] +Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 + +(45) SortMergeJoin [codegen id : 16] +Left keys [1]: [ss_customer_sk#25] +Right keys [1]: [c_customer_sk#29] +Join condition: None + +(46) Project [codegen id : 16] +Output [3]: [ss_quantity#26, ss_sales_price#27, c_customer_sk#29] +Input [4]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27, c_customer_sk#29] + +(47) HashAggregate [codegen id : 16] +Input [3]: [ss_quantity#26, ss_sales_price#27, c_customer_sk#29] +Keys [1]: [c_customer_sk#29] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#31, isEmpty#32] +Results [3]: [c_customer_sk#29, sum#33, isEmpty#34] + +(48) HashAggregate [codegen id : 16] +Input [3]: [c_customer_sk#29, sum#33, isEmpty#34] +Keys [1]: [c_customer_sk#29] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#35] +Results [2]: [c_customer_sk#29, 
sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#35 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#36] + +(49) Filter [codegen id : 16] +Input [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#36] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#36) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8), true))) + +(50) Project [codegen id : 16] +Output [1]: [c_customer_sk#29] +Input [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#36] + +(51) Sort [codegen id : 16] +Input [1]: [c_customer_sk#29] +Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 + +(52) SortMergeJoin +Left keys [1]: [cs_bill_customer_sk#2] +Right keys [1]: [c_customer_sk#29] +Join condition: None + +(53) Project [codegen id : 18] +Output [3]: [cs_sold_date_sk#1, cs_quantity#4, cs_list_price#5] +Input [4]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_quantity#4, cs_list_price#5] + +(54) Scan parquet default.date_dim +Output [3]: [d_date_sk#9, 
d_year#11, d_moy#39] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)] +ReadSchema: struct + +(55) ColumnarToRow [codegen id : 17] +Input [3]: [d_date_sk#9, d_year#11, d_moy#39] + +(56) Filter [codegen id : 17] +Input [3]: [d_date_sk#9, d_year#11, d_moy#39] +Condition : ((((isnotnull(d_year#11) AND isnotnull(d_moy#39)) AND (d_year#11 = 2000)) AND (d_moy#39 = 2)) AND isnotnull(d_date_sk#9)) + +(57) Project [codegen id : 17] +Output [1]: [d_date_sk#9] +Input [3]: [d_date_sk#9, d_year#11, d_moy#39] + +(58) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#40] + +(59) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(60) Project [codegen id : 18] +Output [1]: [CheckOverflow((promote_precision(cast(cast(cs_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#5 as decimal(12,2)))), DecimalType(18,2), true) AS sales#41] +Input [4]: [cs_sold_date_sk#1, cs_quantity#4, cs_list_price#5, d_date_sk#9] + +(61) Scan parquet default.web_sales +Output [5]: [ws_sold_date_sk#42, ws_item_sk#43, ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(62) ColumnarToRow [codegen id : 19] +Input [5]: [ws_sold_date_sk#42, ws_item_sk#43, ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46] + +(63) Filter [codegen id : 19] +Input [5]: [ws_sold_date_sk#42, ws_item_sk#43, ws_bill_customer_sk#44, 
ws_quantity#45, ws_list_price#46] +Condition : isnotnull(ws_sold_date_sk#42) + +(64) Exchange +Input [5]: [ws_sold_date_sk#42, ws_item_sk#43, ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46] +Arguments: hashpartitioning(ws_item_sk#43, 5), true, [id=#47] + +(65) Sort [codegen id : 20] +Input [5]: [ws_sold_date_sk#42, ws_item_sk#43, ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46] +Arguments: [ws_item_sk#43 ASC NULLS FIRST], false, 0 + +(66) ReusedExchange [Reuses operator id: 29] +Output [1]: [item_sk#21] + +(67) Sort [codegen id : 27] +Input [1]: [item_sk#21] +Arguments: [item_sk#21 ASC NULLS FIRST], false, 0 + +(68) SortMergeJoin +Left keys [1]: [ws_item_sk#43] +Right keys [1]: [item_sk#21] +Join condition: None + +(69) Project [codegen id : 28] +Output [4]: [ws_sold_date_sk#42, ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46] +Input [5]: [ws_sold_date_sk#42, ws_item_sk#43, ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46] + +(70) Exchange +Input [4]: [ws_sold_date_sk#42, ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46] +Arguments: hashpartitioning(ws_bill_customer_sk#44, 5), true, [id=#48] + +(71) Sort [codegen id : 29] +Input [4]: [ws_sold_date_sk#42, ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46] +Arguments: [ws_bill_customer_sk#44 ASC NULLS FIRST], false, 0 + +(72) ReusedExchange [Reuses operator id: 38] +Output [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] + +(73) Sort [codegen id : 31] +Input [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] +Arguments: [ss_customer_sk#25 ASC NULLS FIRST], false, 0 + +(74) ReusedExchange [Reuses operator id: 43] +Output [1]: [c_customer_sk#29] + +(75) Sort [codegen id : 33] +Input [1]: [c_customer_sk#29] +Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 + +(76) SortMergeJoin [codegen id : 34] +Left keys [1]: [ss_customer_sk#25] +Right keys [1]: [c_customer_sk#29] +Join condition: None + +(77) Project [codegen id : 34] +Output [3]: 
[ss_quantity#26, ss_sales_price#27, c_customer_sk#29] +Input [4]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27, c_customer_sk#29] + +(78) HashAggregate [codegen id : 34] +Input [3]: [ss_quantity#26, ss_sales_price#27, c_customer_sk#29] +Keys [1]: [c_customer_sk#29] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#49, isEmpty#50] +Results [3]: [c_customer_sk#29, sum#51, isEmpty#52] + +(79) HashAggregate [codegen id : 34] +Input [3]: [c_customer_sk#29, sum#51, isEmpty#52] +Keys [1]: [c_customer_sk#29] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#53] +Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#53 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#54] + +(80) Filter [codegen id : 34] +Input [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#54] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as 
decimal(12,2)))), DecimalType(18,2), true))#54) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#54 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8), true))) + +(81) Project [codegen id : 34] +Output [1]: [c_customer_sk#29] +Input [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#54] + +(82) Sort [codegen id : 34] +Input [1]: [c_customer_sk#29] +Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 + +(83) SortMergeJoin +Left keys [1]: [ws_bill_customer_sk#44] +Right keys [1]: [c_customer_sk#29] +Join condition: None + +(84) Project [codegen id : 36] +Output [3]: [ws_sold_date_sk#42, ws_quantity#45, ws_list_price#46] +Input [4]: [ws_sold_date_sk#42, ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46] + +(85) ReusedExchange [Reuses operator id: 58] +Output [1]: [d_date_sk#9] + +(86) BroadcastHashJoin [codegen id : 36] +Left keys [1]: [ws_sold_date_sk#42] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(87) Project [codegen id : 36] +Output [1]: [CheckOverflow((promote_precision(cast(cast(ws_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#46 as decimal(12,2)))), DecimalType(18,2), true) AS sales#55] +Input [4]: [ws_sold_date_sk#42, ws_quantity#45, ws_list_price#46, d_date_sk#9] + +(88) Union + +(89) HashAggregate [codegen id : 37] +Input [1]: [sales#41] +Keys: [] +Functions [1]: [partial_sum(sales#41)] +Aggregate Attributes [2]: [sum#56, isEmpty#57] +Results [2]: [sum#58, isEmpty#59] + +(90) Exchange +Input [2]: [sum#58, isEmpty#59] +Arguments: SinglePartition, true, [id=#60] + +(91) 
HashAggregate [codegen id : 38] +Input [2]: [sum#58, isEmpty#59] +Keys: [] +Functions [1]: [sum(sales#41)] +Aggregate Attributes [1]: [sum(sales#41)#61] +Results [1]: [sum(sales#41)#61 AS sum(sales)#62] + +(92) CollectLimit +Input [1]: [sum(sales)#62] +Arguments: 100 + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 49 Hosting Expression = Subquery scalar-subquery#37, [id=#38] +* HashAggregate (116) ++- Exchange (115) + +- * HashAggregate (114) + +- * HashAggregate (113) + +- * HashAggregate (112) + +- * Project (111) + +- * SortMergeJoin Inner (110) + :- * Sort (104) + : +- Exchange (103) + : +- * Project (102) + : +- * BroadcastHashJoin Inner BuildRight (101) + : :- * Filter (95) + : : +- * ColumnarToRow (94) + : : +- Scan parquet default.store_sales (93) + : +- BroadcastExchange (100) + : +- * Project (99) + : +- * Filter (98) + : +- * ColumnarToRow (97) + : +- Scan parquet default.date_dim (96) + +- * Sort (109) + +- Exchange (108) + +- * Filter (107) + +- * ColumnarToRow (106) + +- Scan parquet default.customer (105) + + +(93) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#7, ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(94) ColumnarToRow [codegen id : 2] +Input [4]: [ss_sold_date_sk#7, ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] + +(95) Filter [codegen id : 2] +Input [4]: [ss_sold_date_sk#7, ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] +Condition : (isnotnull(ss_customer_sk#25) AND isnotnull(ss_sold_date_sk#7)) + +(96) Scan parquet default.date_dim +Output [2]: [d_date_sk#9, d_year#11] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(97) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#9, d_year#11] + +(98) Filter [codegen id : 1] +Input [2]: [d_date_sk#9, d_year#11] +Condition : (d_year#11 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#9)) + +(99) Project [codegen id : 1] +Output [1]: [d_date_sk#9] +Input [2]: [d_date_sk#9, d_year#11] + +(100) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#63] + +(101) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(102) Project [codegen id : 2] +Output [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] +Input [5]: [ss_sold_date_sk#7, ss_customer_sk#25, ss_quantity#26, ss_sales_price#27, d_date_sk#9] + +(103) Exchange +Input [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] +Arguments: hashpartitioning(ss_customer_sk#25, 5), true, [id=#64] + +(104) Sort [codegen id : 3] +Input [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] +Arguments: [ss_customer_sk#25 ASC NULLS FIRST], false, 0 + +(105) Scan parquet default.customer +Output [1]: [c_customer_sk#29] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(106) ColumnarToRow [codegen id : 4] +Input [1]: [c_customer_sk#29] + +(107) Filter [codegen id : 4] +Input [1]: [c_customer_sk#29] +Condition : isnotnull(c_customer_sk#29) + +(108) Exchange +Input [1]: [c_customer_sk#29] +Arguments: hashpartitioning(c_customer_sk#29, 5), true, [id=#65] + +(109) Sort [codegen id : 5] +Input [1]: 
[c_customer_sk#29] +Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 + +(110) SortMergeJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#25] +Right keys [1]: [c_customer_sk#29] +Join condition: None + +(111) Project [codegen id : 6] +Output [3]: [ss_quantity#26, ss_sales_price#27, c_customer_sk#29] +Input [4]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27, c_customer_sk#29] + +(112) HashAggregate [codegen id : 6] +Input [3]: [ss_quantity#26, ss_sales_price#27, c_customer_sk#29] +Keys [1]: [c_customer_sk#29] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#66, isEmpty#67] +Results [3]: [c_customer_sk#29, sum#68, isEmpty#69] + +(113) HashAggregate [codegen id : 6] +Input [3]: [c_customer_sk#29, sum#68, isEmpty#69] +Keys [1]: [c_customer_sk#29] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#70] +Results [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#70 AS csales#71] + +(114) HashAggregate [codegen id : 6] +Input [1]: [csales#71] +Keys: [] +Functions [1]: [partial_max(csales#71)] +Aggregate Attributes [1]: [max#72] +Results [1]: [max#73] + +(115) Exchange +Input [1]: [max#73] +Arguments: SinglePartition, true, [id=#74] + +(116) HashAggregate [codegen id : 7] +Input [1]: [max#73] +Keys: [] +Functions [1]: [max(csales#71)] +Aggregate 
Attributes [1]: [max(csales#71)#75] +Results [1]: [max(csales#71)#75 AS tpcds_cmax#76] + +Subquery:2 Hosting operator id = 80 Hosting Expression = ReusedSubquery Subquery scalar-subquery#37, [id=#38] + + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/simplified.txt new file mode 100644 index 0000000000000..7b08c6a571b4c --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/simplified.txt @@ -0,0 +1,198 @@ +CollectLimit + WholeStageCodegen (38) + HashAggregate [isEmpty,sum] [isEmpty,sum,sum(sales),sum(sales)] + InputAdapter + Exchange #1 + WholeStageCodegen (37) + HashAggregate [sales] [isEmpty,isEmpty,sum,sum] + InputAdapter + Union + WholeStageCodegen (18) + Project [cs_list_price,cs_quantity] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_list_price,cs_quantity,cs_sold_date_sk] + InputAdapter + SortMergeJoin [c_customer_sk,cs_bill_customer_sk] + WholeStageCodegen (11) + Sort [cs_bill_customer_sk] + InputAdapter + Exchange [cs_bill_customer_sk] #2 + WholeStageCodegen (10) + Project [cs_bill_customer_sk,cs_list_price,cs_quantity,cs_sold_date_sk] + InputAdapter + SortMergeJoin [cs_item_sk,item_sk] + WholeStageCodegen (2) + Sort [cs_item_sk] + InputAdapter + Exchange [cs_item_sk] #3 + WholeStageCodegen (1) + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_list_price,cs_quantity,cs_sold_date_sk] + WholeStageCodegen (9) + Sort [item_sk] + InputAdapter + Exchange [item_sk] #4 + WholeStageCodegen (8) + Project [item_sk] + Filter [count(1)] + HashAggregate [count,d_date,i_item_sk,substr(i_item_desc, 1, 30)] [count,count(1),count(1),item_sk] + HashAggregate [d_date,i_item_desc,i_item_sk] [count,count,substr(i_item_desc, 1, 30)] + Project [d_date,i_item_desc,i_item_sk] + SortMergeJoin 
[i_item_sk,ss_item_sk] + InputAdapter + WholeStageCodegen (5) + Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #5 + WholeStageCodegen (4) + Project [d_date,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Project [d_date,d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk,d_year] + InputAdapter + WholeStageCodegen (7) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #7 + WholeStageCodegen (6) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_desc,i_item_sk] + WholeStageCodegen (16) + Sort [c_customer_sk] + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + Subquery #1 + WholeStageCodegen (7) + HashAggregate [max] [max,max(csales),tpcds_cmax] + InputAdapter + Exchange #10 + WholeStageCodegen (6) + HashAggregate [csales] [max,max] + HashAggregate [c_customer_sk,isEmpty,sum] [csales,isEmpty,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [isEmpty,isEmpty,sum,sum] + Project [c_customer_sk,ss_quantity,ss_sales_price] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (3) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #11 + WholeStageCodegen (2) + Project [ss_customer_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet 
default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + WholeStageCodegen (5) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #13 + WholeStageCodegen (4) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk] + HashAggregate [c_customer_sk,isEmpty,sum] [isEmpty,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [isEmpty,isEmpty,sum,sum] + Project [c_customer_sk,ss_quantity,ss_sales_price] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (13) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #8 + WholeStageCodegen (12) + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price] + InputAdapter + WholeStageCodegen (15) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #9 + WholeStageCodegen (14) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk] + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (17) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + WholeStageCodegen (36) + Project [ws_list_price,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project 
[ws_list_price,ws_quantity,ws_sold_date_sk] + InputAdapter + SortMergeJoin [c_customer_sk,ws_bill_customer_sk] + WholeStageCodegen (29) + Sort [ws_bill_customer_sk] + InputAdapter + Exchange [ws_bill_customer_sk] #15 + WholeStageCodegen (28) + Project [ws_bill_customer_sk,ws_list_price,ws_quantity,ws_sold_date_sk] + InputAdapter + SortMergeJoin [item_sk,ws_item_sk] + WholeStageCodegen (20) + Sort [ws_item_sk] + InputAdapter + Exchange [ws_item_sk] #16 + WholeStageCodegen (19) + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_item_sk,ws_list_price,ws_quantity,ws_sold_date_sk] + WholeStageCodegen (27) + Sort [item_sk] + InputAdapter + ReusedExchange [item_sk] #4 + WholeStageCodegen (34) + Sort [c_customer_sk] + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [tpcds_cmax] #1 + HashAggregate [c_customer_sk,isEmpty,sum] [isEmpty,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [isEmpty,isEmpty,sum,sum] + Project [c_customer_sk,ss_quantity,ss_sales_price] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (31) + Sort [ss_customer_sk] + InputAdapter + ReusedExchange [ss_customer_sk,ss_quantity,ss_sales_price] #8 + InputAdapter + WholeStageCodegen (33) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_customer_sk] #9 + InputAdapter + ReusedExchange [d_date_sk] #14 diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt new file mode 100644 index 0000000000000..37c10d8acd77b --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt @@ -0,0 +1,542 @@ +== Physical Plan == +CollectLimit (72) ++- * HashAggregate (71) + +- Exchange (70) + +- * HashAggregate (69) + +- Union (68) + :- * Project (51) + : +- * BroadcastHashJoin Inner BuildRight (50) + : :- * Project (44) + : : +- * BroadcastHashJoin LeftSemi BuildRight (43) + : : :- * Project (27) + : : : +- * BroadcastHashJoin LeftSemi BuildRight (26) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.catalog_sales (1) + : : : +- BroadcastExchange (25) + : : : +- * Project (24) + : : : +- * Filter (23) + : : : +- * HashAggregate (22) + : : : +- Exchange (21) + : : : +- * HashAggregate (20) + : : : +- * Project (19) + : : : +- * BroadcastHashJoin Inner BuildRight (18) + : : : :- * Project (13) + : : : : +- * BroadcastHashJoin Inner BuildRight (12) + : : : : :- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.store_sales (4) + : : : : +- BroadcastExchange (11) + : : : : +- * Project (10) + : : : : +- * Filter (9) + : : : : +- * ColumnarToRow (8) + : : : : +- Scan parquet default.date_dim (7) + : : : +- BroadcastExchange (17) + : : : +- * Filter (16) + : : : +- * ColumnarToRow (15) + : : : +- Scan parquet default.item (14) + : : +- BroadcastExchange (42) + : : +- * Project (41) + : : +- * Filter (40) + : : +- * HashAggregate (39) + : : +- Exchange (38) + : : +- * HashAggregate (37) + : : +- * Project (36) + : : +- * BroadcastHashJoin Inner BuildRight (35) + : : :- * Filter (30) + : : : +- * ColumnarToRow (29) + : : : +- Scan parquet default.store_sales (28) + : : +- BroadcastExchange (34) + : : +- * Filter (33) + : : +- * ColumnarToRow (32) + : : 
+- Scan parquet default.customer (31) + : +- BroadcastExchange (49) + : +- * Project (48) + : +- * Filter (47) + : +- * ColumnarToRow (46) + : +- Scan parquet default.date_dim (45) + +- * Project (67) + +- * BroadcastHashJoin Inner BuildRight (66) + :- * Project (64) + : +- * BroadcastHashJoin LeftSemi BuildRight (63) + : :- * Project (57) + : : +- * BroadcastHashJoin LeftSemi BuildRight (56) + : : :- * Filter (54) + : : : +- * ColumnarToRow (53) + : : : +- Scan parquet default.web_sales (52) + : : +- ReusedExchange (55) + : +- BroadcastExchange (62) + : +- * Project (61) + : +- * Filter (60) + : +- * HashAggregate (59) + : +- ReusedExchange (58) + +- ReusedExchange (65) + + +(1) Scan parquet default.catalog_sales +Output [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 9] +Input [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5] + +(3) Filter [codegen id : 9] +Input [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5] +Condition : isnotnull(cs_sold_date_sk#1) + +(4) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#6, ss_item_sk#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 3] +Input [2]: [ss_sold_date_sk#6, ss_item_sk#7] + +(6) Filter [codegen id : 3] +Input [2]: [ss_sold_date_sk#6, ss_item_sk#7] +Condition : (isnotnull(ss_sold_date_sk#6) AND isnotnull(ss_item_sk#7)) + +(7) Scan parquet default.date_dim +Output 
[3]: [d_date_sk#8, d_date#9, d_year#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#8, d_date#9, d_year#10] + +(9) Filter [codegen id : 1] +Input [3]: [d_date_sk#8, d_date#9, d_year#10] +Condition : (d_year#10 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#8)) + +(10) Project [codegen id : 1] +Output [2]: [d_date_sk#8, d_date#9] +Input [3]: [d_date_sk#8, d_date#9, d_year#10] + +(11) BroadcastExchange +Input [2]: [d_date_sk#8, d_date#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] + +(12) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#6] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(13) Project [codegen id : 3] +Output [2]: [ss_item_sk#7, d_date#9] +Input [4]: [ss_sold_date_sk#6, ss_item_sk#7, d_date_sk#8, d_date#9] + +(14) Scan parquet default.item +Output [2]: [i_item_sk#12, i_item_desc#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 2] +Input [2]: [i_item_sk#12, i_item_desc#13] + +(16) Filter [codegen id : 2] +Input [2]: [i_item_sk#12, i_item_desc#13] +Condition : isnotnull(i_item_sk#12) + +(17) BroadcastExchange +Input [2]: [i_item_sk#12, i_item_desc#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] + +(18) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#7] +Right keys [1]: [i_item_sk#12] +Join condition: None + +(19) Project [codegen id : 3] +Output [3]: [d_date#9, i_item_sk#12, i_item_desc#13] +Input [4]: [ss_item_sk#7, 
d_date#9, i_item_sk#12, i_item_desc#13] + +(20) HashAggregate [codegen id : 3] +Input [3]: [d_date#9, i_item_sk#12, i_item_desc#13] +Keys [3]: [substr(i_item_desc#13, 1, 30) AS substr(i_item_desc#13, 1, 30)#15, i_item_sk#12, d_date#9] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#16] +Results [4]: [substr(i_item_desc#13, 1, 30)#15, i_item_sk#12, d_date#9, count#17] + +(21) Exchange +Input [4]: [substr(i_item_desc#13, 1, 30)#15, i_item_sk#12, d_date#9, count#17] +Arguments: hashpartitioning(substr(i_item_desc#13, 1, 30)#15, i_item_sk#12, d_date#9, 5), true, [id=#18] + +(22) HashAggregate [codegen id : 4] +Input [4]: [substr(i_item_desc#13, 1, 30)#15, i_item_sk#12, d_date#9, count#17] +Keys [3]: [substr(i_item_desc#13, 1, 30)#15, i_item_sk#12, d_date#9] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#19] +Results [2]: [i_item_sk#12 AS item_sk#20, count(1)#19 AS count(1)#21] + +(23) Filter [codegen id : 4] +Input [2]: [item_sk#20, count(1)#21] +Condition : (count(1)#21 > 4) + +(24) Project [codegen id : 4] +Output [1]: [item_sk#20] +Input [2]: [item_sk#20, count(1)#21] + +(25) BroadcastExchange +Input [1]: [item_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] + +(26) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [cs_item_sk#3] +Right keys [1]: [item_sk#20] +Join condition: None + +(27) Project [codegen id : 9] +Output [4]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_quantity#4, cs_list_price#5] +Input [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5] + +(28) Scan parquet default.store_sales +Output [3]: [ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 
6] +Input [3]: [ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] + +(30) Filter [codegen id : 6] +Input [3]: [ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] +Condition : isnotnull(ss_customer_sk#23) + +(31) Scan parquet default.customer +Output [1]: [c_customer_sk#26] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 5] +Input [1]: [c_customer_sk#26] + +(33) Filter [codegen id : 5] +Input [1]: [c_customer_sk#26] +Condition : isnotnull(c_customer_sk#26) + +(34) BroadcastExchange +Input [1]: [c_customer_sk#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#27] + +(35) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#23] +Right keys [1]: [c_customer_sk#26] +Join condition: None + +(36) Project [codegen id : 6] +Output [3]: [ss_quantity#24, ss_sales_price#25, c_customer_sk#26] +Input [4]: [ss_customer_sk#23, ss_quantity#24, ss_sales_price#25, c_customer_sk#26] + +(37) HashAggregate [codegen id : 6] +Input [3]: [ss_quantity#24, ss_sales_price#25, c_customer_sk#26] +Keys [1]: [c_customer_sk#26] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#28, isEmpty#29] +Results [3]: [c_customer_sk#26, sum#30, isEmpty#31] + +(38) Exchange +Input [3]: [c_customer_sk#26, sum#30, isEmpty#31] +Arguments: hashpartitioning(c_customer_sk#26, 5), true, [id=#32] + +(39) HashAggregate [codegen id : 7] +Input [3]: [c_customer_sk#26, sum#30, isEmpty#31] +Keys [1]: [c_customer_sk#26] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#33] +Results [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#33 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34] + +(40) Filter [codegen id : 7] +Input [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery scalar-subquery#35, [id=#36] as decimal(32,6)))), DecimalType(38,8), true))) + +(41) Project [codegen id : 7] +Output [1]: [c_customer_sk#26] +Input [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34] + +(42) BroadcastExchange +Input [1]: [c_customer_sk#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), 
[id=#37] + +(43) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [cs_bill_customer_sk#2] +Right keys [1]: [c_customer_sk#26] +Join condition: None + +(44) Project [codegen id : 9] +Output [3]: [cs_sold_date_sk#1, cs_quantity#4, cs_list_price#5] +Input [4]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_quantity#4, cs_list_price#5] + +(45) Scan parquet default.date_dim +Output [3]: [d_date_sk#8, d_year#10, d_moy#38] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)] +ReadSchema: struct + +(46) ColumnarToRow [codegen id : 8] +Input [3]: [d_date_sk#8, d_year#10, d_moy#38] + +(47) Filter [codegen id : 8] +Input [3]: [d_date_sk#8, d_year#10, d_moy#38] +Condition : ((((isnotnull(d_year#10) AND isnotnull(d_moy#38)) AND (d_year#10 = 2000)) AND (d_moy#38 = 2)) AND isnotnull(d_date_sk#8)) + +(48) Project [codegen id : 8] +Output [1]: [d_date_sk#8] +Input [3]: [d_date_sk#8, d_year#10, d_moy#38] + +(49) BroadcastExchange +Input [1]: [d_date_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#39] + +(50) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(51) Project [codegen id : 9] +Output [1]: [CheckOverflow((promote_precision(cast(cast(cs_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#5 as decimal(12,2)))), DecimalType(18,2), true) AS sales#40] +Input [4]: [cs_sold_date_sk#1, cs_quantity#4, cs_list_price#5, d_date_sk#8] + +(52) Scan parquet default.web_sales +Output [5]: [ws_sold_date_sk#41, ws_item_sk#42, ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(53) ColumnarToRow [codegen id : 18] +Input [5]: [ws_sold_date_sk#41, ws_item_sk#42, ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45] + +(54) Filter [codegen id : 18] +Input [5]: [ws_sold_date_sk#41, ws_item_sk#42, ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45] +Condition : isnotnull(ws_sold_date_sk#41) + +(55) ReusedExchange [Reuses operator id: 25] +Output [1]: [item_sk#20] + +(56) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [ws_item_sk#42] +Right keys [1]: [item_sk#20] +Join condition: None + +(57) Project [codegen id : 18] +Output [4]: [ws_sold_date_sk#41, ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45] +Input [5]: [ws_sold_date_sk#41, ws_item_sk#42, ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45] + +(58) ReusedExchange [Reuses operator id: 38] +Output [3]: [c_customer_sk#26, sum#46, isEmpty#47] + +(59) HashAggregate [codegen id : 16] +Input [3]: [c_customer_sk#26, sum#46, isEmpty#47] +Keys [1]: [c_customer_sk#26] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#48] +Results [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#48 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), 
DecimalType(18,2), true))#49] + +(60) Filter [codegen id : 16] +Input [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#49] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#49) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#49 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#35, [id=#36] as decimal(32,6)))), DecimalType(38,8), true))) + +(61) Project [codegen id : 16] +Output [1]: [c_customer_sk#26] +Input [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#49] + +(62) BroadcastExchange +Input [1]: [c_customer_sk#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#50] + +(63) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [ws_bill_customer_sk#43] +Right keys [1]: [c_customer_sk#26] +Join condition: None + +(64) Project [codegen id : 18] +Output [3]: [ws_sold_date_sk#41, ws_quantity#44, ws_list_price#45] +Input [4]: [ws_sold_date_sk#41, ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45] + +(65) ReusedExchange [Reuses operator id: 49] +Output [1]: [d_date_sk#8] + +(66) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [ws_sold_date_sk#41] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(67) Project [codegen id : 18] +Output [1]: [CheckOverflow((promote_precision(cast(cast(ws_quantity#44 as decimal(10,0)) as 
decimal(12,2))) * promote_precision(cast(ws_list_price#45 as decimal(12,2)))), DecimalType(18,2), true) AS sales#51] +Input [4]: [ws_sold_date_sk#41, ws_quantity#44, ws_list_price#45, d_date_sk#8] + +(68) Union + +(69) HashAggregate [codegen id : 19] +Input [1]: [sales#40] +Keys: [] +Functions [1]: [partial_sum(sales#40)] +Aggregate Attributes [2]: [sum#52, isEmpty#53] +Results [2]: [sum#54, isEmpty#55] + +(70) Exchange +Input [2]: [sum#54, isEmpty#55] +Arguments: SinglePartition, true, [id=#56] + +(71) HashAggregate [codegen id : 20] +Input [2]: [sum#54, isEmpty#55] +Keys: [] +Functions [1]: [sum(sales#40)] +Aggregate Attributes [1]: [sum(sales#40)#57] +Results [1]: [sum(sales#40)#57 AS sum(sales)#58] + +(72) CollectLimit +Input [1]: [sum(sales)#58] +Arguments: 100 + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 40 Hosting Expression = Subquery scalar-subquery#35, [id=#36] +* HashAggregate (94) ++- Exchange (93) + +- * HashAggregate (92) + +- * HashAggregate (91) + +- Exchange (90) + +- * HashAggregate (89) + +- * Project (88) + +- * BroadcastHashJoin Inner BuildRight (87) + :- * Project (81) + : +- * BroadcastHashJoin Inner BuildRight (80) + : :- * Filter (75) + : : +- * ColumnarToRow (74) + : : +- Scan parquet default.store_sales (73) + : +- BroadcastExchange (79) + : +- * Filter (78) + : +- * ColumnarToRow (77) + : +- Scan parquet default.customer (76) + +- BroadcastExchange (86) + +- * Project (85) + +- * Filter (84) + +- * ColumnarToRow (83) + +- Scan parquet default.date_dim (82) + + +(73) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(74) ColumnarToRow [codegen id : 3] +Input [4]: [ss_sold_date_sk#6, 
ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] + +(75) Filter [codegen id : 3] +Input [4]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] +Condition : (isnotnull(ss_customer_sk#23) AND isnotnull(ss_sold_date_sk#6)) + +(76) Scan parquet default.customer +Output [1]: [c_customer_sk#26] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(77) ColumnarToRow [codegen id : 1] +Input [1]: [c_customer_sk#26] + +(78) Filter [codegen id : 1] +Input [1]: [c_customer_sk#26] +Condition : isnotnull(c_customer_sk#26) + +(79) BroadcastExchange +Input [1]: [c_customer_sk#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#59] + +(80) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_customer_sk#23] +Right keys [1]: [c_customer_sk#26] +Join condition: None + +(81) Project [codegen id : 3] +Output [4]: [ss_sold_date_sk#6, ss_quantity#24, ss_sales_price#25, c_customer_sk#26] +Input [5]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25, c_customer_sk#26] + +(82) Scan parquet default.date_dim +Output [2]: [d_date_sk#8, d_year#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(83) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#8, d_year#10] + +(84) Filter [codegen id : 2] +Input [2]: [d_date_sk#8, d_year#10] +Condition : (d_year#10 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#8)) + +(85) Project [codegen id : 2] +Output [1]: [d_date_sk#8] +Input [2]: [d_date_sk#8, d_year#10] + +(86) BroadcastExchange +Input [1]: [d_date_sk#8] +Arguments: 
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#60] + +(87) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#6] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(88) Project [codegen id : 3] +Output [3]: [ss_quantity#24, ss_sales_price#25, c_customer_sk#26] +Input [5]: [ss_sold_date_sk#6, ss_quantity#24, ss_sales_price#25, c_customer_sk#26, d_date_sk#8] + +(89) HashAggregate [codegen id : 3] +Input [3]: [ss_quantity#24, ss_sales_price#25, c_customer_sk#26] +Keys [1]: [c_customer_sk#26] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#61, isEmpty#62] +Results [3]: [c_customer_sk#26, sum#63, isEmpty#64] + +(90) Exchange +Input [3]: [c_customer_sk#26, sum#63, isEmpty#64] +Arguments: hashpartitioning(c_customer_sk#26, 5), true, [id=#65] + +(91) HashAggregate [codegen id : 4] +Input [3]: [c_customer_sk#26, sum#63, isEmpty#64] +Keys [1]: [c_customer_sk#26] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#66] +Results [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#66 AS csales#67] + +(92) HashAggregate [codegen id : 4] +Input [1]: [csales#67] +Keys: [] +Functions [1]: [partial_max(csales#67)] +Aggregate Attributes [1]: [max#68] +Results [1]: [max#69] + +(93) Exchange +Input [1]: [max#69] +Arguments: 
SinglePartition, true, [id=#70] + +(94) HashAggregate [codegen id : 5] +Input [1]: [max#69] +Keys: [] +Functions [1]: [max(csales#67)] +Aggregate Attributes [1]: [max(csales#67)#71] +Results [1]: [max(csales#67)#71 AS tpcds_cmax#72] + +Subquery:2 Hosting operator id = 60 Hosting Expression = ReusedSubquery Subquery scalar-subquery#35, [id=#36] + + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt new file mode 100644 index 0000000000000..a69293edd6218 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt @@ -0,0 +1,143 @@ +CollectLimit + WholeStageCodegen (20) + HashAggregate [isEmpty,sum] [isEmpty,sum,sum(sales),sum(sales)] + InputAdapter + Exchange #1 + WholeStageCodegen (19) + HashAggregate [sales] [isEmpty,isEmpty,sum,sum] + InputAdapter + Union + WholeStageCodegen (9) + Project [cs_list_price,cs_quantity] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_list_price,cs_quantity,cs_sold_date_sk] + BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] + Project [cs_bill_customer_sk,cs_list_price,cs_quantity,cs_sold_date_sk] + BroadcastHashJoin [cs_item_sk,item_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_list_price,cs_quantity,cs_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (4) + Project [item_sk] + Filter [count(1)] + HashAggregate [count,d_date,i_item_sk,substr(i_item_desc, 1, 30)] [count,count(1),count(1),item_sk] + InputAdapter + Exchange [d_date,i_item_sk,substr(i_item_desc, 1, 30)] #3 + WholeStageCodegen (3) + HashAggregate [d_date,i_item_desc,i_item_sk] [count,count,substr(i_item_desc, 1, 30)] + Project [d_date,i_item_desc,i_item_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [d_date,ss_item_sk] + BroadcastHashJoin 
[d_date_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date,d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_desc,i_item_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + Subquery #1 + WholeStageCodegen (5) + HashAggregate [max] [max,max(csales),tpcds_cmax] + InputAdapter + Exchange #9 + WholeStageCodegen (4) + HashAggregate [csales] [max,max] + HashAggregate [c_customer_sk,isEmpty,sum] [csales,isEmpty,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + Exchange [c_customer_sk] #10 + WholeStageCodegen (3) + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [isEmpty,isEmpty,sum,sum] + Project [c_customer_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [c_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (1) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk] + InputAdapter + BroadcastExchange #12 + 
WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + HashAggregate [c_customer_sk,isEmpty,sum] [isEmpty,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + Exchange [c_customer_sk] #7 + WholeStageCodegen (6) + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [isEmpty,isEmpty,sum,sum] + Project [c_customer_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (5) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (8) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + WholeStageCodegen (18) + Project [ws_list_price,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_list_price,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Project [ws_bill_customer_sk,ws_list_price,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [item_sk,ws_item_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_item_sk,ws_list_price,ws_quantity,ws_sold_date_sk] + InputAdapter + ReusedExchange [item_sk] #2 + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (16) + Project [c_customer_sk] + Filter 
[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [tpcds_cmax] #1 + HashAggregate [c_customer_sk,isEmpty,sum] [isEmpty,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + ReusedExchange [c_customer_sk,isEmpty,sum] #7 + InputAdapter + ReusedExchange [d_date_sk] #13 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/explain.txt new file mode 100644 index 0000000000000..6039d13e74edb --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/explain.txt @@ -0,0 +1,870 @@ +== Physical Plan == +TakeOrderedAndProject (130) ++- Union (129) + :- * HashAggregate (82) + : +- Exchange (81) + : +- * HashAggregate (80) + : +- * Project (79) + : +- * SortMergeJoin Inner (78) + : :- * Project (59) + : : +- * BroadcastHashJoin Inner BuildRight (58) + : : :- SortMergeJoin LeftSemi (52) + : : : :- * Sort (34) + : : : : +- Exchange (33) + : : : : +- * Project (32) + : : : : +- SortMergeJoin LeftSemi (31) + : : : : :- * Sort (5) + : : : : : +- Exchange (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.catalog_sales (1) + : : : : +- * Sort (30) + : : : : +- Exchange (29) + : : : : +- * Project (28) + : : : : +- * Filter (27) + : : : : +- * HashAggregate (26) + : : : : +- * HashAggregate (25) + : : : : +- * Project (24) + : : : : +- * SortMergeJoin Inner (23) + : : : : :- * Sort 
(17) + : : : : : +- Exchange (16) + : : : : : +- * Project (15) + : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : :- * Filter (8) + : : : : : : +- * ColumnarToRow (7) + : : : : : : +- Scan parquet default.store_sales (6) + : : : : : +- BroadcastExchange (13) + : : : : : +- * Project (12) + : : : : : +- * Filter (11) + : : : : : +- * ColumnarToRow (10) + : : : : : +- Scan parquet default.date_dim (9) + : : : : +- * Sort (22) + : : : : +- Exchange (21) + : : : : +- * Filter (20) + : : : : +- * ColumnarToRow (19) + : : : : +- Scan parquet default.item (18) + : : : +- * Sort (51) + : : : +- * Project (50) + : : : +- * Filter (49) + : : : +- * HashAggregate (48) + : : : +- * HashAggregate (47) + : : : +- * Project (46) + : : : +- * SortMergeJoin Inner (45) + : : : :- * Sort (39) + : : : : +- Exchange (38) + : : : : +- * Filter (37) + : : : : +- * ColumnarToRow (36) + : : : : +- Scan parquet default.store_sales (35) + : : : +- * Sort (44) + : : : +- Exchange (43) + : : : +- * Filter (42) + : : : +- * ColumnarToRow (41) + : : : +- Scan parquet default.customer (40) + : : +- BroadcastExchange (57) + : : +- * Project (56) + : : +- * Filter (55) + : : +- * ColumnarToRow (54) + : : +- Scan parquet default.date_dim (53) + : +- SortMergeJoin LeftSemi (77) + : :- * Sort (64) + : : +- Exchange (63) + : : +- * Filter (62) + : : +- * ColumnarToRow (61) + : : +- Scan parquet default.customer (60) + : +- * Sort (76) + : +- Exchange (75) + : +- * Project (74) + : +- * Filter (73) + : +- * HashAggregate (72) + : +- * HashAggregate (71) + : +- * Project (70) + : +- * SortMergeJoin Inner (69) + : :- * Sort (66) + : : +- ReusedExchange (65) + : +- * Sort (68) + : +- ReusedExchange (67) + +- * HashAggregate (128) + +- Exchange (127) + +- * HashAggregate (126) + +- * Project (125) + +- * SortMergeJoin Inner (124) + :- * Project (108) + : +- * BroadcastHashJoin Inner BuildRight (107) + : :- SortMergeJoin LeftSemi (105) + : : :- * Sort (93) + : : : +- Exchange (92) + : : : 
+- * Project (91) + : : : +- SortMergeJoin LeftSemi (90) + : : : :- * Sort (87) + : : : : +- Exchange (86) + : : : : +- * Filter (85) + : : : : +- * ColumnarToRow (84) + : : : : +- Scan parquet default.web_sales (83) + : : : +- * Sort (89) + : : : +- ReusedExchange (88) + : : +- * Sort (104) + : : +- * Project (103) + : : +- * Filter (102) + : : +- * HashAggregate (101) + : : +- * HashAggregate (100) + : : +- * Project (99) + : : +- * SortMergeJoin Inner (98) + : : :- * Sort (95) + : : : +- ReusedExchange (94) + : : +- * Sort (97) + : : +- ReusedExchange (96) + : +- ReusedExchange (106) + +- SortMergeJoin LeftSemi (123) + :- * Sort (110) + : +- ReusedExchange (109) + +- * Sort (122) + +- Exchange (121) + +- * Project (120) + +- * Filter (119) + +- * HashAggregate (118) + +- * HashAggregate (117) + +- * Project (116) + +- * SortMergeJoin Inner (115) + :- * Sort (112) + : +- ReusedExchange (111) + +- * Sort (114) + +- ReusedExchange (113) + + +(1) Scan parquet default.catalog_sales +Output [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5] + +(3) Filter [codegen id : 1] +Input [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5] +Condition : (isnotnull(cs_bill_customer_sk#2) AND isnotnull(cs_sold_date_sk#1)) + +(4) Exchange +Input [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5] +Arguments: hashpartitioning(cs_item_sk#3, 5), true, [id=#6] + +(5) Sort [codegen id : 2] +Input [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, 
cs_item_sk#3, cs_quantity#4, cs_list_price#5] +Arguments: [cs_item_sk#3 ASC NULLS FIRST], false, 0 + +(6) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#7, ss_item_sk#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 4] +Input [2]: [ss_sold_date_sk#7, ss_item_sk#8] + +(8) Filter [codegen id : 4] +Input [2]: [ss_sold_date_sk#7, ss_item_sk#8] +Condition : (isnotnull(ss_sold_date_sk#7) AND isnotnull(ss_item_sk#8)) + +(9) Scan parquet default.date_dim +Output [3]: [d_date_sk#9, d_date#10, d_year#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(10) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#9, d_date#10, d_year#11] + +(11) Filter [codegen id : 3] +Input [3]: [d_date_sk#9, d_date#10, d_year#11] +Condition : (d_year#11 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#9)) + +(12) Project [codegen id : 3] +Output [2]: [d_date_sk#9, d_date#10] +Input [3]: [d_date_sk#9, d_date#10, d_year#11] + +(13) BroadcastExchange +Input [2]: [d_date_sk#9, d_date#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(15) Project [codegen id : 4] +Output [2]: [ss_item_sk#8, d_date#10] +Input [4]: [ss_sold_date_sk#7, ss_item_sk#8, d_date_sk#9, d_date#10] + +(16) Exchange +Input [2]: [ss_item_sk#8, d_date#10] +Arguments: hashpartitioning(ss_item_sk#8, 5), true, [id=#13] + +(17) Sort [codegen id : 5] +Input [2]: 
[ss_item_sk#8, d_date#10] +Arguments: [ss_item_sk#8 ASC NULLS FIRST], false, 0 + +(18) Scan parquet default.item +Output [2]: [i_item_sk#14, i_item_desc#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 6] +Input [2]: [i_item_sk#14, i_item_desc#15] + +(20) Filter [codegen id : 6] +Input [2]: [i_item_sk#14, i_item_desc#15] +Condition : isnotnull(i_item_sk#14) + +(21) Exchange +Input [2]: [i_item_sk#14, i_item_desc#15] +Arguments: hashpartitioning(i_item_sk#14, 5), true, [id=#16] + +(22) Sort [codegen id : 7] +Input [2]: [i_item_sk#14, i_item_desc#15] +Arguments: [i_item_sk#14 ASC NULLS FIRST], false, 0 + +(23) SortMergeJoin [codegen id : 8] +Left keys [1]: [ss_item_sk#8] +Right keys [1]: [i_item_sk#14] +Join condition: None + +(24) Project [codegen id : 8] +Output [3]: [d_date#10, i_item_sk#14, i_item_desc#15] +Input [4]: [ss_item_sk#8, d_date#10, i_item_sk#14, i_item_desc#15] + +(25) HashAggregate [codegen id : 8] +Input [3]: [d_date#10, i_item_sk#14, i_item_desc#15] +Keys [3]: [substr(i_item_desc#15, 1, 30) AS substr(i_item_desc#15, 1, 30)#17, i_item_sk#14, d_date#10] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#18] +Results [4]: [substr(i_item_desc#15, 1, 30)#17, i_item_sk#14, d_date#10, count#19] + +(26) HashAggregate [codegen id : 8] +Input [4]: [substr(i_item_desc#15, 1, 30)#17, i_item_sk#14, d_date#10, count#19] +Keys [3]: [substr(i_item_desc#15, 1, 30)#17, i_item_sk#14, d_date#10] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#20] +Results [2]: [i_item_sk#14 AS item_sk#21, count(1)#20 AS count(1)#22] + +(27) Filter [codegen id : 8] +Input [2]: [item_sk#21, count(1)#22] +Condition : (count(1)#22 > 4) + +(28) Project [codegen id : 8] +Output [1]: [item_sk#21] +Input [2]: [item_sk#21, count(1)#22] + 
+(29) Exchange +Input [1]: [item_sk#21] +Arguments: hashpartitioning(item_sk#21, 5), true, [id=#23] + +(30) Sort [codegen id : 9] +Input [1]: [item_sk#21] +Arguments: [item_sk#21 ASC NULLS FIRST], false, 0 + +(31) SortMergeJoin +Left keys [1]: [cs_item_sk#3] +Right keys [1]: [item_sk#21] +Join condition: None + +(32) Project [codegen id : 10] +Output [4]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_quantity#4, cs_list_price#5] +Input [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5] + +(33) Exchange +Input [4]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_quantity#4, cs_list_price#5] +Arguments: hashpartitioning(cs_bill_customer_sk#2, 5), true, [id=#24] + +(34) Sort [codegen id : 11] +Input [4]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_quantity#4, cs_list_price#5] +Arguments: [cs_bill_customer_sk#2 ASC NULLS FIRST], false, 0 + +(35) Scan parquet default.store_sales +Output [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(36) ColumnarToRow [codegen id : 12] +Input [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] + +(37) Filter [codegen id : 12] +Input [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] +Condition : isnotnull(ss_customer_sk#25) + +(38) Exchange +Input [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] +Arguments: hashpartitioning(ss_customer_sk#25, 5), true, [id=#28] + +(39) Sort [codegen id : 13] +Input [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] +Arguments: [ss_customer_sk#25 ASC NULLS FIRST], false, 0 + +(40) Scan parquet default.customer +Output [1]: [c_customer_sk#29] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 14] +Input [1]: [c_customer_sk#29] + +(42) Filter [codegen id : 14] +Input [1]: [c_customer_sk#29] +Condition : isnotnull(c_customer_sk#29) + +(43) Exchange +Input [1]: [c_customer_sk#29] +Arguments: hashpartitioning(c_customer_sk#29, 5), true, [id=#30] + +(44) Sort [codegen id : 15] +Input [1]: [c_customer_sk#29] +Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 + +(45) SortMergeJoin [codegen id : 16] +Left keys [1]: [ss_customer_sk#25] +Right keys [1]: [c_customer_sk#29] +Join condition: None + +(46) Project [codegen id : 16] +Output [3]: [ss_quantity#26, ss_sales_price#27, c_customer_sk#29] +Input [4]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27, c_customer_sk#29] + +(47) HashAggregate [codegen id : 16] +Input [3]: [ss_quantity#26, ss_sales_price#27, c_customer_sk#29] +Keys [1]: [c_customer_sk#29] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#31, isEmpty#32] +Results [3]: [c_customer_sk#29, sum#33, isEmpty#34] + +(48) HashAggregate [codegen id : 16] +Input [3]: [c_customer_sk#29, sum#33, isEmpty#34] +Keys [1]: [c_customer_sk#29] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#35] +Results [2]: [c_customer_sk#29, 
sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#35 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#36] + +(49) Filter [codegen id : 16] +Input [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#36] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#36) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8), true))) + +(50) Project [codegen id : 16] +Output [1]: [c_customer_sk#29] +Input [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#36] + +(51) Sort [codegen id : 16] +Input [1]: [c_customer_sk#29] +Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 + +(52) SortMergeJoin +Left keys [1]: [cs_bill_customer_sk#2] +Right keys [1]: [c_customer_sk#29] +Join condition: None + +(53) Scan parquet default.date_dim +Output [3]: [d_date_sk#9, d_year#11, d_moy#39] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)] +ReadSchema: struct + +(54) ColumnarToRow [codegen id : 17] +Input [3]: [d_date_sk#9, d_year#11, d_moy#39] + +(55) Filter [codegen id : 17] +Input [3]: [d_date_sk#9, d_year#11, d_moy#39] +Condition : ((((isnotnull(d_year#11) AND isnotnull(d_moy#39)) AND (d_year#11 = 2000)) AND (d_moy#39 = 2)) AND isnotnull(d_date_sk#9)) + +(56) Project [codegen id : 17] +Output [1]: [d_date_sk#9] +Input [3]: [d_date_sk#9, d_year#11, d_moy#39] + +(57) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#40] + +(58) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(59) Project [codegen id : 18] +Output [3]: [cs_bill_customer_sk#2, cs_quantity#4, cs_list_price#5] +Input [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_quantity#4, cs_list_price#5, d_date_sk#9] + +(60) Scan parquet default.customer +Output [3]: [c_customer_sk#29, c_first_name#41, c_last_name#42] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(61) ColumnarToRow [codegen id : 19] +Input [3]: [c_customer_sk#29, c_first_name#41, c_last_name#42] + +(62) Filter [codegen id : 19] +Input [3]: [c_customer_sk#29, c_first_name#41, c_last_name#42] +Condition : isnotnull(c_customer_sk#29) + +(63) Exchange +Input [3]: [c_customer_sk#29, c_first_name#41, c_last_name#42] +Arguments: hashpartitioning(c_customer_sk#29, 5), true, [id=#43] + +(64) Sort [codegen id : 20] +Input [3]: [c_customer_sk#29, c_first_name#41, c_last_name#42] 
+Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 + +(65) ReusedExchange [Reuses operator id: 38] +Output [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] + +(66) Sort [codegen id : 22] +Input [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] +Arguments: [ss_customer_sk#25 ASC NULLS FIRST], false, 0 + +(67) ReusedExchange [Reuses operator id: 43] +Output [1]: [c_customer_sk#29] + +(68) Sort [codegen id : 24] +Input [1]: [c_customer_sk#29] +Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 + +(69) SortMergeJoin [codegen id : 25] +Left keys [1]: [ss_customer_sk#25] +Right keys [1]: [c_customer_sk#29] +Join condition: None + +(70) Project [codegen id : 25] +Output [3]: [ss_quantity#26, ss_sales_price#27, c_customer_sk#29] +Input [4]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27, c_customer_sk#29] + +(71) HashAggregate [codegen id : 25] +Input [3]: [ss_quantity#26, ss_sales_price#27, c_customer_sk#29] +Keys [1]: [c_customer_sk#29] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#31, isEmpty#32] +Results [3]: [c_customer_sk#29, sum#33, isEmpty#34] + +(72) HashAggregate [codegen id : 25] +Input [3]: [c_customer_sk#29, sum#33, isEmpty#34] +Keys [1]: [c_customer_sk#29] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#35] +Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#35 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#36] + +(73) Filter [codegen id : 25] +Input [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#36] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#36) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8), true))) + +(74) Project [codegen id : 25] +Output [1]: [c_customer_sk#29 AS c_customer_sk#29#44] +Input [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#36] + +(75) Exchange +Input [1]: [c_customer_sk#29#44] +Arguments: hashpartitioning(c_customer_sk#29#44, 5), true, [id=#45] + +(76) Sort [codegen id : 26] +Input [1]: [c_customer_sk#29#44] +Arguments: [c_customer_sk#29#44 ASC NULLS FIRST], false, 0 + +(77) SortMergeJoin +Left keys [1]: [c_customer_sk#29] +Right keys [1]: [c_customer_sk#29#44] +Join condition: None + +(78) SortMergeJoin [codegen id : 27] +Left keys [1]: [cs_bill_customer_sk#2] +Right keys [1]: [c_customer_sk#29] +Join condition: None + +(79) Project [codegen id : 27] +Output [4]: 
[cs_quantity#4, cs_list_price#5, c_first_name#41, c_last_name#42] +Input [6]: [cs_bill_customer_sk#2, cs_quantity#4, cs_list_price#5, c_customer_sk#29, c_first_name#41, c_last_name#42] + +(80) HashAggregate [codegen id : 27] +Input [4]: [cs_quantity#4, cs_list_price#5, c_first_name#41, c_last_name#42] +Keys [2]: [c_last_name#42, c_first_name#41] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#5 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#46, isEmpty#47] +Results [4]: [c_last_name#42, c_first_name#41, sum#48, isEmpty#49] + +(81) Exchange +Input [4]: [c_last_name#42, c_first_name#41, sum#48, isEmpty#49] +Arguments: hashpartitioning(c_last_name#42, c_first_name#41, 5), true, [id=#50] + +(82) HashAggregate [codegen id : 28] +Input [4]: [c_last_name#42, c_first_name#41, sum#48, isEmpty#49] +Keys [2]: [c_last_name#42, c_first_name#41] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#5 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#5 as decimal(12,2)))), DecimalType(18,2), true))#51] +Results [3]: [c_last_name#42, c_first_name#41, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#5 as decimal(12,2)))), DecimalType(18,2), true))#51 AS sales#52] + +(83) Scan parquet default.web_sales +Output [5]: [ws_sold_date_sk#53, ws_item_sk#54, ws_bill_customer_sk#55, ws_quantity#56, ws_list_price#57] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] 
+PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(84) ColumnarToRow [codegen id : 29] +Input [5]: [ws_sold_date_sk#53, ws_item_sk#54, ws_bill_customer_sk#55, ws_quantity#56, ws_list_price#57] + +(85) Filter [codegen id : 29] +Input [5]: [ws_sold_date_sk#53, ws_item_sk#54, ws_bill_customer_sk#55, ws_quantity#56, ws_list_price#57] +Condition : (isnotnull(ws_bill_customer_sk#55) AND isnotnull(ws_sold_date_sk#53)) + +(86) Exchange +Input [5]: [ws_sold_date_sk#53, ws_item_sk#54, ws_bill_customer_sk#55, ws_quantity#56, ws_list_price#57] +Arguments: hashpartitioning(ws_item_sk#54, 5), true, [id=#58] + +(87) Sort [codegen id : 30] +Input [5]: [ws_sold_date_sk#53, ws_item_sk#54, ws_bill_customer_sk#55, ws_quantity#56, ws_list_price#57] +Arguments: [ws_item_sk#54 ASC NULLS FIRST], false, 0 + +(88) ReusedExchange [Reuses operator id: 29] +Output [1]: [item_sk#21] + +(89) Sort [codegen id : 37] +Input [1]: [item_sk#21] +Arguments: [item_sk#21 ASC NULLS FIRST], false, 0 + +(90) SortMergeJoin +Left keys [1]: [ws_item_sk#54] +Right keys [1]: [item_sk#21] +Join condition: None + +(91) Project [codegen id : 38] +Output [4]: [ws_sold_date_sk#53, ws_bill_customer_sk#55, ws_quantity#56, ws_list_price#57] +Input [5]: [ws_sold_date_sk#53, ws_item_sk#54, ws_bill_customer_sk#55, ws_quantity#56, ws_list_price#57] + +(92) Exchange +Input [4]: [ws_sold_date_sk#53, ws_bill_customer_sk#55, ws_quantity#56, ws_list_price#57] +Arguments: hashpartitioning(ws_bill_customer_sk#55, 5), true, [id=#59] + +(93) Sort [codegen id : 39] +Input [4]: [ws_sold_date_sk#53, ws_bill_customer_sk#55, ws_quantity#56, ws_list_price#57] +Arguments: [ws_bill_customer_sk#55 ASC NULLS FIRST], false, 0 + +(94) ReusedExchange [Reuses operator id: 38] +Output [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] + +(95) Sort [codegen id : 41] +Input [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] +Arguments: [ss_customer_sk#25 ASC NULLS FIRST], false, 0 + 
+(96) ReusedExchange [Reuses operator id: 43] +Output [1]: [c_customer_sk#29] + +(97) Sort [codegen id : 43] +Input [1]: [c_customer_sk#29] +Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 + +(98) SortMergeJoin [codegen id : 44] +Left keys [1]: [ss_customer_sk#25] +Right keys [1]: [c_customer_sk#29] +Join condition: None + +(99) Project [codegen id : 44] +Output [3]: [ss_quantity#26, ss_sales_price#27, c_customer_sk#29] +Input [4]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27, c_customer_sk#29] + +(100) HashAggregate [codegen id : 44] +Input [3]: [ss_quantity#26, ss_sales_price#27, c_customer_sk#29] +Keys [1]: [c_customer_sk#29] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#60, isEmpty#61] +Results [3]: [c_customer_sk#29, sum#62, isEmpty#63] + +(101) HashAggregate [codegen id : 44] +Input [3]: [c_customer_sk#29, sum#62, isEmpty#63] +Keys [1]: [c_customer_sk#29] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#64] +Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#64 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#65] + +(102) Filter [codegen id : 44] +Input [2]: 
[c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#65] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#65) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#65 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8), true))) + +(103) Project [codegen id : 44] +Output [1]: [c_customer_sk#29] +Input [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#65] + +(104) Sort [codegen id : 44] +Input [1]: [c_customer_sk#29] +Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 + +(105) SortMergeJoin +Left keys [1]: [ws_bill_customer_sk#55] +Right keys [1]: [c_customer_sk#29] +Join condition: None + +(106) ReusedExchange [Reuses operator id: 57] +Output [1]: [d_date_sk#9] + +(107) BroadcastHashJoin [codegen id : 46] +Left keys [1]: [ws_sold_date_sk#53] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(108) Project [codegen id : 46] +Output [3]: [ws_bill_customer_sk#55, ws_quantity#56, ws_list_price#57] +Input [5]: [ws_sold_date_sk#53, ws_bill_customer_sk#55, ws_quantity#56, ws_list_price#57, d_date_sk#9] + +(109) ReusedExchange [Reuses operator id: 63] +Output [3]: [c_customer_sk#29, c_first_name#41, c_last_name#42] + +(110) Sort [codegen id : 48] +Input [3]: [c_customer_sk#29, c_first_name#41, c_last_name#42] +Arguments: [c_customer_sk#29 ASC 
NULLS FIRST], false, 0 + +(111) ReusedExchange [Reuses operator id: 38] +Output [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] + +(112) Sort [codegen id : 50] +Input [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] +Arguments: [ss_customer_sk#25 ASC NULLS FIRST], false, 0 + +(113) ReusedExchange [Reuses operator id: 43] +Output [1]: [c_customer_sk#29] + +(114) Sort [codegen id : 52] +Input [1]: [c_customer_sk#29] +Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 + +(115) SortMergeJoin [codegen id : 53] +Left keys [1]: [ss_customer_sk#25] +Right keys [1]: [c_customer_sk#29] +Join condition: None + +(116) Project [codegen id : 53] +Output [3]: [ss_quantity#26, ss_sales_price#27, c_customer_sk#29] +Input [4]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27, c_customer_sk#29] + +(117) HashAggregate [codegen id : 53] +Input [3]: [ss_quantity#26, ss_sales_price#27, c_customer_sk#29] +Keys [1]: [c_customer_sk#29] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#60, isEmpty#61] +Results [3]: [c_customer_sk#29, sum#62, isEmpty#63] + +(118) HashAggregate [codegen id : 53] +Input [3]: [c_customer_sk#29, sum#62, isEmpty#63] +Keys [1]: [c_customer_sk#29] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#64] +Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 
as decimal(12,2)))), DecimalType(18,2), true))#64 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#65] + +(119) Filter [codegen id : 53] +Input [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#65] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#65) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#65 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8), true))) + +(120) Project [codegen id : 53] +Output [1]: [c_customer_sk#29 AS c_customer_sk#29#66] +Input [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#65] + +(121) Exchange +Input [1]: [c_customer_sk#29#66] +Arguments: hashpartitioning(c_customer_sk#29#66, 5), true, [id=#67] + +(122) Sort [codegen id : 54] +Input [1]: [c_customer_sk#29#66] +Arguments: [c_customer_sk#29#66 ASC NULLS FIRST], false, 0 + +(123) SortMergeJoin +Left keys [1]: [c_customer_sk#29] +Right keys [1]: [c_customer_sk#29#66] +Join condition: None + +(124) SortMergeJoin [codegen id : 55] +Left keys [1]: [ws_bill_customer_sk#55] +Right keys [1]: [c_customer_sk#29] +Join condition: None + +(125) Project [codegen id : 55] +Output [4]: [ws_quantity#56, ws_list_price#57, 
c_first_name#41, c_last_name#42] +Input [6]: [ws_bill_customer_sk#55, ws_quantity#56, ws_list_price#57, c_customer_sk#29, c_first_name#41, c_last_name#42] + +(126) HashAggregate [codegen id : 55] +Input [4]: [ws_quantity#56, ws_list_price#57, c_first_name#41, c_last_name#42] +Keys [2]: [c_last_name#42, c_first_name#41] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#56 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#57 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#68, isEmpty#69] +Results [4]: [c_last_name#42, c_first_name#41, sum#70, isEmpty#71] + +(127) Exchange +Input [4]: [c_last_name#42, c_first_name#41, sum#70, isEmpty#71] +Arguments: hashpartitioning(c_last_name#42, c_first_name#41, 5), true, [id=#72] + +(128) HashAggregate [codegen id : 56] +Input [4]: [c_last_name#42, c_first_name#41, sum#70, isEmpty#71] +Keys [2]: [c_last_name#42, c_first_name#41] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#56 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#57 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#56 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#57 as decimal(12,2)))), DecimalType(18,2), true))#73] +Results [3]: [c_last_name#42, c_first_name#41, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#56 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#57 as decimal(12,2)))), DecimalType(18,2), true))#73 AS sales#74] + +(129) Union + +(130) TakeOrderedAndProject +Input [3]: [c_last_name#42, c_first_name#41, sales#52] +Arguments: 100, [c_last_name#42 ASC NULLS FIRST, c_first_name#41 ASC NULLS FIRST, sales#52 ASC NULLS FIRST], [c_last_name#42, c_first_name#41, sales#52] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 49 Hosting Expression = Subquery 
scalar-subquery#37, [id=#38] +* HashAggregate (154) ++- Exchange (153) + +- * HashAggregate (152) + +- * HashAggregate (151) + +- * HashAggregate (150) + +- * Project (149) + +- * SortMergeJoin Inner (148) + :- * Sort (142) + : +- Exchange (141) + : +- * Project (140) + : +- * BroadcastHashJoin Inner BuildRight (139) + : :- * Filter (133) + : : +- * ColumnarToRow (132) + : : +- Scan parquet default.store_sales (131) + : +- BroadcastExchange (138) + : +- * Project (137) + : +- * Filter (136) + : +- * ColumnarToRow (135) + : +- Scan parquet default.date_dim (134) + +- * Sort (147) + +- Exchange (146) + +- * Filter (145) + +- * ColumnarToRow (144) + +- Scan parquet default.customer (143) + + +(131) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#7, ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(132) ColumnarToRow [codegen id : 2] +Input [4]: [ss_sold_date_sk#7, ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] + +(133) Filter [codegen id : 2] +Input [4]: [ss_sold_date_sk#7, ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] +Condition : (isnotnull(ss_customer_sk#25) AND isnotnull(ss_sold_date_sk#7)) + +(134) Scan parquet default.date_dim +Output [2]: [d_date_sk#9, d_year#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(135) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#9, d_year#11] + +(136) Filter [codegen id : 1] +Input [2]: [d_date_sk#9, d_year#11] +Condition : (d_year#11 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#9)) + +(137) 
Project [codegen id : 1] +Output [1]: [d_date_sk#9] +Input [2]: [d_date_sk#9, d_year#11] + +(138) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#75] + +(139) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(140) Project [codegen id : 2] +Output [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] +Input [5]: [ss_sold_date_sk#7, ss_customer_sk#25, ss_quantity#26, ss_sales_price#27, d_date_sk#9] + +(141) Exchange +Input [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] +Arguments: hashpartitioning(ss_customer_sk#25, 5), true, [id=#76] + +(142) Sort [codegen id : 3] +Input [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] +Arguments: [ss_customer_sk#25 ASC NULLS FIRST], false, 0 + +(143) Scan parquet default.customer +Output [1]: [c_customer_sk#29] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(144) ColumnarToRow [codegen id : 4] +Input [1]: [c_customer_sk#29] + +(145) Filter [codegen id : 4] +Input [1]: [c_customer_sk#29] +Condition : isnotnull(c_customer_sk#29) + +(146) Exchange +Input [1]: [c_customer_sk#29] +Arguments: hashpartitioning(c_customer_sk#29, 5), true, [id=#77] + +(147) Sort [codegen id : 5] +Input [1]: [c_customer_sk#29] +Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 + +(148) SortMergeJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#25] +Right keys [1]: [c_customer_sk#29] +Join condition: None + +(149) Project [codegen id : 6] +Output [3]: [ss_quantity#26, ss_sales_price#27, c_customer_sk#29] +Input [4]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27, c_customer_sk#29] + +(150) HashAggregate [codegen id : 6] +Input [3]: [ss_quantity#26, 
ss_sales_price#27, c_customer_sk#29] +Keys [1]: [c_customer_sk#29] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#78, isEmpty#79] +Results [3]: [c_customer_sk#29, sum#80, isEmpty#81] + +(151) HashAggregate [codegen id : 6] +Input [3]: [c_customer_sk#29, sum#80, isEmpty#81] +Keys [1]: [c_customer_sk#29] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#82] +Results [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#82 AS csales#83] + +(152) HashAggregate [codegen id : 6] +Input [1]: [csales#83] +Keys: [] +Functions [1]: [partial_max(csales#83)] +Aggregate Attributes [1]: [max#84] +Results [1]: [max#85] + +(153) Exchange +Input [1]: [max#85] +Arguments: SinglePartition, true, [id=#86] + +(154) HashAggregate [codegen id : 7] +Input [1]: [max#85] +Keys: [] +Functions [1]: [max(csales#83)] +Aggregate Attributes [1]: [max(csales#83)#87] +Results [1]: [max(csales#83)#87 AS tpcds_cmax#88] + +Subquery:2 Hosting operator id = 73 Hosting Expression = ReusedSubquery Subquery scalar-subquery#37, [id=#38] + +Subquery:3 Hosting operator id = 102 Hosting Expression = ReusedSubquery Subquery scalar-subquery#37, [id=#38] + +Subquery:4 Hosting operator id = 119 Hosting Expression = ReusedSubquery Subquery scalar-subquery#37, [id=#38] + + diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/simplified.txt new file mode 100644 index 0000000000000..3964f50895888 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/simplified.txt @@ -0,0 +1,268 @@ +TakeOrderedAndProject [c_first_name,c_last_name,sales] + Union + WholeStageCodegen (28) + HashAggregate [c_first_name,c_last_name,isEmpty,sum] [isEmpty,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + Exchange [c_first_name,c_last_name] #1 + WholeStageCodegen (27) + HashAggregate [c_first_name,c_last_name,cs_list_price,cs_quantity] [isEmpty,isEmpty,sum,sum] + Project [c_first_name,c_last_name,cs_list_price,cs_quantity] + SortMergeJoin [c_customer_sk,cs_bill_customer_sk] + InputAdapter + WholeStageCodegen (18) + Project [cs_bill_customer_sk,cs_list_price,cs_quantity] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + InputAdapter + SortMergeJoin [c_customer_sk,cs_bill_customer_sk] + WholeStageCodegen (11) + Sort [cs_bill_customer_sk] + InputAdapter + Exchange [cs_bill_customer_sk] #2 + WholeStageCodegen (10) + Project [cs_bill_customer_sk,cs_list_price,cs_quantity,cs_sold_date_sk] + InputAdapter + SortMergeJoin [cs_item_sk,item_sk] + WholeStageCodegen (2) + Sort [cs_item_sk] + InputAdapter + Exchange [cs_item_sk] #3 + WholeStageCodegen (1) + Filter [cs_bill_customer_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_list_price,cs_quantity,cs_sold_date_sk] + WholeStageCodegen (9) + Sort [item_sk] + InputAdapter + Exchange [item_sk] #4 + WholeStageCodegen (8) + Project [item_sk] + Filter [count(1)] + HashAggregate [count,d_date,i_item_sk,substr(i_item_desc, 1, 
30)] [count,count(1),count(1),item_sk] + HashAggregate [d_date,i_item_desc,i_item_sk] [count,count,substr(i_item_desc, 1, 30)] + Project [d_date,i_item_desc,i_item_sk] + SortMergeJoin [i_item_sk,ss_item_sk] + InputAdapter + WholeStageCodegen (5) + Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #5 + WholeStageCodegen (4) + Project [d_date,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Project [d_date,d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk,d_year] + InputAdapter + WholeStageCodegen (7) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #7 + WholeStageCodegen (6) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_desc,i_item_sk] + WholeStageCodegen (16) + Sort [c_customer_sk] + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + Subquery #1 + WholeStageCodegen (7) + HashAggregate [max] [max,max(csales),tpcds_cmax] + InputAdapter + Exchange #10 + WholeStageCodegen (6) + HashAggregate [csales] [max,max] + HashAggregate [c_customer_sk,isEmpty,sum] [csales,isEmpty,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [isEmpty,isEmpty,sum,sum] + Project [c_customer_sk,ss_quantity,ss_sales_price] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (3) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #11 + WholeStageCodegen (2) + 
Project [ss_customer_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + WholeStageCodegen (5) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #13 + WholeStageCodegen (4) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk] + HashAggregate [c_customer_sk,isEmpty,sum] [isEmpty,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [isEmpty,isEmpty,sum,sum] + Project [c_customer_sk,ss_quantity,ss_sales_price] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (13) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #8 + WholeStageCodegen (12) + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price] + InputAdapter + WholeStageCodegen (15) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #9 + WholeStageCodegen (14) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk] + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (17) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim 
[d_date_sk,d_moy,d_year] + InputAdapter + SortMergeJoin [c_customer_sk,c_customer_sk] + WholeStageCodegen (20) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #15 + WholeStageCodegen (19) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] + WholeStageCodegen (26) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #16 + WholeStageCodegen (25) + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [tpcds_cmax] #1 + HashAggregate [c_customer_sk,isEmpty,sum] [isEmpty,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [isEmpty,isEmpty,sum,sum] + Project [c_customer_sk,ss_quantity,ss_sales_price] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (22) + Sort [ss_customer_sk] + InputAdapter + ReusedExchange [ss_customer_sk,ss_quantity,ss_sales_price] #8 + InputAdapter + WholeStageCodegen (24) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_customer_sk] #9 + WholeStageCodegen (56) + HashAggregate [c_first_name,c_last_name,isEmpty,sum] [isEmpty,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + Exchange [c_first_name,c_last_name] #17 + WholeStageCodegen (55) + HashAggregate 
[c_first_name,c_last_name,ws_list_price,ws_quantity] [isEmpty,isEmpty,sum,sum] + Project [c_first_name,c_last_name,ws_list_price,ws_quantity] + SortMergeJoin [c_customer_sk,ws_bill_customer_sk] + InputAdapter + WholeStageCodegen (46) + Project [ws_bill_customer_sk,ws_list_price,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + InputAdapter + SortMergeJoin [c_customer_sk,ws_bill_customer_sk] + WholeStageCodegen (39) + Sort [ws_bill_customer_sk] + InputAdapter + Exchange [ws_bill_customer_sk] #18 + WholeStageCodegen (38) + Project [ws_bill_customer_sk,ws_list_price,ws_quantity,ws_sold_date_sk] + InputAdapter + SortMergeJoin [item_sk,ws_item_sk] + WholeStageCodegen (30) + Sort [ws_item_sk] + InputAdapter + Exchange [ws_item_sk] #19 + WholeStageCodegen (29) + Filter [ws_bill_customer_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_item_sk,ws_list_price,ws_quantity,ws_sold_date_sk] + WholeStageCodegen (37) + Sort [item_sk] + InputAdapter + ReusedExchange [item_sk] #4 + WholeStageCodegen (44) + Sort [c_customer_sk] + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [tpcds_cmax] #1 + HashAggregate [c_customer_sk,isEmpty,sum] [isEmpty,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [isEmpty,isEmpty,sum,sum] + Project [c_customer_sk,ss_quantity,ss_sales_price] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (41) 
+ Sort [ss_customer_sk] + InputAdapter + ReusedExchange [ss_customer_sk,ss_quantity,ss_sales_price] #8 + InputAdapter + WholeStageCodegen (43) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_customer_sk] #9 + InputAdapter + ReusedExchange [d_date_sk] #14 + InputAdapter + SortMergeJoin [c_customer_sk,c_customer_sk] + WholeStageCodegen (48) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #15 + WholeStageCodegen (54) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #20 + WholeStageCodegen (53) + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [tpcds_cmax] #1 + HashAggregate [c_customer_sk,isEmpty,sum] [isEmpty,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [isEmpty,isEmpty,sum,sum] + Project [c_customer_sk,ss_quantity,ss_sales_price] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (50) + Sort [ss_customer_sk] + InputAdapter + ReusedExchange [ss_customer_sk,ss_quantity,ss_sales_price] #8 + InputAdapter + WholeStageCodegen (52) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_customer_sk] #9 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/explain.txt new file mode 100644 index 0000000000000..61e4b21189a86 --- /dev/null +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/explain.txt @@ -0,0 +1,689 @@ +== Physical Plan == +TakeOrderedAndProject (97) ++- Union (96) + :- * HashAggregate (65) + : +- Exchange (64) + : +- * HashAggregate (63) + : +- * Project (62) + : +- * BroadcastHashJoin Inner BuildRight (61) + : :- * Project (55) + : : +- * BroadcastHashJoin Inner BuildRight (54) + : : :- * BroadcastHashJoin LeftSemi BuildRight (43) + : : : :- * Project (27) + : : : : +- * BroadcastHashJoin LeftSemi BuildRight (26) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.catalog_sales (1) + : : : : +- BroadcastExchange (25) + : : : : +- * Project (24) + : : : : +- * Filter (23) + : : : : +- * HashAggregate (22) + : : : : +- Exchange (21) + : : : : +- * HashAggregate (20) + : : : : +- * Project (19) + : : : : +- * BroadcastHashJoin Inner BuildRight (18) + : : : : :- * Project (13) + : : : : : +- * BroadcastHashJoin Inner BuildRight (12) + : : : : : :- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : +- Scan parquet default.store_sales (4) + : : : : : +- BroadcastExchange (11) + : : : : : +- * Project (10) + : : : : : +- * Filter (9) + : : : : : +- * ColumnarToRow (8) + : : : : : +- Scan parquet default.date_dim (7) + : : : : +- BroadcastExchange (17) + : : : : +- * Filter (16) + : : : : +- * ColumnarToRow (15) + : : : : +- Scan parquet default.item (14) + : : : +- BroadcastExchange (42) + : : : +- * Project (41) + : : : +- * Filter (40) + : : : +- * HashAggregate (39) + : : : +- Exchange (38) + : : : +- * HashAggregate (37) + : : : +- * Project (36) + : : : +- * BroadcastHashJoin Inner BuildRight (35) + : : : :- * Filter (30) + : : : : +- * ColumnarToRow (29) + : : : : +- Scan parquet default.store_sales (28) + : : : +- BroadcastExchange (34) + : : : +- * Filter (33) + : : : +- * ColumnarToRow (32) + : : : +- Scan parquet default.customer (31) + : : +- BroadcastExchange (53) + : : +- * BroadcastHashJoin 
LeftSemi BuildRight (52) + : : :- * Filter (46) + : : : +- * ColumnarToRow (45) + : : : +- Scan parquet default.customer (44) + : : +- BroadcastExchange (51) + : : +- * Project (50) + : : +- * Filter (49) + : : +- * HashAggregate (48) + : : +- ReusedExchange (47) + : +- BroadcastExchange (60) + : +- * Project (59) + : +- * Filter (58) + : +- * ColumnarToRow (57) + : +- Scan parquet default.date_dim (56) + +- * HashAggregate (95) + +- Exchange (94) + +- * HashAggregate (93) + +- * Project (92) + +- * BroadcastHashJoin Inner BuildRight (91) + :- * Project (89) + : +- * BroadcastHashJoin Inner BuildRight (88) + : :- * BroadcastHashJoin LeftSemi BuildRight (77) + : : :- * Project (71) + : : : +- * BroadcastHashJoin LeftSemi BuildRight (70) + : : : :- * Filter (68) + : : : : +- * ColumnarToRow (67) + : : : : +- Scan parquet default.web_sales (66) + : : : +- ReusedExchange (69) + : : +- BroadcastExchange (76) + : : +- * Project (75) + : : +- * Filter (74) + : : +- * HashAggregate (73) + : : +- ReusedExchange (72) + : +- BroadcastExchange (87) + : +- * BroadcastHashJoin LeftSemi BuildRight (86) + : :- * Filter (80) + : : +- * ColumnarToRow (79) + : : +- Scan parquet default.customer (78) + : +- BroadcastExchange (85) + : +- * Project (84) + : +- * Filter (83) + : +- * HashAggregate (82) + : +- ReusedExchange (81) + +- ReusedExchange (90) + + +(1) Scan parquet default.catalog_sales +Output [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 13] +Input [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5] + +(3) Filter [codegen id : 13] +Input [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, 
cs_item_sk#3, cs_quantity#4, cs_list_price#5] +Condition : (isnotnull(cs_bill_customer_sk#2) AND isnotnull(cs_sold_date_sk#1)) + +(4) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#6, ss_item_sk#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 3] +Input [2]: [ss_sold_date_sk#6, ss_item_sk#7] + +(6) Filter [codegen id : 3] +Input [2]: [ss_sold_date_sk#6, ss_item_sk#7] +Condition : (isnotnull(ss_sold_date_sk#6) AND isnotnull(ss_item_sk#7)) + +(7) Scan parquet default.date_dim +Output [3]: [d_date_sk#8, d_date#9, d_year#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#8, d_date#9, d_year#10] + +(9) Filter [codegen id : 1] +Input [3]: [d_date_sk#8, d_date#9, d_year#10] +Condition : (d_year#10 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#8)) + +(10) Project [codegen id : 1] +Output [2]: [d_date_sk#8, d_date#9] +Input [3]: [d_date_sk#8, d_date#9, d_year#10] + +(11) BroadcastExchange +Input [2]: [d_date_sk#8, d_date#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] + +(12) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#6] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(13) Project [codegen id : 3] +Output [2]: [ss_item_sk#7, d_date#9] +Input [4]: [ss_sold_date_sk#6, ss_item_sk#7, d_date_sk#8, d_date#9] + +(14) Scan parquet default.item +Output [2]: [i_item_sk#12, i_item_desc#13] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 2] +Input [2]: [i_item_sk#12, i_item_desc#13] + +(16) Filter [codegen id : 2] +Input [2]: [i_item_sk#12, i_item_desc#13] +Condition : isnotnull(i_item_sk#12) + +(17) BroadcastExchange +Input [2]: [i_item_sk#12, i_item_desc#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] + +(18) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#7] +Right keys [1]: [i_item_sk#12] +Join condition: None + +(19) Project [codegen id : 3] +Output [3]: [d_date#9, i_item_sk#12, i_item_desc#13] +Input [4]: [ss_item_sk#7, d_date#9, i_item_sk#12, i_item_desc#13] + +(20) HashAggregate [codegen id : 3] +Input [3]: [d_date#9, i_item_sk#12, i_item_desc#13] +Keys [3]: [substr(i_item_desc#13, 1, 30) AS substr(i_item_desc#13, 1, 30)#15, i_item_sk#12, d_date#9] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#16] +Results [4]: [substr(i_item_desc#13, 1, 30)#15, i_item_sk#12, d_date#9, count#17] + +(21) Exchange +Input [4]: [substr(i_item_desc#13, 1, 30)#15, i_item_sk#12, d_date#9, count#17] +Arguments: hashpartitioning(substr(i_item_desc#13, 1, 30)#15, i_item_sk#12, d_date#9, 5), true, [id=#18] + +(22) HashAggregate [codegen id : 4] +Input [4]: [substr(i_item_desc#13, 1, 30)#15, i_item_sk#12, d_date#9, count#17] +Keys [3]: [substr(i_item_desc#13, 1, 30)#15, i_item_sk#12, d_date#9] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#19] +Results [2]: [i_item_sk#12 AS item_sk#20, count(1)#19 AS count(1)#21] + +(23) Filter [codegen id : 4] +Input [2]: [item_sk#20, count(1)#21] +Condition : (count(1)#21 > 4) + +(24) Project [codegen id : 4] +Output [1]: [item_sk#20] +Input [2]: [item_sk#20, count(1)#21] + +(25) BroadcastExchange +Input [1]: [item_sk#20] +Arguments: 
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] + +(26) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [cs_item_sk#3] +Right keys [1]: [item_sk#20] +Join condition: None + +(27) Project [codegen id : 13] +Output [4]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_quantity#4, cs_list_price#5] +Input [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5] + +(28) Scan parquet default.store_sales +Output [3]: [ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 6] +Input [3]: [ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] + +(30) Filter [codegen id : 6] +Input [3]: [ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] +Condition : isnotnull(ss_customer_sk#23) + +(31) Scan parquet default.customer +Output [1]: [c_customer_sk#26] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 5] +Input [1]: [c_customer_sk#26] + +(33) Filter [codegen id : 5] +Input [1]: [c_customer_sk#26] +Condition : isnotnull(c_customer_sk#26) + +(34) BroadcastExchange +Input [1]: [c_customer_sk#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#27] + +(35) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#23] +Right keys [1]: [c_customer_sk#26] +Join condition: None + +(36) Project [codegen id : 6] +Output [3]: [ss_quantity#24, ss_sales_price#25, c_customer_sk#26] +Input [4]: [ss_customer_sk#23, ss_quantity#24, ss_sales_price#25, c_customer_sk#26] + +(37) 
HashAggregate [codegen id : 6] +Input [3]: [ss_quantity#24, ss_sales_price#25, c_customer_sk#26] +Keys [1]: [c_customer_sk#26] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#28, isEmpty#29] +Results [3]: [c_customer_sk#26, sum#30, isEmpty#31] + +(38) Exchange +Input [3]: [c_customer_sk#26, sum#30, isEmpty#31] +Arguments: hashpartitioning(c_customer_sk#26, 5), true, [id=#32] + +(39) HashAggregate [codegen id : 7] +Input [3]: [c_customer_sk#26, sum#30, isEmpty#31] +Keys [1]: [c_customer_sk#26] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#33] +Results [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#33 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34] + +(40) Filter [codegen id : 7] +Input [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), 
DecimalType(18,2), true))#34) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery scalar-subquery#35, [id=#36] as decimal(32,6)))), DecimalType(38,8), true))) + +(41) Project [codegen id : 7] +Output [1]: [c_customer_sk#26] +Input [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34] + +(42) BroadcastExchange +Input [1]: [c_customer_sk#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#37] + +(43) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [cs_bill_customer_sk#2] +Right keys [1]: [c_customer_sk#26] +Join condition: None + +(44) Scan parquet default.customer +Output [3]: [c_customer_sk#26, c_first_name#38, c_last_name#39] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(45) ColumnarToRow [codegen id : 11] +Input [3]: [c_customer_sk#26, c_first_name#38, c_last_name#39] + +(46) Filter [codegen id : 11] +Input [3]: [c_customer_sk#26, c_first_name#38, c_last_name#39] +Condition : isnotnull(c_customer_sk#26) + +(47) ReusedExchange [Reuses operator id: 38] +Output [3]: [c_customer_sk#26, sum#30, isEmpty#31] + +(48) HashAggregate [codegen id : 10] +Input [3]: [c_customer_sk#26, sum#30, isEmpty#31] +Keys [1]: [c_customer_sk#26] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes 
[1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#33] +Results [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#33 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34] + +(49) Filter [codegen id : 10] +Input [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#35, [id=#36] as decimal(32,6)))), DecimalType(38,8), true))) + +(50) Project [codegen id : 10] +Output [1]: [c_customer_sk#26 AS c_customer_sk#26#40] +Input [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34] + +(51) BroadcastExchange +Input [1]: [c_customer_sk#26#40] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#41] + +(52) BroadcastHashJoin [codegen id : 11] +Left keys [1]: 
[c_customer_sk#26] +Right keys [1]: [c_customer_sk#26#40] +Join condition: None + +(53) BroadcastExchange +Input [3]: [c_customer_sk#26, c_first_name#38, c_last_name#39] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#42] + +(54) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [cs_bill_customer_sk#2] +Right keys [1]: [c_customer_sk#26] +Join condition: None + +(55) Project [codegen id : 13] +Output [5]: [cs_sold_date_sk#1, cs_quantity#4, cs_list_price#5, c_first_name#38, c_last_name#39] +Input [7]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_quantity#4, cs_list_price#5, c_customer_sk#26, c_first_name#38, c_last_name#39] + +(56) Scan parquet default.date_dim +Output [3]: [d_date_sk#8, d_year#10, d_moy#43] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)] +ReadSchema: struct + +(57) ColumnarToRow [codegen id : 12] +Input [3]: [d_date_sk#8, d_year#10, d_moy#43] + +(58) Filter [codegen id : 12] +Input [3]: [d_date_sk#8, d_year#10, d_moy#43] +Condition : ((((isnotnull(d_year#10) AND isnotnull(d_moy#43)) AND (d_year#10 = 2000)) AND (d_moy#43 = 2)) AND isnotnull(d_date_sk#8)) + +(59) Project [codegen id : 12] +Output [1]: [d_date_sk#8] +Input [3]: [d_date_sk#8, d_year#10, d_moy#43] + +(60) BroadcastExchange +Input [1]: [d_date_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#44] + +(61) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(62) Project [codegen id : 13] +Output [4]: [cs_quantity#4, cs_list_price#5, c_first_name#38, c_last_name#39] +Input [6]: [cs_sold_date_sk#1, cs_quantity#4, cs_list_price#5, c_first_name#38, c_last_name#39, d_date_sk#8] + +(63) 
HashAggregate [codegen id : 13] +Input [4]: [cs_quantity#4, cs_list_price#5, c_first_name#38, c_last_name#39] +Keys [2]: [c_last_name#39, c_first_name#38] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#5 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#45, isEmpty#46] +Results [4]: [c_last_name#39, c_first_name#38, sum#47, isEmpty#48] + +(64) Exchange +Input [4]: [c_last_name#39, c_first_name#38, sum#47, isEmpty#48] +Arguments: hashpartitioning(c_last_name#39, c_first_name#38, 5), true, [id=#49] + +(65) HashAggregate [codegen id : 14] +Input [4]: [c_last_name#39, c_first_name#38, sum#47, isEmpty#48] +Keys [2]: [c_last_name#39, c_first_name#38] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#5 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#5 as decimal(12,2)))), DecimalType(18,2), true))#50] +Results [3]: [c_last_name#39, c_first_name#38, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#5 as decimal(12,2)))), DecimalType(18,2), true))#50 AS sales#51] + +(66) Scan parquet default.web_sales +Output [5]: [ws_sold_date_sk#52, ws_item_sk#53, ws_bill_customer_sk#54, ws_quantity#55, ws_list_price#56] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(67) ColumnarToRow [codegen id : 27] +Input [5]: [ws_sold_date_sk#52, ws_item_sk#53, ws_bill_customer_sk#54, 
ws_quantity#55, ws_list_price#56] + +(68) Filter [codegen id : 27] +Input [5]: [ws_sold_date_sk#52, ws_item_sk#53, ws_bill_customer_sk#54, ws_quantity#55, ws_list_price#56] +Condition : (isnotnull(ws_bill_customer_sk#54) AND isnotnull(ws_sold_date_sk#52)) + +(69) ReusedExchange [Reuses operator id: 25] +Output [1]: [item_sk#20] + +(70) BroadcastHashJoin [codegen id : 27] +Left keys [1]: [ws_item_sk#53] +Right keys [1]: [item_sk#20] +Join condition: None + +(71) Project [codegen id : 27] +Output [4]: [ws_sold_date_sk#52, ws_bill_customer_sk#54, ws_quantity#55, ws_list_price#56] +Input [5]: [ws_sold_date_sk#52, ws_item_sk#53, ws_bill_customer_sk#54, ws_quantity#55, ws_list_price#56] + +(72) ReusedExchange [Reuses operator id: 38] +Output [3]: [c_customer_sk#26, sum#57, isEmpty#58] + +(73) HashAggregate [codegen id : 21] +Input [3]: [c_customer_sk#26, sum#57, isEmpty#58] +Keys [1]: [c_customer_sk#26] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#59] +Results [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#59 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#60] + +(74) Filter [codegen id : 21] +Input [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), 
DecimalType(18,2), true))#60] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#60) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#60 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#35, [id=#36] as decimal(32,6)))), DecimalType(38,8), true))) + +(75) Project [codegen id : 21] +Output [1]: [c_customer_sk#26] +Input [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#60] + +(76) BroadcastExchange +Input [1]: [c_customer_sk#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#61] + +(77) BroadcastHashJoin [codegen id : 27] +Left keys [1]: [ws_bill_customer_sk#54] +Right keys [1]: [c_customer_sk#26] +Join condition: None + +(78) Scan parquet default.customer +Output [3]: [c_customer_sk#26, c_first_name#38, c_last_name#39] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(79) ColumnarToRow [codegen id : 25] +Input [3]: [c_customer_sk#26, c_first_name#38, c_last_name#39] + +(80) Filter [codegen id : 25] +Input [3]: [c_customer_sk#26, c_first_name#38, c_last_name#39] +Condition : isnotnull(c_customer_sk#26) + +(81) ReusedExchange [Reuses operator id: 38] +Output [3]: [c_customer_sk#26, sum#57, isEmpty#58] + +(82) HashAggregate [codegen id : 24] +Input [3]: [c_customer_sk#26, sum#57, isEmpty#58] +Keys [1]: 
[c_customer_sk#26] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#59] +Results [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#59 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#60] + +(83) Filter [codegen id : 24] +Input [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#60] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#60) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#60 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#35, [id=#36] as decimal(32,6)))), DecimalType(38,8), true))) + +(84) Project [codegen id : 24] +Output [1]: [c_customer_sk#26 AS c_customer_sk#26#62] +Input [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), 
DecimalType(18,2), true))#60] + +(85) BroadcastExchange +Input [1]: [c_customer_sk#26#62] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#63] + +(86) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [c_customer_sk#26] +Right keys [1]: [c_customer_sk#26#62] +Join condition: None + +(87) BroadcastExchange +Input [3]: [c_customer_sk#26, c_first_name#38, c_last_name#39] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#64] + +(88) BroadcastHashJoin [codegen id : 27] +Left keys [1]: [ws_bill_customer_sk#54] +Right keys [1]: [c_customer_sk#26] +Join condition: None + +(89) Project [codegen id : 27] +Output [5]: [ws_sold_date_sk#52, ws_quantity#55, ws_list_price#56, c_first_name#38, c_last_name#39] +Input [7]: [ws_sold_date_sk#52, ws_bill_customer_sk#54, ws_quantity#55, ws_list_price#56, c_customer_sk#26, c_first_name#38, c_last_name#39] + +(90) ReusedExchange [Reuses operator id: 60] +Output [1]: [d_date_sk#8] + +(91) BroadcastHashJoin [codegen id : 27] +Left keys [1]: [ws_sold_date_sk#52] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(92) Project [codegen id : 27] +Output [4]: [ws_quantity#55, ws_list_price#56, c_first_name#38, c_last_name#39] +Input [6]: [ws_sold_date_sk#52, ws_quantity#55, ws_list_price#56, c_first_name#38, c_last_name#39, d_date_sk#8] + +(93) HashAggregate [codegen id : 27] +Input [4]: [ws_quantity#55, ws_list_price#56, c_first_name#38, c_last_name#39] +Keys [2]: [c_last_name#39, c_first_name#38] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#55 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#56 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#65, isEmpty#66] +Results [4]: [c_last_name#39, c_first_name#38, sum#67, isEmpty#68] + +(94) Exchange +Input [4]: [c_last_name#39, c_first_name#38, sum#67, isEmpty#68] +Arguments: hashpartitioning(c_last_name#39, 
c_first_name#38, 5), true, [id=#69] + +(95) HashAggregate [codegen id : 28] +Input [4]: [c_last_name#39, c_first_name#38, sum#67, isEmpty#68] +Keys [2]: [c_last_name#39, c_first_name#38] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#55 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#56 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#55 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#56 as decimal(12,2)))), DecimalType(18,2), true))#70] +Results [3]: [c_last_name#39, c_first_name#38, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#55 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#56 as decimal(12,2)))), DecimalType(18,2), true))#70 AS sales#71] + +(96) Union + +(97) TakeOrderedAndProject +Input [3]: [c_last_name#39, c_first_name#38, sales#51] +Arguments: 100, [c_last_name#39 ASC NULLS FIRST, c_first_name#38 ASC NULLS FIRST, sales#51 ASC NULLS FIRST], [c_last_name#39, c_first_name#38, sales#51] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 40 Hosting Expression = Subquery scalar-subquery#35, [id=#36] +* HashAggregate (119) ++- Exchange (118) + +- * HashAggregate (117) + +- * HashAggregate (116) + +- Exchange (115) + +- * HashAggregate (114) + +- * Project (113) + +- * BroadcastHashJoin Inner BuildRight (112) + :- * Project (106) + : +- * BroadcastHashJoin Inner BuildRight (105) + : :- * Filter (100) + : : +- * ColumnarToRow (99) + : : +- Scan parquet default.store_sales (98) + : +- BroadcastExchange (104) + : +- * Filter (103) + : +- * ColumnarToRow (102) + : +- Scan parquet default.customer (101) + +- BroadcastExchange (111) + +- * Project (110) + +- * Filter (109) + +- * ColumnarToRow (108) + +- Scan parquet default.date_dim (107) + + +(98) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, 
ss_sales_price#25] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(99) ColumnarToRow [codegen id : 3] +Input [4]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] + +(100) Filter [codegen id : 3] +Input [4]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] +Condition : (isnotnull(ss_customer_sk#23) AND isnotnull(ss_sold_date_sk#6)) + +(101) Scan parquet default.customer +Output [1]: [c_customer_sk#26] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(102) ColumnarToRow [codegen id : 1] +Input [1]: [c_customer_sk#26] + +(103) Filter [codegen id : 1] +Input [1]: [c_customer_sk#26] +Condition : isnotnull(c_customer_sk#26) + +(104) BroadcastExchange +Input [1]: [c_customer_sk#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#72] + +(105) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_customer_sk#23] +Right keys [1]: [c_customer_sk#26] +Join condition: None + +(106) Project [codegen id : 3] +Output [4]: [ss_sold_date_sk#6, ss_quantity#24, ss_sales_price#25, c_customer_sk#26] +Input [5]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25, c_customer_sk#26] + +(107) Scan parquet default.date_dim +Output [2]: [d_date_sk#8, d_year#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(108) ColumnarToRow [codegen id : 2] +Input [2]: 
[d_date_sk#8, d_year#10] + +(109) Filter [codegen id : 2] +Input [2]: [d_date_sk#8, d_year#10] +Condition : (d_year#10 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#8)) + +(110) Project [codegen id : 2] +Output [1]: [d_date_sk#8] +Input [2]: [d_date_sk#8, d_year#10] + +(111) BroadcastExchange +Input [1]: [d_date_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#73] + +(112) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#6] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(113) Project [codegen id : 3] +Output [3]: [ss_quantity#24, ss_sales_price#25, c_customer_sk#26] +Input [5]: [ss_sold_date_sk#6, ss_quantity#24, ss_sales_price#25, c_customer_sk#26, d_date_sk#8] + +(114) HashAggregate [codegen id : 3] +Input [3]: [ss_quantity#24, ss_sales_price#25, c_customer_sk#26] +Keys [1]: [c_customer_sk#26] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#74, isEmpty#75] +Results [3]: [c_customer_sk#26, sum#76, isEmpty#77] + +(115) Exchange +Input [3]: [c_customer_sk#26, sum#76, isEmpty#77] +Arguments: hashpartitioning(c_customer_sk#26, 5), true, [id=#78] + +(116) HashAggregate [codegen id : 4] +Input [3]: [c_customer_sk#26, sum#76, isEmpty#77] +Keys [1]: [c_customer_sk#26] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#79] +Results [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as 
decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#79 AS csales#80] + +(117) HashAggregate [codegen id : 4] +Input [1]: [csales#80] +Keys: [] +Functions [1]: [partial_max(csales#80)] +Aggregate Attributes [1]: [max#81] +Results [1]: [max#82] + +(118) Exchange +Input [1]: [max#82] +Arguments: SinglePartition, true, [id=#83] + +(119) HashAggregate [codegen id : 5] +Input [1]: [max#82] +Keys: [] +Functions [1]: [max(csales#80)] +Aggregate Attributes [1]: [max(csales#80)#84] +Results [1]: [max(csales#80)#84 AS tpcds_cmax#85] + +Subquery:2 Hosting operator id = 49 Hosting Expression = ReusedSubquery Subquery scalar-subquery#35, [id=#36] + +Subquery:3 Hosting operator id = 74 Hosting Expression = ReusedSubquery Subquery scalar-subquery#35, [id=#36] + +Subquery:4 Hosting operator id = 83 Hosting Expression = ReusedSubquery Subquery scalar-subquery#35, [id=#36] + + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/simplified.txt new file mode 100644 index 0000000000000..277966b363866 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/simplified.txt @@ -0,0 +1,182 @@ +TakeOrderedAndProject [c_first_name,c_last_name,sales] + Union + WholeStageCodegen (14) + HashAggregate [c_first_name,c_last_name,isEmpty,sum] [isEmpty,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + Exchange [c_first_name,c_last_name] #1 + WholeStageCodegen (13) + HashAggregate [c_first_name,c_last_name,cs_list_price,cs_quantity] [isEmpty,isEmpty,sum,sum] + Project [c_first_name,c_last_name,cs_list_price,cs_quantity] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project 
[c_first_name,c_last_name,cs_list_price,cs_quantity,cs_sold_date_sk] + BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] + BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] + Project [cs_bill_customer_sk,cs_list_price,cs_quantity,cs_sold_date_sk] + BroadcastHashJoin [cs_item_sk,item_sk] + Filter [cs_bill_customer_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_list_price,cs_quantity,cs_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (4) + Project [item_sk] + Filter [count(1)] + HashAggregate [count,d_date,i_item_sk,substr(i_item_desc, 1, 30)] [count,count(1),count(1),item_sk] + InputAdapter + Exchange [d_date,i_item_sk,substr(i_item_desc, 1, 30)] #3 + WholeStageCodegen (3) + HashAggregate [d_date,i_item_desc,i_item_sk] [count,count,substr(i_item_desc, 1, 30)] + Project [d_date,i_item_desc,i_item_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [d_date,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date,d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_desc,i_item_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + Subquery #1 + WholeStageCodegen (5) + HashAggregate [max] [max,max(csales),tpcds_cmax] + InputAdapter + Exchange #9 + WholeStageCodegen (4) + HashAggregate [csales] 
[max,max] + HashAggregate [c_customer_sk,isEmpty,sum] [csales,isEmpty,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + Exchange [c_customer_sk] #10 + WholeStageCodegen (3) + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [isEmpty,isEmpty,sum,sum] + Project [c_customer_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [c_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (1) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + HashAggregate [c_customer_sk,isEmpty,sum] [isEmpty,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + Exchange [c_customer_sk] #7 + WholeStageCodegen (6) + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [isEmpty,isEmpty,sum,sum] + Project [c_customer_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price] + InputAdapter + 
BroadcastExchange #8 + WholeStageCodegen (5) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (11) + BroadcastHashJoin [c_customer_sk,c_customer_sk] + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (10) + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [tpcds_cmax] #1 + HashAggregate [c_customer_sk,isEmpty,sum] [isEmpty,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + ReusedExchange [c_customer_sk,isEmpty,sum] #7 + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (12) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + WholeStageCodegen (28) + HashAggregate [c_first_name,c_last_name,isEmpty,sum] [isEmpty,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + Exchange [c_first_name,c_last_name] #16 + WholeStageCodegen (27) + HashAggregate [c_first_name,c_last_name,ws_list_price,ws_quantity] [isEmpty,isEmpty,sum,sum] + Project [c_first_name,c_last_name,ws_list_price,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project 
[c_first_name,c_last_name,ws_list_price,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Project [ws_bill_customer_sk,ws_list_price,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [item_sk,ws_item_sk] + Filter [ws_bill_customer_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_item_sk,ws_list_price,ws_quantity,ws_sold_date_sk] + InputAdapter + ReusedExchange [item_sk] #2 + InputAdapter + BroadcastExchange #17 + WholeStageCodegen (21) + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [tpcds_cmax] #1 + HashAggregate [c_customer_sk,isEmpty,sum] [isEmpty,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + ReusedExchange [c_customer_sk,isEmpty,sum] #7 + InputAdapter + BroadcastExchange #18 + WholeStageCodegen (25) + BroadcastHashJoin [c_customer_sk,c_customer_sk] + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #19 + WholeStageCodegen (24) + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [tpcds_cmax] #1 + HashAggregate [c_customer_sk,isEmpty,sum] 
[isEmpty,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + ReusedExchange [c_customer_sk,isEmpty,sum] #7 + InputAdapter + ReusedExchange [d_date_sk] #15 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/explain.txt new file mode 100644 index 0000000000000..d53db33027ed1 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/explain.txt @@ -0,0 +1,567 @@ +== Physical Plan == +* Project (48) ++- * Filter (47) + +- * HashAggregate (46) + +- Exchange (45) + +- * HashAggregate (44) + +- * HashAggregate (43) + +- Exchange (42) + +- * HashAggregate (41) + +- * Project (40) + +- * SortMergeJoin Inner (39) + :- * Sort (33) + : +- Exchange (32) + : +- * Project (31) + : +- * BroadcastHashJoin Inner BuildRight (30) + : :- * Project (18) + : : +- * SortMergeJoin Inner (17) + : : :- * Sort (11) + : : : +- Exchange (10) + : : : +- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.item (4) + : : +- * Sort (16) + : : +- Exchange (15) + : : +- * Filter (14) + : : +- * ColumnarToRow (13) + : : +- Scan parquet default.customer (12) + : +- BroadcastExchange (29) + : +- * Project (28) + : +- * BroadcastHashJoin Inner BuildLeft (27) + : :- BroadcastExchange (23) + : : +- * Project (22) + : : +- * Filter (21) + : : +- * ColumnarToRow (20) 
+ : : +- Scan parquet default.store (19) + : +- * Filter (26) + : +- * ColumnarToRow (25) + : +- Scan parquet default.customer_address (24) + +- * Sort (38) + +- Exchange (37) + +- * Filter (36) + +- * ColumnarToRow (35) + +- Scan parquet default.store_returns (34) + + +(1) Scan parquet default.store_sales +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] + +(3) Filter [codegen id : 2] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(4) Scan parquet default.item +Output [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_color), EqualTo(i_color,pale), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] + +(6) Filter [codegen id : 1] +Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Condition : ((isnotnull(i_color#9) AND (i_color#9 = pale)) AND isnotnull(i_item_sk#6)) + +(7) BroadcastExchange +Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] 
+Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] + +(8) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(9) Project [codegen id : 2] +Output [10]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] + +(10) Exchange +Input [10]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#13] + +(11) Sort [codegen id : 3] +Input [10]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 + +(12) Scan parquet default.customer +Output [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)] +ReadSchema: struct + +(13) ColumnarToRow [codegen id : 4] +Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] + +(14) Filter [codegen id : 4] +Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Condition : (isnotnull(c_customer_sk#14) AND isnotnull(c_birth_country#17)) + +(15) Exchange +Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Arguments: hashpartitioning(c_customer_sk#14, 5), true, [id=#18] + +(16) Sort [codegen id 
: 5] +Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Arguments: [c_customer_sk#14 ASC NULLS FIRST], false, 0 + +(17) SortMergeJoin [codegen id : 8] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#14] +Join condition: None + +(18) Project [codegen id : 8] +Output [12]: [ss_item_sk#1, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] +Input [14]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] + +(19) Scan parquet default.store +Output [5]: [s_store_sk#19, s_store_name#20, s_market_id#21, s_state#22, s_zip#23] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 6] +Input [5]: [s_store_sk#19, s_store_name#20, s_market_id#21, s_state#22, s_zip#23] + +(21) Filter [codegen id : 6] +Input [5]: [s_store_sk#19, s_store_name#20, s_market_id#21, s_state#22, s_zip#23] +Condition : (((isnotnull(s_market_id#21) AND (s_market_id#21 = 8)) AND isnotnull(s_store_sk#19)) AND isnotnull(s_zip#23)) + +(22) Project [codegen id : 6] +Output [4]: [s_store_sk#19, s_store_name#20, s_state#22, s_zip#23] +Input [5]: [s_store_sk#19, s_store_name#20, s_market_id#21, s_state#22, s_zip#23] + +(23) BroadcastExchange +Input [4]: [s_store_sk#19, s_store_name#20, s_state#22, s_zip#23] +Arguments: HashedRelationBroadcastMode(List(input[3, string, true]),false), [id=#24] + +(24) Scan parquet default.customer_address +Output [3]: [ca_state#25, ca_zip#26, ca_country#27] +Batched: 
true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_zip), IsNotNull(ca_country)] +ReadSchema: struct + +(25) ColumnarToRow +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] + +(26) Filter +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Condition : (isnotnull(ca_zip#26) AND isnotnull(ca_country#27)) + +(27) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [s_zip#23] +Right keys [1]: [ca_zip#26] +Join condition: None + +(28) Project [codegen id : 7] +Output [5]: [s_store_sk#19, s_store_name#20, s_state#22, ca_state#25, ca_country#27] +Input [7]: [s_store_sk#19, s_store_name#20, s_state#22, s_zip#23, ca_state#25, ca_zip#26, ca_country#27] + +(29) BroadcastExchange +Input [5]: [s_store_sk#19, s_store_name#20, s_state#22, ca_state#25, ca_country#27] +Arguments: HashedRelationBroadcastMode(List(upper(input[4, string, true]), input[0, int, true]),false), [id=#28] + +(30) BroadcastHashJoin [codegen id : 8] +Left keys [2]: [c_birth_country#17, ss_store_sk#3] +Right keys [2]: [upper(ca_country#27), s_store_sk#19] +Join condition: None + +(31) Project [codegen id : 8] +Output [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, s_store_name#20, s_state#22, ca_state#25] +Input [17]: [ss_item_sk#1, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17, s_store_sk#19, s_store_name#20, s_state#22, ca_state#25, ca_country#27] + +(32) Exchange +Input [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, s_store_name#20, s_state#22, ca_state#25] +Arguments: hashpartitioning(cast(ss_ticket_number#4 as 
bigint), cast(ss_item_sk#1 as bigint), 5), true, [id=#29] + +(33) Sort [codegen id : 9] +Input [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, s_store_name#20, s_state#22, ca_state#25] +Arguments: [cast(ss_ticket_number#4 as bigint) ASC NULLS FIRST, cast(ss_item_sk#1 as bigint) ASC NULLS FIRST], false, 0 + +(34) Scan parquet default.store_returns +Output [2]: [sr_item_sk#30, sr_ticket_number#31] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 10] +Input [2]: [sr_item_sk#30, sr_ticket_number#31] + +(36) Filter [codegen id : 10] +Input [2]: [sr_item_sk#30, sr_ticket_number#31] +Condition : (isnotnull(sr_ticket_number#31) AND isnotnull(sr_item_sk#30)) + +(37) Exchange +Input [2]: [sr_item_sk#30, sr_ticket_number#31] +Arguments: hashpartitioning(sr_ticket_number#31, sr_item_sk#30, 5), true, [id=#32] + +(38) Sort [codegen id : 11] +Input [2]: [sr_item_sk#30, sr_ticket_number#31] +Arguments: [sr_ticket_number#31 ASC NULLS FIRST, sr_item_sk#30 ASC NULLS FIRST], false, 0 + +(39) SortMergeJoin [codegen id : 12] +Left keys [2]: [cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint)] +Right keys [2]: [sr_ticket_number#31, sr_item_sk#30] +Join condition: None + +(40) Project [codegen id : 12] +Output [11]: [ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] +Input [15]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, s_store_name#20, s_state#22, ca_state#25, sr_item_sk#30, sr_ticket_number#31] + 
+(41) HashAggregate [codegen id : 12] +Input [11]: [ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] +Keys [10]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum#33] +Results [11]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, sum#34] + +(42) Exchange +Input [11]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, sum#34] +Arguments: hashpartitioning(c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, 5), true, [id=#35] + +(43) HashAggregate [codegen id : 13] +Input [11]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, sum#34] +Keys [10]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8] +Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#36] +Results [4]: [c_last_name#16, c_first_name#15, s_store_name#20, MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#36,17,2) AS netpaid#37] + +(44) HashAggregate [codegen id : 13] +Input [4]: [c_last_name#16, c_first_name#15, s_store_name#20, netpaid#37] +Keys [3]: [c_last_name#16, c_first_name#15, s_store_name#20] +Functions [1]: [partial_sum(netpaid#37)] +Aggregate Attributes [2]: [sum#38, isEmpty#39] +Results [5]: [c_last_name#16, c_first_name#15, s_store_name#20, sum#40, isEmpty#41] + +(45) Exchange +Input 
[5]: [c_last_name#16, c_first_name#15, s_store_name#20, sum#40, isEmpty#41] +Arguments: hashpartitioning(c_last_name#16, c_first_name#15, s_store_name#20, 5), true, [id=#42] + +(46) HashAggregate [codegen id : 14] +Input [5]: [c_last_name#16, c_first_name#15, s_store_name#20, sum#40, isEmpty#41] +Keys [3]: [c_last_name#16, c_first_name#15, s_store_name#20] +Functions [1]: [sum(netpaid#37)] +Aggregate Attributes [1]: [sum(netpaid#37)#43] +Results [5]: [c_last_name#16, c_first_name#15, s_store_name#20, sum(netpaid#37)#43 AS paid#44, sum(netpaid#37)#43 AS sum(netpaid#37)#45] + +(47) Filter [codegen id : 14] +Input [5]: [c_last_name#16, c_first_name#15, s_store_name#20, paid#44, sum(netpaid#37)#45] +Condition : (isnotnull(sum(netpaid#37)#45) AND (cast(sum(netpaid#37)#45 as decimal(33,8)) > cast(Subquery scalar-subquery#46, [id=#47] as decimal(33,8)))) + +(48) Project [codegen id : 14] +Output [4]: [c_last_name#16, c_first_name#15, s_store_name#20, paid#44] +Input [5]: [c_last_name#16, c_first_name#15, s_store_name#20, paid#44, sum(netpaid#37)#45] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 47 Hosting Expression = Subquery scalar-subquery#46, [id=#47] +* HashAggregate (100) ++- Exchange (99) + +- * HashAggregate (98) + +- * HashAggregate (97) + +- Exchange (96) + +- * HashAggregate (95) + +- * Project (94) + +- * SortMergeJoin Inner (93) + :- * Sort (87) + : +- Exchange (86) + : +- * Project (85) + : +- * SortMergeJoin Inner (84) + : :- * Sort (78) + : : +- Exchange (77) + : : +- * Project (76) + : : +- * SortMergeJoin Inner (75) + : : :- * Sort (69) + : : : +- Exchange (68) + : : : +- * Project (67) + : : : +- * SortMergeJoin Inner (66) + : : : :- * Sort (60) + : : : : +- Exchange (59) + : : : : +- * Project (58) + : : : : +- * BroadcastHashJoin Inner BuildLeft (57) + : : : : :- BroadcastExchange (53) + : : : : : +- * Project (52) + : : : : : +- * Filter (51) + : : : : : +- * ColumnarToRow (50) + : : : : : +- Scan parquet default.store (49) + : : : : 
+- * Filter (56) + : : : : +- * ColumnarToRow (55) + : : : : +- Scan parquet default.store_sales (54) + : : : +- * Sort (65) + : : : +- Exchange (64) + : : : +- * Filter (63) + : : : +- * ColumnarToRow (62) + : : : +- Scan parquet default.item (61) + : : +- * Sort (74) + : : +- Exchange (73) + : : +- * Filter (72) + : : +- * ColumnarToRow (71) + : : +- Scan parquet default.customer (70) + : +- * Sort (83) + : +- Exchange (82) + : +- * Filter (81) + : +- * ColumnarToRow (80) + : +- Scan parquet default.customer_address (79) + +- * Sort (92) + +- Exchange (91) + +- * Filter (90) + +- * ColumnarToRow (89) + +- Scan parquet default.store_returns (88) + + +(49) Scan parquet default.store +Output [5]: [s_store_sk#19, s_store_name#20, s_market_id#21, s_state#22, s_zip#23] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] +ReadSchema: struct + +(50) ColumnarToRow [codegen id : 1] +Input [5]: [s_store_sk#19, s_store_name#20, s_market_id#21, s_state#22, s_zip#23] + +(51) Filter [codegen id : 1] +Input [5]: [s_store_sk#19, s_store_name#20, s_market_id#21, s_state#22, s_zip#23] +Condition : (((isnotnull(s_market_id#21) AND (s_market_id#21 = 8)) AND isnotnull(s_store_sk#19)) AND isnotnull(s_zip#23)) + +(52) Project [codegen id : 1] +Output [4]: [s_store_sk#19, s_store_name#20, s_state#22, s_zip#23] +Input [5]: [s_store_sk#19, s_store_name#20, s_market_id#21, s_state#22, s_zip#23] + +(53) BroadcastExchange +Input [4]: [s_store_sk#19, s_store_name#20, s_state#22, s_zip#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#48] + +(54) Scan parquet default.store_sales +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(55) ColumnarToRow +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] + +(56) Filter +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(57) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [s_store_sk#19] +Right keys [1]: [ss_store_sk#3] +Join condition: None + +(58) Project [codegen id : 2] +Output [7]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5] +Input [9]: [s_store_sk#19, s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] + +(59) Exchange +Input [7]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5] +Arguments: hashpartitioning(ss_item_sk#1, 5), true, [id=#49] + +(60) Sort [codegen id : 3] +Input [7]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5] +Arguments: [ss_item_sk#1 ASC NULLS FIRST], false, 0 + +(61) Scan parquet default.item +Output [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(62) ColumnarToRow [codegen id : 4] +Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] + +(63) Filter [codegen id : 4] 
+Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Condition : isnotnull(i_item_sk#6) + +(64) Exchange +Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Arguments: hashpartitioning(i_item_sk#6, 5), true, [id=#50] + +(65) Sort [codegen id : 5] +Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Arguments: [i_item_sk#6 ASC NULLS FIRST], false, 0 + +(66) SortMergeJoin [codegen id : 6] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(67) Project [codegen id : 6] +Output [12]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Input [13]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] + +(68) Exchange +Input [12]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#51] + +(69) Sort [codegen id : 7] +Input [12]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 + +(70) Scan parquet default.customer +Output [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)] +ReadSchema: struct + +(71) ColumnarToRow [codegen 
id : 8] +Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] + +(72) Filter [codegen id : 8] +Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Condition : (isnotnull(c_customer_sk#14) AND isnotnull(c_birth_country#17)) + +(73) Exchange +Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Arguments: hashpartitioning(c_customer_sk#14, 5), true, [id=#52] + +(74) Sort [codegen id : 9] +Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Arguments: [c_customer_sk#14 ASC NULLS FIRST], false, 0 + +(75) SortMergeJoin [codegen id : 10] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#14] +Join condition: None + +(76) Project [codegen id : 10] +Output [14]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] +Input [16]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] + +(77) Exchange +Input [14]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] +Arguments: hashpartitioning(s_zip#23, c_birth_country#17, 5), true, [id=#53] + +(78) Sort [codegen id : 11] +Input [14]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] +Arguments: [s_zip#23 ASC NULLS FIRST, c_birth_country#17 ASC NULLS FIRST], false, 0 + +(79) Scan parquet default.customer_address +Output [3]: 
[ca_state#25, ca_zip#26, ca_country#27] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_country), IsNotNull(ca_zip)] +ReadSchema: struct + +(80) ColumnarToRow [codegen id : 12] +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] + +(81) Filter [codegen id : 12] +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Condition : (isnotnull(ca_country#27) AND isnotnull(ca_zip#26)) + +(82) Exchange +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Arguments: hashpartitioning(ca_zip#26, upper(ca_country#27), 5), true, [id=#54] + +(83) Sort [codegen id : 13] +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Arguments: [ca_zip#26 ASC NULLS FIRST, upper(ca_country#27) ASC NULLS FIRST], false, 0 + +(84) SortMergeJoin [codegen id : 14] +Left keys [2]: [s_zip#23, c_birth_country#17] +Right keys [2]: [ca_zip#26, upper(ca_country#27)] +Join condition: None + +(85) Project [codegen id : 14] +Output [13]: [s_store_name#20, s_state#22, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] +Input [17]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17, ca_state#25, ca_zip#26, ca_country#27] + +(86) Exchange +Input [13]: [s_store_name#20, s_state#22, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] +Arguments: hashpartitioning(cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint), 5), true, [id=#55] + +(87) Sort [codegen id : 15] +Input [13]: [s_store_name#20, s_state#22, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, 
i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] +Arguments: [cast(ss_ticket_number#4 as bigint) ASC NULLS FIRST, cast(ss_item_sk#1 as bigint) ASC NULLS FIRST], false, 0 + +(88) Scan parquet default.store_returns +Output [2]: [sr_item_sk#30, sr_ticket_number#31] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(89) ColumnarToRow [codegen id : 16] +Input [2]: [sr_item_sk#30, sr_ticket_number#31] + +(90) Filter [codegen id : 16] +Input [2]: [sr_item_sk#30, sr_ticket_number#31] +Condition : (isnotnull(sr_ticket_number#31) AND isnotnull(sr_item_sk#30)) + +(91) Exchange +Input [2]: [sr_item_sk#30, sr_ticket_number#31] +Arguments: hashpartitioning(sr_ticket_number#31, sr_item_sk#30, 5), true, [id=#56] + +(92) Sort [codegen id : 17] +Input [2]: [sr_item_sk#30, sr_ticket_number#31] +Arguments: [sr_ticket_number#31 ASC NULLS FIRST, sr_item_sk#30 ASC NULLS FIRST], false, 0 + +(93) SortMergeJoin [codegen id : 18] +Left keys [2]: [cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint)] +Right keys [2]: [sr_ticket_number#31, sr_item_sk#30] +Join condition: None + +(94) Project [codegen id : 18] +Output [11]: [ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] +Input [15]: [s_store_name#20, s_state#22, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25, sr_item_sk#30, sr_ticket_number#31] + +(95) HashAggregate [codegen id : 18] +Input [11]: [ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, 
c_last_name#16, ca_state#25] +Keys [10]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum#57] +Results [11]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, sum#58] + +(96) Exchange +Input [11]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, sum#58] +Arguments: hashpartitioning(c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, 5), true, [id=#59] + +(97) HashAggregate [codegen id : 19] +Input [11]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, sum#58] +Keys [10]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8] +Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#60] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#60,17,2) AS netpaid#37] + +(98) HashAggregate [codegen id : 19] +Input [1]: [netpaid#37] +Keys: [] +Functions [1]: [partial_avg(netpaid#37)] +Aggregate Attributes [2]: [sum#61, count#62] +Results [2]: [sum#63, count#64] + +(99) Exchange +Input [2]: [sum#63, count#64] +Arguments: SinglePartition, true, [id=#65] + +(100) HashAggregate [codegen id : 20] +Input [2]: [sum#63, count#64] +Keys: [] +Functions [1]: [avg(netpaid#37)] +Aggregate Attributes [1]: [avg(netpaid#37)#66] +Results [1]: [CheckOverflow((0.050000 * promote_precision(avg(netpaid#37)#66)), DecimalType(24,8), true) AS (CAST(0.05 AS DECIMAL(21,6)) * 
CAST(avg(netpaid) AS DECIMAL(21,6)))#67] + + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/simplified.txt new file mode 100644 index 0000000000000..6eb86a35357b0 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/simplified.txt @@ -0,0 +1,179 @@ +WholeStageCodegen (14) + Project [c_first_name,c_last_name,paid,s_store_name] + Filter [sum(netpaid)] + Subquery #1 + WholeStageCodegen (20) + HashAggregate [count,sum] [(CAST(0.05 AS DECIMAL(21,6)) * CAST(avg(netpaid) AS DECIMAL(21,6))),avg(netpaid),count,sum] + InputAdapter + Exchange #10 + WholeStageCodegen (19) + HashAggregate [netpaid] [count,count,sum,sum] + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,sum] [netpaid,sum,sum(UnscaledValue(ss_net_paid))] + InputAdapter + Exchange [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name] #11 + WholeStageCodegen (18) + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] [sum,sum] + Project [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] + SortMergeJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + InputAdapter + WholeStageCodegen (15) + Sort [ss_item_sk,ss_ticket_number] + InputAdapter + Exchange [ss_item_sk,ss_ticket_number] #12 + WholeStageCodegen (14) + Project [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_item_sk,ss_net_paid,ss_ticket_number] + SortMergeJoin [c_birth_country,ca_country,ca_zip,s_zip] + InputAdapter + WholeStageCodegen (11) + Sort [c_birth_country,s_zip] + InputAdapter + Exchange [c_birth_country,s_zip] #13 
+ WholeStageCodegen (10) + Project [c_birth_country,c_first_name,c_last_name,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_item_sk,ss_net_paid,ss_ticket_number] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (7) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #14 + WholeStageCodegen (6) + Project [i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_customer_sk,ss_item_sk,ss_net_paid,ss_ticket_number] + SortMergeJoin [i_item_sk,ss_item_sk] + InputAdapter + WholeStageCodegen (3) + Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #15 + WholeStageCodegen (2) + Project [s_state,s_store_name,s_zip,ss_customer_sk,ss_item_sk,ss_net_paid,ss_ticket_number] + BroadcastHashJoin [s_store_sk,ss_store_sk] + InputAdapter + BroadcastExchange #16 + WholeStageCodegen (1) + Project [s_state,s_store_name,s_store_sk,s_zip] + Filter [s_market_id,s_store_sk,s_zip] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_market_id,s_state,s_store_name,s_store_sk,s_zip] + Filter [ss_customer_sk,ss_item_sk,ss_store_sk,ss_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] + InputAdapter + WholeStageCodegen (5) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #17 + WholeStageCodegen (4) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] + InputAdapter + WholeStageCodegen (9) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #18 + WholeStageCodegen (8) + Filter [c_birth_country,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_birth_country,c_customer_sk,c_first_name,c_last_name] + InputAdapter + WholeStageCodegen (13) + Sort [ca_country,ca_zip] + InputAdapter + Exchange [ca_country,ca_zip] #19 + WholeStageCodegen (12) + 
Filter [ca_country,ca_zip] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_country,ca_state,ca_zip] + InputAdapter + WholeStageCodegen (17) + Sort [sr_item_sk,sr_ticket_number] + InputAdapter + Exchange [sr_item_sk,sr_ticket_number] #20 + WholeStageCodegen (16) + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] + HashAggregate [c_first_name,c_last_name,isEmpty,s_store_name,sum] [isEmpty,paid,sum,sum(netpaid),sum(netpaid)] + InputAdapter + Exchange [c_first_name,c_last_name,s_store_name] #1 + WholeStageCodegen (13) + HashAggregate [c_first_name,c_last_name,netpaid,s_store_name] [isEmpty,isEmpty,sum,sum] + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,sum] [netpaid,sum,sum(UnscaledValue(ss_net_paid))] + InputAdapter + Exchange [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name] #2 + WholeStageCodegen (12) + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] [sum,sum] + Project [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] + SortMergeJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + InputAdapter + WholeStageCodegen (9) + Sort [ss_item_sk,ss_ticket_number] + InputAdapter + Exchange [ss_item_sk,ss_ticket_number] #3 + WholeStageCodegen (8) + Project [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_item_sk,ss_net_paid,ss_ticket_number] + BroadcastHashJoin [c_birth_country,ca_country,s_store_sk,ss_store_sk] + Project [c_birth_country,c_first_name,c_last_name,i_color,i_current_price,i_manager_id,i_size,i_units,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] + SortMergeJoin 
[c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (3) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #4 + WholeStageCodegen (2) + Project [i_color,i_current_price,i_manager_id,i_size,i_units,ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [ss_customer_sk,ss_item_sk,ss_store_sk,ss_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Filter [i_color,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] + InputAdapter + WholeStageCodegen (5) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #6 + WholeStageCodegen (4) + Filter [c_birth_country,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_birth_country,c_customer_sk,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (7) + Project [ca_country,ca_state,s_state,s_store_name,s_store_sk] + BroadcastHashJoin [ca_zip,s_zip] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (6) + Project [s_state,s_store_name,s_store_sk,s_zip] + Filter [s_market_id,s_store_sk,s_zip] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_market_id,s_state,s_store_name,s_store_sk,s_zip] + Filter [ca_country,ca_zip] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_country,ca_state,ca_zip] + InputAdapter + WholeStageCodegen (11) + Sort [sr_item_sk,sr_ticket_number] + InputAdapter + Exchange [sr_item_sk,sr_ticket_number] #9 + WholeStageCodegen (10) + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/explain.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/explain.txt new file mode 100644 index 0000000000000..09942dc6d5009 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/explain.txt @@ -0,0 +1,477 @@ +== Physical Plan == +* Project (42) ++- * Filter (41) + +- * HashAggregate (40) + +- Exchange (39) + +- * HashAggregate (38) + +- * HashAggregate (37) + +- Exchange (36) + +- * HashAggregate (35) + +- * Project (34) + +- * BroadcastHashJoin Inner BuildRight (33) + :- * Project (28) + : +- * BroadcastHashJoin Inner BuildRight (27) + : :- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.store_returns (4) + : : : +- BroadcastExchange (14) + : : : +- * Project (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.store (10) + : : +- BroadcastExchange (20) + : : +- * Filter (19) + : : +- * ColumnarToRow (18) + : : +- Scan parquet default.item (17) + : +- BroadcastExchange (26) + : +- * Filter (25) + : +- * ColumnarToRow (24) + : +- Scan parquet default.customer (23) + +- BroadcastExchange (32) + +- * Filter (31) + +- * ColumnarToRow (30) + +- Scan parquet default.customer_address (29) + + +(1) Scan parquet default.store_sales +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), 
IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 6] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] + +(3) Filter [codegen id : 6] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(4) Scan parquet default.store_returns +Output [2]: [sr_item_sk#6, sr_ticket_number#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [sr_item_sk#6, sr_ticket_number#7] + +(6) Filter [codegen id : 1] +Input [2]: [sr_item_sk#6, sr_ticket_number#7] +Condition : (isnotnull(sr_ticket_number#7) AND isnotnull(sr_item_sk#6)) + +(7) BroadcastExchange +Input [2]: [sr_item_sk#6, sr_ticket_number#7] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#8] + +(8) BroadcastHashJoin [codegen id : 6] +Left keys [2]: [cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint)] +Right keys [2]: [sr_ticket_number#7, sr_item_sk#6] +Join condition: None + +(9) Project [codegen id : 6] +Output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#6, sr_ticket_number#7] + +(10) Scan parquet default.store +Output [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_market_id), 
EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] + +(12) Filter [codegen id : 2] +Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] +Condition : (((isnotnull(s_market_id#11) AND (s_market_id#11 = 8)) AND isnotnull(s_store_sk#9)) AND isnotnull(s_zip#13)) + +(13) Project [codegen id : 2] +Output [4]: [s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] +Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] + +(14) BroadcastExchange +Input [4]: [s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] + +(15) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#9] +Join condition: None + +(16) Project [codegen id : 6] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] + +(17) Scan parquet default.item +Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_color), EqualTo(i_color,pale), IsNotNull(i_item_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(19) Filter [codegen id : 3] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Condition : ((isnotnull(i_color#18) AND (i_color#18 = pale)) AND isnotnull(i_item_sk#15)) + +(20) BroadcastExchange +Input 
[6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#21] + +(21) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#15] +Join condition: None + +(22) Project [codegen id : 6] +Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(23) Scan parquet default.customer +Output [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] + +(25) Filter [codegen id : 4] +Input [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Condition : (isnotnull(c_customer_sk#22) AND isnotnull(c_birth_country#25)) + +(26) BroadcastExchange +Input [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#26] + +(27) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#22] +Join condition: None + +(28) Project [codegen id : 6] +Output [12]: [ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, c_birth_country#25] +Input [14]: [ss_customer_sk#2, 
ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] + +(29) Scan parquet default.customer_address +Output [3]: [ca_state#27, ca_zip#28, ca_country#29] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_zip), IsNotNull(ca_country)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 5] +Input [3]: [ca_state#27, ca_zip#28, ca_country#29] + +(31) Filter [codegen id : 5] +Input [3]: [ca_state#27, ca_zip#28, ca_country#29] +Condition : (isnotnull(ca_zip#28) AND isnotnull(ca_country#29)) + +(32) BroadcastExchange +Input [3]: [ca_state#27, ca_zip#28, ca_country#29] +Arguments: HashedRelationBroadcastMode(List(upper(input[2, string, false]), input[1, string, false]),false), [id=#30] + +(33) BroadcastHashJoin [codegen id : 6] +Left keys [2]: [c_birth_country#25, s_zip#13] +Right keys [2]: [upper(ca_country#29), ca_zip#28] +Join condition: None + +(34) Project [codegen id : 6] +Output [11]: [ss_net_paid#5, s_store_name#10, s_state#12, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] +Input [15]: [ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, c_birth_country#25, ca_state#27, ca_zip#28, ca_country#29] + +(35) HashAggregate [codegen id : 6] +Input [11]: [ss_net_paid#5, s_store_name#10, s_state#12, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] +Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: 
[partial_sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum#31] +Results [11]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#32] + +(36) Exchange +Input [11]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#32] +Arguments: hashpartitioning(c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), true, [id=#33] + +(37) HashAggregate [codegen id : 7] +Input [11]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#32] +Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#34] +Results [4]: [c_last_name#24, c_first_name#23, s_store_name#10, MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#34,17,2) AS netpaid#35] + +(38) HashAggregate [codegen id : 7] +Input [4]: [c_last_name#24, c_first_name#23, s_store_name#10, netpaid#35] +Keys [3]: [c_last_name#24, c_first_name#23, s_store_name#10] +Functions [1]: [partial_sum(netpaid#35)] +Aggregate Attributes [2]: [sum#36, isEmpty#37] +Results [5]: [c_last_name#24, c_first_name#23, s_store_name#10, sum#38, isEmpty#39] + +(39) Exchange +Input [5]: [c_last_name#24, c_first_name#23, s_store_name#10, sum#38, isEmpty#39] +Arguments: hashpartitioning(c_last_name#24, c_first_name#23, s_store_name#10, 5), true, [id=#40] + +(40) HashAggregate [codegen id : 8] +Input [5]: [c_last_name#24, c_first_name#23, s_store_name#10, sum#38, isEmpty#39] +Keys [3]: [c_last_name#24, c_first_name#23, s_store_name#10] +Functions 
[1]: [sum(netpaid#35)] +Aggregate Attributes [1]: [sum(netpaid#35)#41] +Results [5]: [c_last_name#24, c_first_name#23, s_store_name#10, sum(netpaid#35)#41 AS paid#42, sum(netpaid#35)#41 AS sum(netpaid#35)#43] + +(41) Filter [codegen id : 8] +Input [5]: [c_last_name#24, c_first_name#23, s_store_name#10, paid#42, sum(netpaid#35)#43] +Condition : (isnotnull(sum(netpaid#35)#43) AND (cast(sum(netpaid#35)#43 as decimal(33,8)) > cast(Subquery scalar-subquery#44, [id=#45] as decimal(33,8)))) + +(42) Project [codegen id : 8] +Output [4]: [c_last_name#24, c_first_name#23, s_store_name#10, paid#42] +Input [5]: [c_last_name#24, c_first_name#23, s_store_name#10, paid#42, sum(netpaid#35)#43] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 41 Hosting Expression = Subquery scalar-subquery#44, [id=#45] +* HashAggregate (82) ++- Exchange (81) + +- * HashAggregate (80) + +- * HashAggregate (79) + +- Exchange (78) + +- * HashAggregate (77) + +- * Project (76) + +- * BroadcastHashJoin Inner BuildRight (75) + :- * Project (70) + : +- * BroadcastHashJoin Inner BuildRight (69) + : :- * Project (64) + : : +- * BroadcastHashJoin Inner BuildRight (63) + : : :- * Project (58) + : : : +- * BroadcastHashJoin Inner BuildRight (57) + : : : :- * Project (51) + : : : : +- * BroadcastHashJoin Inner BuildRight (50) + : : : : :- * Filter (45) + : : : : : +- * ColumnarToRow (44) + : : : : : +- Scan parquet default.store_sales (43) + : : : : +- BroadcastExchange (49) + : : : : +- * Filter (48) + : : : : +- * ColumnarToRow (47) + : : : : +- Scan parquet default.store_returns (46) + : : : +- BroadcastExchange (56) + : : : +- * Project (55) + : : : +- * Filter (54) + : : : +- * ColumnarToRow (53) + : : : +- Scan parquet default.store (52) + : : +- BroadcastExchange (62) + : : +- * Filter (61) + : : +- * ColumnarToRow (60) + : : +- Scan parquet default.item (59) + : +- BroadcastExchange (68) + : +- * Filter (67) + : +- * ColumnarToRow (66) + : +- Scan parquet default.customer (65) + +- 
BroadcastExchange (74) + +- * Filter (73) + +- * ColumnarToRow (72) + +- Scan parquet default.customer_address (71) + + +(43) Scan parquet default.store_sales +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(44) ColumnarToRow [codegen id : 6] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] + +(45) Filter [codegen id : 6] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(46) Scan parquet default.store_returns +Output [2]: [sr_item_sk#6, sr_ticket_number#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(47) ColumnarToRow [codegen id : 1] +Input [2]: [sr_item_sk#6, sr_ticket_number#7] + +(48) Filter [codegen id : 1] +Input [2]: [sr_item_sk#6, sr_ticket_number#7] +Condition : (isnotnull(sr_ticket_number#7) AND isnotnull(sr_item_sk#6)) + +(49) BroadcastExchange +Input [2]: [sr_item_sk#6, sr_ticket_number#7] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#46] + +(50) BroadcastHashJoin [codegen id : 6] +Left keys [2]: [cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint)] +Right keys [2]: [sr_ticket_number#7, sr_item_sk#6] +Join condition: None + +(51) Project [codegen id : 6] +Output [4]: [ss_item_sk#1, 
ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#6, sr_ticket_number#7] + +(52) Scan parquet default.store +Output [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] +ReadSchema: struct + +(53) ColumnarToRow [codegen id : 2] +Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] + +(54) Filter [codegen id : 2] +Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] +Condition : (((isnotnull(s_market_id#11) AND (s_market_id#11 = 8)) AND isnotnull(s_store_sk#9)) AND isnotnull(s_zip#13)) + +(55) Project [codegen id : 2] +Output [4]: [s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] +Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] + +(56) BroadcastExchange +Input [4]: [s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#47] + +(57) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#9] +Join condition: None + +(58) Project [codegen id : 6] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] + +(59) Scan parquet default.item +Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(60) ColumnarToRow [codegen id : 3] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(61) Filter [codegen id : 3] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Condition : isnotnull(i_item_sk#15) + +(62) BroadcastExchange +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#48] + +(63) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#15] +Join condition: None + +(64) Project [codegen id : 6] +Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(65) Scan parquet default.customer +Output [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)] +ReadSchema: struct + +(66) ColumnarToRow [codegen id : 4] +Input [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] + +(67) Filter [codegen id : 4] +Input [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Condition : (isnotnull(c_customer_sk#22) AND isnotnull(c_birth_country#25)) + +(68) BroadcastExchange +Input [4]: [c_customer_sk#22, c_first_name#23, 
c_last_name#24, c_birth_country#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#49] + +(69) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#22] +Join condition: None + +(70) Project [codegen id : 6] +Output [12]: [ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, c_birth_country#25] +Input [14]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] + +(71) Scan parquet default.customer_address +Output [3]: [ca_state#27, ca_zip#28, ca_country#29] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_country), IsNotNull(ca_zip)] +ReadSchema: struct + +(72) ColumnarToRow [codegen id : 5] +Input [3]: [ca_state#27, ca_zip#28, ca_country#29] + +(73) Filter [codegen id : 5] +Input [3]: [ca_state#27, ca_zip#28, ca_country#29] +Condition : (isnotnull(ca_country#29) AND isnotnull(ca_zip#28)) + +(74) BroadcastExchange +Input [3]: [ca_state#27, ca_zip#28, ca_country#29] +Arguments: HashedRelationBroadcastMode(List(upper(input[2, string, false]), input[1, string, false]),false), [id=#50] + +(75) BroadcastHashJoin [codegen id : 6] +Left keys [2]: [c_birth_country#25, s_zip#13] +Right keys [2]: [upper(ca_country#29), ca_zip#28] +Join condition: None + +(76) Project [codegen id : 6] +Output [11]: [ss_net_paid#5, s_store_name#10, s_state#12, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] +Input [15]: [ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, 
i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, c_birth_country#25, ca_state#27, ca_zip#28, ca_country#29] + +(77) HashAggregate [codegen id : 6] +Input [11]: [ss_net_paid#5, s_store_name#10, s_state#12, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] +Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum#51] +Results [11]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#52] + +(78) Exchange +Input [11]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#52] +Arguments: hashpartitioning(c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), true, [id=#53] + +(79) HashAggregate [codegen id : 7] +Input [11]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#52] +Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#54] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#54,17,2) AS netpaid#35] + +(80) HashAggregate [codegen id : 7] +Input [1]: [netpaid#35] +Keys: [] +Functions [1]: [partial_avg(netpaid#35)] +Aggregate Attributes [2]: [sum#55, count#56] +Results [2]: [sum#57, count#58] + +(81) Exchange +Input [2]: [sum#57, count#58] +Arguments: SinglePartition, 
true, [id=#59] + +(82) HashAggregate [codegen id : 8] +Input [2]: [sum#57, count#58] +Keys: [] +Functions [1]: [avg(netpaid#35)] +Aggregate Attributes [1]: [avg(netpaid#35)#60] +Results [1]: [CheckOverflow((0.050000 * promote_precision(avg(netpaid#35)#60)), DecimalType(24,8), true) AS (CAST(0.05 AS DECIMAL(21,6)) * CAST(avg(netpaid) AS DECIMAL(21,6)))#61] + + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/simplified.txt new file mode 100644 index 0000000000000..306d7f1f78e11 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/simplified.txt @@ -0,0 +1,125 @@ +WholeStageCodegen (8) + Project [c_first_name,c_last_name,paid,s_store_name] + Filter [sum(netpaid)] + Subquery #1 + WholeStageCodegen (8) + HashAggregate [count,sum] [(CAST(0.05 AS DECIMAL(21,6)) * CAST(avg(netpaid) AS DECIMAL(21,6))),avg(netpaid),count,sum] + InputAdapter + Exchange #8 + WholeStageCodegen (7) + HashAggregate [netpaid] [count,count,sum,sum] + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,sum] [netpaid,sum,sum(UnscaledValue(ss_net_paid))] + InputAdapter + Exchange [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name] #9 + WholeStageCodegen (6) + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] [sum,sum] + Project [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] + BroadcastHashJoin [c_birth_country,ca_country,ca_zip,s_zip] + Project [c_birth_country,c_first_name,c_last_name,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_net_paid] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project 
[i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_customer_sk,ss_net_paid] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [s_state,s_store_name,s_zip,ss_customer_sk,ss_item_sk,ss_net_paid] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Filter [ss_customer_sk,ss_item_sk,ss_store_sk,ss_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (1) + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (2) + Project [s_state,s_store_name,s_store_sk,s_zip] + Filter [s_market_id,s_store_sk,s_zip] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_market_id,s_state,s_store_name,s_store_sk,s_zip] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (4) + Filter [c_birth_country,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_birth_country,c_customer_sk,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (5) + Filter [ca_country,ca_zip] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_country,ca_state,ca_zip] + HashAggregate [c_first_name,c_last_name,isEmpty,s_store_name,sum] [isEmpty,paid,sum,sum(netpaid),sum(netpaid)] + InputAdapter + Exchange [c_first_name,c_last_name,s_store_name] #1 + WholeStageCodegen (7) + HashAggregate [c_first_name,c_last_name,netpaid,s_store_name] [isEmpty,isEmpty,sum,sum] + 
HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,sum] [netpaid,sum,sum(UnscaledValue(ss_net_paid))] + InputAdapter + Exchange [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name] #2 + WholeStageCodegen (6) + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] [sum,sum] + Project [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] + BroadcastHashJoin [c_birth_country,ca_country,ca_zip,s_zip] + Project [c_birth_country,c_first_name,c_last_name,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_net_paid] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_customer_sk,ss_net_paid] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [s_state,s_store_name,s_zip,ss_customer_sk,ss_item_sk,ss_net_paid] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Filter [ss_customer_sk,ss_item_sk,ss_store_sk,ss_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [s_state,s_store_name,s_store_sk,s_zip] + Filter [s_market_id,s_store_sk,s_zip] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_market_id,s_state,s_store_name,s_store_sk,s_zip] + InputAdapter + BroadcastExchange #5 + 
WholeStageCodegen (3) + Filter [i_color,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + Filter [c_birth_country,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_birth_country,c_customer_sk,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (5) + Filter [ca_country,ca_zip] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_country,ca_state,ca_zip] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/explain.txt new file mode 100644 index 0000000000000..1c7950dbaa396 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/explain.txt @@ -0,0 +1,567 @@ +== Physical Plan == +* Project (48) ++- * Filter (47) + +- * HashAggregate (46) + +- Exchange (45) + +- * HashAggregate (44) + +- * HashAggregate (43) + +- Exchange (42) + +- * HashAggregate (41) + +- * Project (40) + +- * SortMergeJoin Inner (39) + :- * Sort (33) + : +- Exchange (32) + : +- * Project (31) + : +- * BroadcastHashJoin Inner BuildRight (30) + : :- * Project (18) + : : +- * SortMergeJoin Inner (17) + : : :- * Sort (11) + : : : +- Exchange (10) + : : : +- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.item (4) + : : +- * Sort (16) + : : +- Exchange (15) + : : +- * Filter (14) + : : +- * ColumnarToRow (13) + : : +- Scan parquet default.customer (12) + : +- BroadcastExchange (29) + : +- * Project (28) + : +- * BroadcastHashJoin Inner BuildLeft (27) + : :- 
BroadcastExchange (23) + : : +- * Project (22) + : : +- * Filter (21) + : : +- * ColumnarToRow (20) + : : +- Scan parquet default.store (19) + : +- * Filter (26) + : +- * ColumnarToRow (25) + : +- Scan parquet default.customer_address (24) + +- * Sort (38) + +- Exchange (37) + +- * Filter (36) + +- * ColumnarToRow (35) + +- Scan parquet default.store_returns (34) + + +(1) Scan parquet default.store_sales +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] + +(3) Filter [codegen id : 2] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(4) Scan parquet default.item +Output [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_color), EqualTo(i_color,chiffon), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] + +(6) Filter [codegen id : 1] +Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Condition : ((isnotnull(i_color#9) AND (i_color#9 = chiffon)) AND isnotnull(i_item_sk#6)) + +(7) 
BroadcastExchange +Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] + +(8) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(9) Project [codegen id : 2] +Output [10]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] + +(10) Exchange +Input [10]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#13] + +(11) Sort [codegen id : 3] +Input [10]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 + +(12) Scan parquet default.customer +Output [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)] +ReadSchema: struct + +(13) ColumnarToRow [codegen id : 4] +Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] + +(14) Filter [codegen id : 4] +Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Condition : (isnotnull(c_customer_sk#14) AND isnotnull(c_birth_country#17)) + +(15) Exchange +Input [4]: [c_customer_sk#14, c_first_name#15, 
c_last_name#16, c_birth_country#17] +Arguments: hashpartitioning(c_customer_sk#14, 5), true, [id=#18] + +(16) Sort [codegen id : 5] +Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Arguments: [c_customer_sk#14 ASC NULLS FIRST], false, 0 + +(17) SortMergeJoin [codegen id : 8] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#14] +Join condition: None + +(18) Project [codegen id : 8] +Output [12]: [ss_item_sk#1, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] +Input [14]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] + +(19) Scan parquet default.store +Output [5]: [s_store_sk#19, s_store_name#20, s_market_id#21, s_state#22, s_zip#23] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 6] +Input [5]: [s_store_sk#19, s_store_name#20, s_market_id#21, s_state#22, s_zip#23] + +(21) Filter [codegen id : 6] +Input [5]: [s_store_sk#19, s_store_name#20, s_market_id#21, s_state#22, s_zip#23] +Condition : (((isnotnull(s_market_id#21) AND (s_market_id#21 = 8)) AND isnotnull(s_store_sk#19)) AND isnotnull(s_zip#23)) + +(22) Project [codegen id : 6] +Output [4]: [s_store_sk#19, s_store_name#20, s_state#22, s_zip#23] +Input [5]: [s_store_sk#19, s_store_name#20, s_market_id#21, s_state#22, s_zip#23] + +(23) BroadcastExchange +Input [4]: [s_store_sk#19, s_store_name#20, s_state#22, s_zip#23] +Arguments: HashedRelationBroadcastMode(List(input[3, string, 
true]),false), [id=#24] + +(24) Scan parquet default.customer_address +Output [3]: [ca_state#25, ca_zip#26, ca_country#27] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_zip), IsNotNull(ca_country)] +ReadSchema: struct + +(25) ColumnarToRow +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] + +(26) Filter +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Condition : (isnotnull(ca_zip#26) AND isnotnull(ca_country#27)) + +(27) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [s_zip#23] +Right keys [1]: [ca_zip#26] +Join condition: None + +(28) Project [codegen id : 7] +Output [5]: [s_store_sk#19, s_store_name#20, s_state#22, ca_state#25, ca_country#27] +Input [7]: [s_store_sk#19, s_store_name#20, s_state#22, s_zip#23, ca_state#25, ca_zip#26, ca_country#27] + +(29) BroadcastExchange +Input [5]: [s_store_sk#19, s_store_name#20, s_state#22, ca_state#25, ca_country#27] +Arguments: HashedRelationBroadcastMode(List(upper(input[4, string, true]), input[0, int, true]),false), [id=#28] + +(30) BroadcastHashJoin [codegen id : 8] +Left keys [2]: [c_birth_country#17, ss_store_sk#3] +Right keys [2]: [upper(ca_country#27), s_store_sk#19] +Join condition: None + +(31) Project [codegen id : 8] +Output [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, s_store_name#20, s_state#22, ca_state#25] +Input [17]: [ss_item_sk#1, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17, s_store_sk#19, s_store_name#20, s_state#22, ca_state#25, ca_country#27] + +(32) Exchange +Input [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, 
c_first_name#15, c_last_name#16, s_store_name#20, s_state#22, ca_state#25] +Arguments: hashpartitioning(cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint), 5), true, [id=#29] + +(33) Sort [codegen id : 9] +Input [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, s_store_name#20, s_state#22, ca_state#25] +Arguments: [cast(ss_ticket_number#4 as bigint) ASC NULLS FIRST, cast(ss_item_sk#1 as bigint) ASC NULLS FIRST], false, 0 + +(34) Scan parquet default.store_returns +Output [2]: [sr_item_sk#30, sr_ticket_number#31] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 10] +Input [2]: [sr_item_sk#30, sr_ticket_number#31] + +(36) Filter [codegen id : 10] +Input [2]: [sr_item_sk#30, sr_ticket_number#31] +Condition : (isnotnull(sr_ticket_number#31) AND isnotnull(sr_item_sk#30)) + +(37) Exchange +Input [2]: [sr_item_sk#30, sr_ticket_number#31] +Arguments: hashpartitioning(sr_ticket_number#31, sr_item_sk#30, 5), true, [id=#32] + +(38) Sort [codegen id : 11] +Input [2]: [sr_item_sk#30, sr_ticket_number#31] +Arguments: [sr_ticket_number#31 ASC NULLS FIRST, sr_item_sk#30 ASC NULLS FIRST], false, 0 + +(39) SortMergeJoin [codegen id : 12] +Left keys [2]: [cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint)] +Right keys [2]: [sr_ticket_number#31, sr_item_sk#30] +Join condition: None + +(40) Project [codegen id : 12] +Output [11]: [ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] +Input [15]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, 
i_manager_id#11, c_first_name#15, c_last_name#16, s_store_name#20, s_state#22, ca_state#25, sr_item_sk#30, sr_ticket_number#31] + +(41) HashAggregate [codegen id : 12] +Input [11]: [ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] +Keys [10]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum#33] +Results [11]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, sum#34] + +(42) Exchange +Input [11]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, sum#34] +Arguments: hashpartitioning(c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, 5), true, [id=#35] + +(43) HashAggregate [codegen id : 13] +Input [11]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, sum#34] +Keys [10]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8] +Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#36] +Results [4]: [c_last_name#16, c_first_name#15, s_store_name#20, MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#36,17,2) AS netpaid#37] + +(44) HashAggregate [codegen id : 13] +Input [4]: [c_last_name#16, c_first_name#15, s_store_name#20, netpaid#37] +Keys [3]: [c_last_name#16, c_first_name#15, s_store_name#20] +Functions [1]: [partial_sum(netpaid#37)] +Aggregate Attributes [2]: 
[sum#38, isEmpty#39] +Results [5]: [c_last_name#16, c_first_name#15, s_store_name#20, sum#40, isEmpty#41] + +(45) Exchange +Input [5]: [c_last_name#16, c_first_name#15, s_store_name#20, sum#40, isEmpty#41] +Arguments: hashpartitioning(c_last_name#16, c_first_name#15, s_store_name#20, 5), true, [id=#42] + +(46) HashAggregate [codegen id : 14] +Input [5]: [c_last_name#16, c_first_name#15, s_store_name#20, sum#40, isEmpty#41] +Keys [3]: [c_last_name#16, c_first_name#15, s_store_name#20] +Functions [1]: [sum(netpaid#37)] +Aggregate Attributes [1]: [sum(netpaid#37)#43] +Results [5]: [c_last_name#16, c_first_name#15, s_store_name#20, sum(netpaid#37)#43 AS paid#44, sum(netpaid#37)#43 AS sum(netpaid#37)#45] + +(47) Filter [codegen id : 14] +Input [5]: [c_last_name#16, c_first_name#15, s_store_name#20, paid#44, sum(netpaid#37)#45] +Condition : (isnotnull(sum(netpaid#37)#45) AND (cast(sum(netpaid#37)#45 as decimal(33,8)) > cast(Subquery scalar-subquery#46, [id=#47] as decimal(33,8)))) + +(48) Project [codegen id : 14] +Output [4]: [c_last_name#16, c_first_name#15, s_store_name#20, paid#44] +Input [5]: [c_last_name#16, c_first_name#15, s_store_name#20, paid#44, sum(netpaid#37)#45] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 47 Hosting Expression = Subquery scalar-subquery#46, [id=#47] +* HashAggregate (100) ++- Exchange (99) + +- * HashAggregate (98) + +- * HashAggregate (97) + +- Exchange (96) + +- * HashAggregate (95) + +- * Project (94) + +- * SortMergeJoin Inner (93) + :- * Sort (87) + : +- Exchange (86) + : +- * Project (85) + : +- * SortMergeJoin Inner (84) + : :- * Sort (78) + : : +- Exchange (77) + : : +- * Project (76) + : : +- * SortMergeJoin Inner (75) + : : :- * Sort (69) + : : : +- Exchange (68) + : : : +- * Project (67) + : : : +- * SortMergeJoin Inner (66) + : : : :- * Sort (60) + : : : : +- Exchange (59) + : : : : +- * Project (58) + : : : : +- * BroadcastHashJoin Inner BuildLeft (57) + : : : : :- BroadcastExchange (53) + : : : : : +- * 
Project (52) + : : : : : +- * Filter (51) + : : : : : +- * ColumnarToRow (50) + : : : : : +- Scan parquet default.store (49) + : : : : +- * Filter (56) + : : : : +- * ColumnarToRow (55) + : : : : +- Scan parquet default.store_sales (54) + : : : +- * Sort (65) + : : : +- Exchange (64) + : : : +- * Filter (63) + : : : +- * ColumnarToRow (62) + : : : +- Scan parquet default.item (61) + : : +- * Sort (74) + : : +- Exchange (73) + : : +- * Filter (72) + : : +- * ColumnarToRow (71) + : : +- Scan parquet default.customer (70) + : +- * Sort (83) + : +- Exchange (82) + : +- * Filter (81) + : +- * ColumnarToRow (80) + : +- Scan parquet default.customer_address (79) + +- * Sort (92) + +- Exchange (91) + +- * Filter (90) + +- * ColumnarToRow (89) + +- Scan parquet default.store_returns (88) + + +(49) Scan parquet default.store +Output [5]: [s_store_sk#19, s_store_name#20, s_market_id#21, s_state#22, s_zip#23] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] +ReadSchema: struct + +(50) ColumnarToRow [codegen id : 1] +Input [5]: [s_store_sk#19, s_store_name#20, s_market_id#21, s_state#22, s_zip#23] + +(51) Filter [codegen id : 1] +Input [5]: [s_store_sk#19, s_store_name#20, s_market_id#21, s_state#22, s_zip#23] +Condition : (((isnotnull(s_market_id#21) AND (s_market_id#21 = 8)) AND isnotnull(s_store_sk#19)) AND isnotnull(s_zip#23)) + +(52) Project [codegen id : 1] +Output [4]: [s_store_sk#19, s_store_name#20, s_state#22, s_zip#23] +Input [5]: [s_store_sk#19, s_store_name#20, s_market_id#21, s_state#22, s_zip#23] + +(53) BroadcastExchange +Input [4]: [s_store_sk#19, s_store_name#20, s_state#22, s_zip#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#48] + +(54) Scan parquet default.store_sales +Output [5]: 
[ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(55) ColumnarToRow +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] + +(56) Filter +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(57) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [s_store_sk#19] +Right keys [1]: [ss_store_sk#3] +Join condition: None + +(58) Project [codegen id : 2] +Output [7]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5] +Input [9]: [s_store_sk#19, s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] + +(59) Exchange +Input [7]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5] +Arguments: hashpartitioning(ss_item_sk#1, 5), true, [id=#49] + +(60) Sort [codegen id : 3] +Input [7]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5] +Arguments: [ss_item_sk#1 ASC NULLS FIRST], false, 0 + +(61) Scan parquet default.item +Output [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(62) ColumnarToRow [codegen id : 4] 
+Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] + +(63) Filter [codegen id : 4] +Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Condition : isnotnull(i_item_sk#6) + +(64) Exchange +Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Arguments: hashpartitioning(i_item_sk#6, 5), true, [id=#50] + +(65) Sort [codegen id : 5] +Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Arguments: [i_item_sk#6 ASC NULLS FIRST], false, 0 + +(66) SortMergeJoin [codegen id : 6] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(67) Project [codegen id : 6] +Output [12]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Input [13]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] + +(68) Exchange +Input [12]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#51] + +(69) Sort [codegen id : 7] +Input [12]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 + +(70) Scan parquet default.customer +Output [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)] +ReadSchema: struct + +(71) ColumnarToRow [codegen id : 8] +Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] + +(72) Filter [codegen id : 8] +Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Condition : (isnotnull(c_customer_sk#14) AND isnotnull(c_birth_country#17)) + +(73) Exchange +Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Arguments: hashpartitioning(c_customer_sk#14, 5), true, [id=#52] + +(74) Sort [codegen id : 9] +Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Arguments: [c_customer_sk#14 ASC NULLS FIRST], false, 0 + +(75) SortMergeJoin [codegen id : 10] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#14] +Join condition: None + +(76) Project [codegen id : 10] +Output [14]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] +Input [16]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] + +(77) Exchange +Input [14]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] +Arguments: hashpartitioning(s_zip#23, c_birth_country#17, 5), true, [id=#53] + +(78) Sort [codegen id : 11] +Input [14]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, 
i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] +Arguments: [s_zip#23 ASC NULLS FIRST, c_birth_country#17 ASC NULLS FIRST], false, 0 + +(79) Scan parquet default.customer_address +Output [3]: [ca_state#25, ca_zip#26, ca_country#27] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_country), IsNotNull(ca_zip)] +ReadSchema: struct + +(80) ColumnarToRow [codegen id : 12] +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] + +(81) Filter [codegen id : 12] +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Condition : (isnotnull(ca_country#27) AND isnotnull(ca_zip#26)) + +(82) Exchange +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Arguments: hashpartitioning(ca_zip#26, upper(ca_country#27), 5), true, [id=#54] + +(83) Sort [codegen id : 13] +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Arguments: [ca_zip#26 ASC NULLS FIRST, upper(ca_country#27) ASC NULLS FIRST], false, 0 + +(84) SortMergeJoin [codegen id : 14] +Left keys [2]: [s_zip#23, c_birth_country#17] +Right keys [2]: [ca_zip#26, upper(ca_country#27)] +Join condition: None + +(85) Project [codegen id : 14] +Output [13]: [s_store_name#20, s_state#22, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] +Input [17]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17, ca_state#25, ca_zip#26, ca_country#27] + +(86) Exchange +Input [13]: [s_store_name#20, s_state#22, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, 
ca_state#25] +Arguments: hashpartitioning(cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint), 5), true, [id=#55] + +(87) Sort [codegen id : 15] +Input [13]: [s_store_name#20, s_state#22, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] +Arguments: [cast(ss_ticket_number#4 as bigint) ASC NULLS FIRST, cast(ss_item_sk#1 as bigint) ASC NULLS FIRST], false, 0 + +(88) Scan parquet default.store_returns +Output [2]: [sr_item_sk#30, sr_ticket_number#31] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(89) ColumnarToRow [codegen id : 16] +Input [2]: [sr_item_sk#30, sr_ticket_number#31] + +(90) Filter [codegen id : 16] +Input [2]: [sr_item_sk#30, sr_ticket_number#31] +Condition : (isnotnull(sr_ticket_number#31) AND isnotnull(sr_item_sk#30)) + +(91) Exchange +Input [2]: [sr_item_sk#30, sr_ticket_number#31] +Arguments: hashpartitioning(sr_ticket_number#31, sr_item_sk#30, 5), true, [id=#56] + +(92) Sort [codegen id : 17] +Input [2]: [sr_item_sk#30, sr_ticket_number#31] +Arguments: [sr_ticket_number#31 ASC NULLS FIRST, sr_item_sk#30 ASC NULLS FIRST], false, 0 + +(93) SortMergeJoin [codegen id : 18] +Left keys [2]: [cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint)] +Right keys [2]: [sr_ticket_number#31, sr_item_sk#30] +Join condition: None + +(94) Project [codegen id : 18] +Output [11]: [ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] +Input [15]: [s_store_name#20, s_state#22, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, 
c_first_name#15, c_last_name#16, ca_state#25, sr_item_sk#30, sr_ticket_number#31] + +(95) HashAggregate [codegen id : 18] +Input [11]: [ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] +Keys [10]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum#57] +Results [11]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, sum#58] + +(96) Exchange +Input [11]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, sum#58] +Arguments: hashpartitioning(c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, 5), true, [id=#59] + +(97) HashAggregate [codegen id : 19] +Input [11]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, sum#58] +Keys [10]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8] +Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#60] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#60,17,2) AS netpaid#37] + +(98) HashAggregate [codegen id : 19] +Input [1]: [netpaid#37] +Keys: [] +Functions [1]: [partial_avg(netpaid#37)] +Aggregate Attributes [2]: [sum#61, count#62] +Results [2]: [sum#63, count#64] + +(99) Exchange +Input [2]: [sum#63, count#64] +Arguments: SinglePartition, true, [id=#65] + +(100) HashAggregate [codegen id : 20] +Input [2]: 
[sum#63, count#64] +Keys: [] +Functions [1]: [avg(netpaid#37)] +Aggregate Attributes [1]: [avg(netpaid#37)#66] +Results [1]: [CheckOverflow((0.050000 * promote_precision(avg(netpaid#37)#66)), DecimalType(24,8), true) AS (CAST(0.05 AS DECIMAL(21,6)) * CAST(avg(netpaid) AS DECIMAL(21,6)))#67] + + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/simplified.txt new file mode 100644 index 0000000000000..6eb86a35357b0 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/simplified.txt @@ -0,0 +1,179 @@ +WholeStageCodegen (14) + Project [c_first_name,c_last_name,paid,s_store_name] + Filter [sum(netpaid)] + Subquery #1 + WholeStageCodegen (20) + HashAggregate [count,sum] [(CAST(0.05 AS DECIMAL(21,6)) * CAST(avg(netpaid) AS DECIMAL(21,6))),avg(netpaid),count,sum] + InputAdapter + Exchange #10 + WholeStageCodegen (19) + HashAggregate [netpaid] [count,count,sum,sum] + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,sum] [netpaid,sum,sum(UnscaledValue(ss_net_paid))] + InputAdapter + Exchange [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name] #11 + WholeStageCodegen (18) + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] [sum,sum] + Project [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] + SortMergeJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + InputAdapter + WholeStageCodegen (15) + Sort [ss_item_sk,ss_ticket_number] + InputAdapter + Exchange [ss_item_sk,ss_ticket_number] #12 + WholeStageCodegen (14) + Project 
[c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_item_sk,ss_net_paid,ss_ticket_number] + SortMergeJoin [c_birth_country,ca_country,ca_zip,s_zip] + InputAdapter + WholeStageCodegen (11) + Sort [c_birth_country,s_zip] + InputAdapter + Exchange [c_birth_country,s_zip] #13 + WholeStageCodegen (10) + Project [c_birth_country,c_first_name,c_last_name,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_item_sk,ss_net_paid,ss_ticket_number] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (7) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #14 + WholeStageCodegen (6) + Project [i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_customer_sk,ss_item_sk,ss_net_paid,ss_ticket_number] + SortMergeJoin [i_item_sk,ss_item_sk] + InputAdapter + WholeStageCodegen (3) + Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #15 + WholeStageCodegen (2) + Project [s_state,s_store_name,s_zip,ss_customer_sk,ss_item_sk,ss_net_paid,ss_ticket_number] + BroadcastHashJoin [s_store_sk,ss_store_sk] + InputAdapter + BroadcastExchange #16 + WholeStageCodegen (1) + Project [s_state,s_store_name,s_store_sk,s_zip] + Filter [s_market_id,s_store_sk,s_zip] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_market_id,s_state,s_store_name,s_store_sk,s_zip] + Filter [ss_customer_sk,ss_item_sk,ss_store_sk,ss_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] + InputAdapter + WholeStageCodegen (5) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #17 + WholeStageCodegen (4) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] + InputAdapter + WholeStageCodegen (9) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] 
#18 + WholeStageCodegen (8) + Filter [c_birth_country,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_birth_country,c_customer_sk,c_first_name,c_last_name] + InputAdapter + WholeStageCodegen (13) + Sort [ca_country,ca_zip] + InputAdapter + Exchange [ca_country,ca_zip] #19 + WholeStageCodegen (12) + Filter [ca_country,ca_zip] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_country,ca_state,ca_zip] + InputAdapter + WholeStageCodegen (17) + Sort [sr_item_sk,sr_ticket_number] + InputAdapter + Exchange [sr_item_sk,sr_ticket_number] #20 + WholeStageCodegen (16) + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] + HashAggregate [c_first_name,c_last_name,isEmpty,s_store_name,sum] [isEmpty,paid,sum,sum(netpaid),sum(netpaid)] + InputAdapter + Exchange [c_first_name,c_last_name,s_store_name] #1 + WholeStageCodegen (13) + HashAggregate [c_first_name,c_last_name,netpaid,s_store_name] [isEmpty,isEmpty,sum,sum] + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,sum] [netpaid,sum,sum(UnscaledValue(ss_net_paid))] + InputAdapter + Exchange [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name] #2 + WholeStageCodegen (12) + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] [sum,sum] + Project [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] + SortMergeJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + InputAdapter + WholeStageCodegen (9) + Sort [ss_item_sk,ss_ticket_number] + InputAdapter + Exchange [ss_item_sk,ss_ticket_number] #3 + WholeStageCodegen (8) + Project 
[c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_item_sk,ss_net_paid,ss_ticket_number] + BroadcastHashJoin [c_birth_country,ca_country,s_store_sk,ss_store_sk] + Project [c_birth_country,c_first_name,c_last_name,i_color,i_current_price,i_manager_id,i_size,i_units,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (3) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #4 + WholeStageCodegen (2) + Project [i_color,i_current_price,i_manager_id,i_size,i_units,ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [ss_customer_sk,ss_item_sk,ss_store_sk,ss_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Filter [i_color,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] + InputAdapter + WholeStageCodegen (5) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #6 + WholeStageCodegen (4) + Filter [c_birth_country,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_birth_country,c_customer_sk,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (7) + Project [ca_country,ca_state,s_state,s_store_name,s_store_sk] + BroadcastHashJoin [ca_zip,s_zip] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (6) + Project [s_state,s_store_name,s_store_sk,s_zip] + Filter [s_market_id,s_store_sk,s_zip] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_market_id,s_state,s_store_name,s_store_sk,s_zip] + Filter [ca_country,ca_zip] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_country,ca_state,ca_zip] + 
InputAdapter + WholeStageCodegen (11) + Sort [sr_item_sk,sr_ticket_number] + InputAdapter + Exchange [sr_item_sk,sr_ticket_number] #9 + WholeStageCodegen (10) + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/explain.txt new file mode 100644 index 0000000000000..aa1cd3e86f29a --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/explain.txt @@ -0,0 +1,477 @@ +== Physical Plan == +* Project (42) ++- * Filter (41) + +- * HashAggregate (40) + +- Exchange (39) + +- * HashAggregate (38) + +- * HashAggregate (37) + +- Exchange (36) + +- * HashAggregate (35) + +- * Project (34) + +- * BroadcastHashJoin Inner BuildRight (33) + :- * Project (28) + : +- * BroadcastHashJoin Inner BuildRight (27) + : :- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.store_returns (4) + : : : +- BroadcastExchange (14) + : : : +- * Project (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.store (10) + : : +- BroadcastExchange (20) + : : +- * Filter (19) + : : +- * ColumnarToRow (18) + : : +- Scan parquet default.item (17) + : +- BroadcastExchange (26) + : +- * Filter (25) + : +- * ColumnarToRow (24) + : +- Scan parquet default.customer (23) + +- BroadcastExchange (32) + +- * Filter (31) + +- * ColumnarToRow (30) + +- Scan parquet 
default.customer_address (29) + + +(1) Scan parquet default.store_sales +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 6] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] + +(3) Filter [codegen id : 6] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(4) Scan parquet default.store_returns +Output [2]: [sr_item_sk#6, sr_ticket_number#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [sr_item_sk#6, sr_ticket_number#7] + +(6) Filter [codegen id : 1] +Input [2]: [sr_item_sk#6, sr_ticket_number#7] +Condition : (isnotnull(sr_ticket_number#7) AND isnotnull(sr_item_sk#6)) + +(7) BroadcastExchange +Input [2]: [sr_item_sk#6, sr_ticket_number#7] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#8] + +(8) BroadcastHashJoin [codegen id : 6] +Left keys [2]: [cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint)] +Right keys [2]: [sr_ticket_number#7, sr_item_sk#6] +Join condition: None + +(9) Project [codegen id : 6] +Output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, 
ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#6, sr_ticket_number#7] + +(10) Scan parquet default.store +Output [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] + +(12) Filter [codegen id : 2] +Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] +Condition : (((isnotnull(s_market_id#11) AND (s_market_id#11 = 8)) AND isnotnull(s_store_sk#9)) AND isnotnull(s_zip#13)) + +(13) Project [codegen id : 2] +Output [4]: [s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] +Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] + +(14) BroadcastExchange +Input [4]: [s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] + +(15) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#9] +Join condition: None + +(16) Project [codegen id : 6] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] + +(17) Scan parquet default.item +Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_color), EqualTo(i_color,chiffon), IsNotNull(i_item_sk)] 
+ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(19) Filter [codegen id : 3] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Condition : ((isnotnull(i_color#18) AND (i_color#18 = chiffon)) AND isnotnull(i_item_sk#15)) + +(20) BroadcastExchange +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#21] + +(21) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#15] +Join condition: None + +(22) Project [codegen id : 6] +Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(23) Scan parquet default.customer +Output [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] + +(25) Filter [codegen id : 4] +Input [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Condition : (isnotnull(c_customer_sk#22) AND isnotnull(c_birth_country#25)) + +(26) BroadcastExchange +Input [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as 
bigint)),false), [id=#26] + +(27) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#22] +Join condition: None + +(28) Project [codegen id : 6] +Output [12]: [ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, c_birth_country#25] +Input [14]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] + +(29) Scan parquet default.customer_address +Output [3]: [ca_state#27, ca_zip#28, ca_country#29] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_zip), IsNotNull(ca_country)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 5] +Input [3]: [ca_state#27, ca_zip#28, ca_country#29] + +(31) Filter [codegen id : 5] +Input [3]: [ca_state#27, ca_zip#28, ca_country#29] +Condition : (isnotnull(ca_zip#28) AND isnotnull(ca_country#29)) + +(32) BroadcastExchange +Input [3]: [ca_state#27, ca_zip#28, ca_country#29] +Arguments: HashedRelationBroadcastMode(List(upper(input[2, string, false]), input[1, string, false]),false), [id=#30] + +(33) BroadcastHashJoin [codegen id : 6] +Left keys [2]: [c_birth_country#25, s_zip#13] +Right keys [2]: [upper(ca_country#29), ca_zip#28] +Join condition: None + +(34) Project [codegen id : 6] +Output [11]: [ss_net_paid#5, s_store_name#10, s_state#12, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] +Input [15]: [ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, c_birth_country#25, ca_state#27, ca_zip#28, 
ca_country#29] + +(35) HashAggregate [codegen id : 6] +Input [11]: [ss_net_paid#5, s_store_name#10, s_state#12, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] +Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum#31] +Results [11]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#32] + +(36) Exchange +Input [11]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#32] +Arguments: hashpartitioning(c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), true, [id=#33] + +(37) HashAggregate [codegen id : 7] +Input [11]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#32] +Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#34] +Results [4]: [c_last_name#24, c_first_name#23, s_store_name#10, MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#34,17,2) AS netpaid#35] + +(38) HashAggregate [codegen id : 7] +Input [4]: [c_last_name#24, c_first_name#23, s_store_name#10, netpaid#35] +Keys [3]: [c_last_name#24, c_first_name#23, s_store_name#10] +Functions [1]: [partial_sum(netpaid#35)] +Aggregate Attributes [2]: [sum#36, isEmpty#37] +Results [5]: [c_last_name#24, c_first_name#23, s_store_name#10, sum#38, 
isEmpty#39] + +(39) Exchange +Input [5]: [c_last_name#24, c_first_name#23, s_store_name#10, sum#38, isEmpty#39] +Arguments: hashpartitioning(c_last_name#24, c_first_name#23, s_store_name#10, 5), true, [id=#40] + +(40) HashAggregate [codegen id : 8] +Input [5]: [c_last_name#24, c_first_name#23, s_store_name#10, sum#38, isEmpty#39] +Keys [3]: [c_last_name#24, c_first_name#23, s_store_name#10] +Functions [1]: [sum(netpaid#35)] +Aggregate Attributes [1]: [sum(netpaid#35)#41] +Results [5]: [c_last_name#24, c_first_name#23, s_store_name#10, sum(netpaid#35)#41 AS paid#42, sum(netpaid#35)#41 AS sum(netpaid#35)#43] + +(41) Filter [codegen id : 8] +Input [5]: [c_last_name#24, c_first_name#23, s_store_name#10, paid#42, sum(netpaid#35)#43] +Condition : (isnotnull(sum(netpaid#35)#43) AND (cast(sum(netpaid#35)#43 as decimal(33,8)) > cast(Subquery scalar-subquery#44, [id=#45] as decimal(33,8)))) + +(42) Project [codegen id : 8] +Output [4]: [c_last_name#24, c_first_name#23, s_store_name#10, paid#42] +Input [5]: [c_last_name#24, c_first_name#23, s_store_name#10, paid#42, sum(netpaid#35)#43] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 41 Hosting Expression = Subquery scalar-subquery#44, [id=#45] +* HashAggregate (82) ++- Exchange (81) + +- * HashAggregate (80) + +- * HashAggregate (79) + +- Exchange (78) + +- * HashAggregate (77) + +- * Project (76) + +- * BroadcastHashJoin Inner BuildRight (75) + :- * Project (70) + : +- * BroadcastHashJoin Inner BuildRight (69) + : :- * Project (64) + : : +- * BroadcastHashJoin Inner BuildRight (63) + : : :- * Project (58) + : : : +- * BroadcastHashJoin Inner BuildRight (57) + : : : :- * Project (51) + : : : : +- * BroadcastHashJoin Inner BuildRight (50) + : : : : :- * Filter (45) + : : : : : +- * ColumnarToRow (44) + : : : : : +- Scan parquet default.store_sales (43) + : : : : +- BroadcastExchange (49) + : : : : +- * Filter (48) + : : : : +- * ColumnarToRow (47) + : : : : +- Scan parquet default.store_returns (46) + : : : +- 
BroadcastExchange (56) + : : : +- * Project (55) + : : : +- * Filter (54) + : : : +- * ColumnarToRow (53) + : : : +- Scan parquet default.store (52) + : : +- BroadcastExchange (62) + : : +- * Filter (61) + : : +- * ColumnarToRow (60) + : : +- Scan parquet default.item (59) + : +- BroadcastExchange (68) + : +- * Filter (67) + : +- * ColumnarToRow (66) + : +- Scan parquet default.customer (65) + +- BroadcastExchange (74) + +- * Filter (73) + +- * ColumnarToRow (72) + +- Scan parquet default.customer_address (71) + + +(43) Scan parquet default.store_sales +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(44) ColumnarToRow [codegen id : 6] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] + +(45) Filter [codegen id : 6] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(46) Scan parquet default.store_returns +Output [2]: [sr_item_sk#6, sr_ticket_number#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(47) ColumnarToRow [codegen id : 1] +Input [2]: [sr_item_sk#6, sr_ticket_number#7] + +(48) Filter [codegen id : 1] +Input [2]: [sr_item_sk#6, sr_ticket_number#7] +Condition : (isnotnull(sr_ticket_number#7) AND isnotnull(sr_item_sk#6)) + +(49) BroadcastExchange +Input [2]: 
[sr_item_sk#6, sr_ticket_number#7] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#46] + +(50) BroadcastHashJoin [codegen id : 6] +Left keys [2]: [cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint)] +Right keys [2]: [sr_ticket_number#7, sr_item_sk#6] +Join condition: None + +(51) Project [codegen id : 6] +Output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#6, sr_ticket_number#7] + +(52) Scan parquet default.store +Output [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] +ReadSchema: struct + +(53) ColumnarToRow [codegen id : 2] +Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] + +(54) Filter [codegen id : 2] +Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] +Condition : (((isnotnull(s_market_id#11) AND (s_market_id#11 = 8)) AND isnotnull(s_store_sk#9)) AND isnotnull(s_zip#13)) + +(55) Project [codegen id : 2] +Output [4]: [s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] +Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] + +(56) BroadcastExchange +Input [4]: [s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#47] + +(57) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#9] +Join condition: None + +(58) Project [codegen id : 6] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13] +Input [8]: 
[ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] + +(59) Scan parquet default.item +Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(60) ColumnarToRow [codegen id : 3] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(61) Filter [codegen id : 3] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Condition : isnotnull(i_item_sk#15) + +(62) BroadcastExchange +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#48] + +(63) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#15] +Join condition: None + +(64) Project [codegen id : 6] +Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(65) Scan parquet default.customer +Output [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)] +ReadSchema: struct + +(66) ColumnarToRow [codegen id : 4] +Input [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, 
c_birth_country#25] + +(67) Filter [codegen id : 4] +Input [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Condition : (isnotnull(c_customer_sk#22) AND isnotnull(c_birth_country#25)) + +(68) BroadcastExchange +Input [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#49] + +(69) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#22] +Join condition: None + +(70) Project [codegen id : 6] +Output [12]: [ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, c_birth_country#25] +Input [14]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] + +(71) Scan parquet default.customer_address +Output [3]: [ca_state#27, ca_zip#28, ca_country#29] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_country), IsNotNull(ca_zip)] +ReadSchema: struct + +(72) ColumnarToRow [codegen id : 5] +Input [3]: [ca_state#27, ca_zip#28, ca_country#29] + +(73) Filter [codegen id : 5] +Input [3]: [ca_state#27, ca_zip#28, ca_country#29] +Condition : (isnotnull(ca_country#29) AND isnotnull(ca_zip#28)) + +(74) BroadcastExchange +Input [3]: [ca_state#27, ca_zip#28, ca_country#29] +Arguments: HashedRelationBroadcastMode(List(upper(input[2, string, false]), input[1, string, false]),false), [id=#50] + +(75) BroadcastHashJoin [codegen id : 6] +Left keys [2]: [c_birth_country#25, s_zip#13] +Right keys [2]: [upper(ca_country#29), ca_zip#28] +Join condition: None + +(76) Project [codegen id : 
6] +Output [11]: [ss_net_paid#5, s_store_name#10, s_state#12, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] +Input [15]: [ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, c_birth_country#25, ca_state#27, ca_zip#28, ca_country#29] + +(77) HashAggregate [codegen id : 6] +Input [11]: [ss_net_paid#5, s_store_name#10, s_state#12, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] +Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum#51] +Results [11]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#52] + +(78) Exchange +Input [11]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#52] +Arguments: hashpartitioning(c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), true, [id=#53] + +(79) HashAggregate [codegen id : 7] +Input [11]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#52] +Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#54] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#54,17,2) AS 
netpaid#35] + +(80) HashAggregate [codegen id : 7] +Input [1]: [netpaid#35] +Keys: [] +Functions [1]: [partial_avg(netpaid#35)] +Aggregate Attributes [2]: [sum#55, count#56] +Results [2]: [sum#57, count#58] + +(81) Exchange +Input [2]: [sum#57, count#58] +Arguments: SinglePartition, true, [id=#59] + +(82) HashAggregate [codegen id : 8] +Input [2]: [sum#57, count#58] +Keys: [] +Functions [1]: [avg(netpaid#35)] +Aggregate Attributes [1]: [avg(netpaid#35)#60] +Results [1]: [CheckOverflow((0.050000 * promote_precision(avg(netpaid#35)#60)), DecimalType(24,8), true) AS (CAST(0.05 AS DECIMAL(21,6)) * CAST(avg(netpaid) AS DECIMAL(21,6)))#61] + + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/simplified.txt new file mode 100644 index 0000000000000..306d7f1f78e11 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/simplified.txt @@ -0,0 +1,125 @@ +WholeStageCodegen (8) + Project [c_first_name,c_last_name,paid,s_store_name] + Filter [sum(netpaid)] + Subquery #1 + WholeStageCodegen (8) + HashAggregate [count,sum] [(CAST(0.05 AS DECIMAL(21,6)) * CAST(avg(netpaid) AS DECIMAL(21,6))),avg(netpaid),count,sum] + InputAdapter + Exchange #8 + WholeStageCodegen (7) + HashAggregate [netpaid] [count,count,sum,sum] + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,sum] [netpaid,sum,sum(UnscaledValue(ss_net_paid))] + InputAdapter + Exchange [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name] #9 + WholeStageCodegen (6) + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] [sum,sum] + Project [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] 
+ BroadcastHashJoin [c_birth_country,ca_country,ca_zip,s_zip] + Project [c_birth_country,c_first_name,c_last_name,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_net_paid] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_customer_sk,ss_net_paid] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [s_state,s_store_name,s_zip,ss_customer_sk,ss_item_sk,ss_net_paid] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Filter [ss_customer_sk,ss_item_sk,ss_store_sk,ss_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (1) + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (2) + Project [s_state,s_store_name,s_store_sk,s_zip] + Filter [s_market_id,s_store_sk,s_zip] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_market_id,s_state,s_store_name,s_store_sk,s_zip] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (4) + Filter [c_birth_country,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_birth_country,c_customer_sk,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (5) + Filter [ca_country,ca_zip] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_country,ca_state,ca_zip] + HashAggregate 
[c_first_name,c_last_name,isEmpty,s_store_name,sum] [isEmpty,paid,sum,sum(netpaid),sum(netpaid)] + InputAdapter + Exchange [c_first_name,c_last_name,s_store_name] #1 + WholeStageCodegen (7) + HashAggregate [c_first_name,c_last_name,netpaid,s_store_name] [isEmpty,isEmpty,sum,sum] + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,sum] [netpaid,sum,sum(UnscaledValue(ss_net_paid))] + InputAdapter + Exchange [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name] #2 + WholeStageCodegen (6) + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] [sum,sum] + Project [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] + BroadcastHashJoin [c_birth_country,ca_country,ca_zip,s_zip] + Project [c_birth_country,c_first_name,c_last_name,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_net_paid] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_customer_sk,ss_net_paid] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [s_state,s_store_name,s_zip,ss_customer_sk,ss_item_sk,ss_net_paid] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Filter [ss_customer_sk,ss_item_sk,ss_store_sk,ss_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] + InputAdapter + 
BroadcastExchange #4 + WholeStageCodegen (2) + Project [s_state,s_store_name,s_store_sk,s_zip] + Filter [s_market_id,s_store_sk,s_zip] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_market_id,s_state,s_store_name,s_store_sk,s_zip] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [i_color,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + Filter [c_birth_country,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_birth_country,c_customer_sk,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (5) + Filter [ca_country,ca_zip] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_country,ca_state,ca_zip] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/explain.txt new file mode 100644 index 0000000000000..f9d1aa308b108 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/explain.txt @@ -0,0 +1,314 @@ +== Physical Plan == +TakeOrderedAndProject (57) ++- * HashAggregate (56) + +- Exchange (55) + +- * HashAggregate (54) + +- * Project (53) + +- * SortMergeJoin Inner (52) + :- * Sort (43) + : +- Exchange (42) + : +- * Project (41) + : +- * SortMergeJoin Inner (40) + : :- * Sort (27) + : : +- Exchange (26) + : : +- * Project (25) + : : +- * SortMergeJoin Inner (24) + : : :- * Sort (18) + : : : +- Exchange (17) + : : : +- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project 
(7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.date_dim (4) + : : : +- BroadcastExchange (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet default.store (11) + : : +- * Sort (23) + : : +- Exchange (22) + : : +- * Filter (21) + : : +- * ColumnarToRow (20) + : : +- Scan parquet default.item (19) + : +- * Sort (39) + : +- Exchange (38) + : +- * Project (37) + : +- * BroadcastHashJoin Inner BuildLeft (36) + : :- BroadcastExchange (32) + : : +- * Project (31) + : : +- * Filter (30) + : : +- * ColumnarToRow (29) + : : +- Scan parquet default.date_dim (28) + : +- * Filter (35) + : +- * ColumnarToRow (34) + : +- Scan parquet default.store_returns (33) + +- * Sort (51) + +- Exchange (50) + +- * Project (49) + +- * BroadcastHashJoin Inner BuildRight (48) + :- * Filter (46) + : +- * ColumnarToRow (45) + : +- Scan parquet default.catalog_sales (44) + +- ReusedExchange (47) + + +(1) Scan parquet default.store_sales +Output [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_net_profit#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_net_profit#6] + +(3) Filter [codegen id : 3] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_net_profit#6] +Condition : ((((isnotnull(ss_customer_sk#3) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_ticket_number#5)) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#4)) + +(4) Scan parquet default.date_dim +Output [3]: 
[d_date_sk#7, d_year#8, d_moy#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,4), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#7, d_year#8, d_moy#9] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#7, d_year#8, d_moy#9] +Condition : ((((isnotnull(d_moy#9) AND isnotnull(d_year#8)) AND (d_moy#9 = 4)) AND (d_year#8 = 2001)) AND isnotnull(d_date_sk#7)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#7] +Input [3]: [d_date_sk#7, d_year#8, d_moy#9] + +(8) BroadcastExchange +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(10) Project [codegen id : 3] +Output [5]: [ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_net_profit#6] +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_net_profit#6, d_date_sk#7] + +(11) Scan parquet default.store +Output [3]: [s_store_sk#11, s_store_id#12, s_store_name#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [s_store_sk#11, s_store_id#12, s_store_name#13] + +(13) Filter [codegen id : 2] +Input [3]: [s_store_sk#11, s_store_id#12, s_store_name#13] +Condition : isnotnull(s_store_sk#11) + +(14) BroadcastExchange +Input [3]: [s_store_sk#11, s_store_id#12, s_store_name#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as 
bigint)),false), [id=#14] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#11] +Join condition: None + +(16) Project [codegen id : 3] +Output [6]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_net_profit#6, s_store_id#12, s_store_name#13] +Input [8]: [ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_net_profit#6, s_store_sk#11, s_store_id#12, s_store_name#13] + +(17) Exchange +Input [6]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_net_profit#6, s_store_id#12, s_store_name#13] +Arguments: hashpartitioning(ss_item_sk#2, 5), true, [id=#15] + +(18) Sort [codegen id : 4] +Input [6]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_net_profit#6, s_store_id#12, s_store_name#13] +Arguments: [ss_item_sk#2 ASC NULLS FIRST], false, 0 + +(19) Scan parquet default.item +Output [3]: [i_item_sk#16, i_item_id#17, i_item_desc#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 5] +Input [3]: [i_item_sk#16, i_item_id#17, i_item_desc#18] + +(21) Filter [codegen id : 5] +Input [3]: [i_item_sk#16, i_item_id#17, i_item_desc#18] +Condition : isnotnull(i_item_sk#16) + +(22) Exchange +Input [3]: [i_item_sk#16, i_item_id#17, i_item_desc#18] +Arguments: hashpartitioning(i_item_sk#16, 5), true, [id=#19] + +(23) Sort [codegen id : 6] +Input [3]: [i_item_sk#16, i_item_id#17, i_item_desc#18] +Arguments: [i_item_sk#16 ASC NULLS FIRST], false, 0 + +(24) SortMergeJoin [codegen id : 7] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#16] +Join condition: None + +(25) Project [codegen id : 7] +Output [8]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] +Input [9]: [ss_item_sk#2, 
ss_customer_sk#3, ss_ticket_number#5, ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_sk#16, i_item_id#17, i_item_desc#18] + +(26) Exchange +Input [8]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] +Arguments: hashpartitioning(cast(ss_ticket_number#5 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_customer_sk#3 as bigint), 5), true, [id=#20] + +(27) Sort [codegen id : 8] +Input [8]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] +Arguments: [cast(ss_ticket_number#5 as bigint) ASC NULLS FIRST, cast(ss_item_sk#2 as bigint) ASC NULLS FIRST, cast(ss_customer_sk#3 as bigint) ASC NULLS FIRST], false, 0 + +(28) Scan parquet default.date_dim +Output [3]: [d_date_sk#21, d_year#22, d_moy#23] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,10), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 9] +Input [3]: [d_date_sk#21, d_year#22, d_moy#23] + +(30) Filter [codegen id : 9] +Input [3]: [d_date_sk#21, d_year#22, d_moy#23] +Condition : (((((isnotnull(d_year#22) AND isnotnull(d_moy#23)) AND (d_moy#23 >= 4)) AND (d_moy#23 <= 10)) AND (d_year#22 = 2001)) AND isnotnull(d_date_sk#21)) + +(31) Project [codegen id : 9] +Output [1]: [d_date_sk#21] +Input [3]: [d_date_sk#21, d_year#22, d_moy#23] + +(32) BroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#24] + +(33) Scan parquet default.store_returns +Output [5]: [sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_net_loss#29] +Batched: true +Location: 
InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_returned_date_sk)] +ReadSchema: struct + +(34) ColumnarToRow +Input [5]: [sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_net_loss#29] + +(35) Filter +Input [5]: [sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_net_loss#29] +Condition : (((isnotnull(sr_ticket_number#28) AND isnotnull(sr_customer_sk#27)) AND isnotnull(sr_item_sk#26)) AND isnotnull(sr_returned_date_sk#25)) + +(36) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cast(d_date_sk#21 as bigint)] +Right keys [1]: [sr_returned_date_sk#25] +Join condition: None + +(37) Project [codegen id : 10] +Output [4]: [sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_net_loss#29] +Input [6]: [d_date_sk#21, sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_net_loss#29] + +(38) Exchange +Input [4]: [sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_net_loss#29] +Arguments: hashpartitioning(sr_ticket_number#28, sr_item_sk#26, sr_customer_sk#27, 5), true, [id=#30] + +(39) Sort [codegen id : 11] +Input [4]: [sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_net_loss#29] +Arguments: [sr_ticket_number#28 ASC NULLS FIRST, sr_item_sk#26 ASC NULLS FIRST, sr_customer_sk#27 ASC NULLS FIRST], false, 0 + +(40) SortMergeJoin [codegen id : 12] +Left keys [3]: [cast(ss_ticket_number#5 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_customer_sk#3 as bigint)] +Right keys [3]: [sr_ticket_number#28, sr_item_sk#26, sr_customer_sk#27] +Join condition: None + +(41) Project [codegen id : 12] +Output [8]: [ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#26, sr_customer_sk#27, sr_net_loss#29] +Input [12]: 
[ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_net_loss#29] + +(42) Exchange +Input [8]: [ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#26, sr_customer_sk#27, sr_net_loss#29] +Arguments: hashpartitioning(sr_customer_sk#27, sr_item_sk#26, 5), true, [id=#31] + +(43) Sort [codegen id : 13] +Input [8]: [ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#26, sr_customer_sk#27, sr_net_loss#29] +Arguments: [sr_customer_sk#27 ASC NULLS FIRST, sr_item_sk#26 ASC NULLS FIRST], false, 0 + +(44) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#32, cs_bill_customer_sk#33, cs_item_sk#34, cs_net_profit#35] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(45) ColumnarToRow [codegen id : 15] +Input [4]: [cs_sold_date_sk#32, cs_bill_customer_sk#33, cs_item_sk#34, cs_net_profit#35] + +(46) Filter [codegen id : 15] +Input [4]: [cs_sold_date_sk#32, cs_bill_customer_sk#33, cs_item_sk#34, cs_net_profit#35] +Condition : ((isnotnull(cs_bill_customer_sk#33) AND isnotnull(cs_item_sk#34)) AND isnotnull(cs_sold_date_sk#32)) + +(47) ReusedExchange [Reuses operator id: 32] +Output [1]: [d_date_sk#36] + +(48) BroadcastHashJoin [codegen id : 15] +Left keys [1]: [cs_sold_date_sk#32] +Right keys [1]: [d_date_sk#36] +Join condition: None + +(49) Project [codegen id : 15] +Output [3]: [cs_bill_customer_sk#33, cs_item_sk#34, cs_net_profit#35] +Input [5]: [cs_sold_date_sk#32, cs_bill_customer_sk#33, cs_item_sk#34, cs_net_profit#35, d_date_sk#36] + +(50) Exchange +Input [3]: [cs_bill_customer_sk#33, cs_item_sk#34, 
cs_net_profit#35] +Arguments: hashpartitioning(cast(cs_bill_customer_sk#33 as bigint), cast(cs_item_sk#34 as bigint), 5), true, [id=#37] + +(51) Sort [codegen id : 16] +Input [3]: [cs_bill_customer_sk#33, cs_item_sk#34, cs_net_profit#35] +Arguments: [cast(cs_bill_customer_sk#33 as bigint) ASC NULLS FIRST, cast(cs_item_sk#34 as bigint) ASC NULLS FIRST], false, 0 + +(52) SortMergeJoin [codegen id : 17] +Left keys [2]: [sr_customer_sk#27, sr_item_sk#26] +Right keys [2]: [cast(cs_bill_customer_sk#33 as bigint), cast(cs_item_sk#34 as bigint)] +Join condition: None + +(53) Project [codegen id : 17] +Output [7]: [ss_net_profit#6, sr_net_loss#29, cs_net_profit#35, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] +Input [11]: [ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#26, sr_customer_sk#27, sr_net_loss#29, cs_bill_customer_sk#33, cs_item_sk#34, cs_net_profit#35] + +(54) HashAggregate [codegen id : 17] +Input [7]: [ss_net_profit#6, sr_net_loss#29, cs_net_profit#35, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] +Keys [4]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13] +Functions [3]: [partial_sum(UnscaledValue(ss_net_profit#6)), partial_sum(UnscaledValue(sr_net_loss#29)), partial_sum(UnscaledValue(cs_net_profit#35))] +Aggregate Attributes [3]: [sum#38, sum#39, sum#40] +Results [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, sum#41, sum#42, sum#43] + +(55) Exchange +Input [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, sum#41, sum#42, sum#43] +Arguments: hashpartitioning(i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, 5), true, [id=#44] + +(56) HashAggregate [codegen id : 18] +Input [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, sum#41, sum#42, sum#43] +Keys [4]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13] +Functions [3]: [sum(UnscaledValue(ss_net_profit#6)), 
sum(UnscaledValue(sr_net_loss#29)), sum(UnscaledValue(cs_net_profit#35))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_net_profit#6))#45, sum(UnscaledValue(sr_net_loss#29))#46, sum(UnscaledValue(cs_net_profit#35))#47] +Results [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, MakeDecimal(sum(UnscaledValue(ss_net_profit#6))#45,17,2) AS store_sales_profit#48, MakeDecimal(sum(UnscaledValue(sr_net_loss#29))#46,17,2) AS store_returns_loss#49, MakeDecimal(sum(UnscaledValue(cs_net_profit#35))#47,17,2) AS catalog_sales_profit#50] + +(57) TakeOrderedAndProject +Input [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, store_sales_profit#48, store_returns_loss#49, catalog_sales_profit#50] +Arguments: 100, [i_item_id#17 ASC NULLS FIRST, i_item_desc#18 ASC NULLS FIRST, s_store_id#12 ASC NULLS FIRST, s_store_name#13 ASC NULLS FIRST], [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, store_sales_profit#48, store_returns_loss#49, catalog_sales_profit#50] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/simplified.txt new file mode 100644 index 0000000000000..af6cf2abe7d10 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/simplified.txt @@ -0,0 +1,98 @@ +TakeOrderedAndProject [catalog_sales_profit,i_item_desc,i_item_id,s_store_id,s_store_name,store_returns_loss,store_sales_profit] + WholeStageCodegen (18) + HashAggregate [i_item_desc,i_item_id,s_store_id,s_store_name,sum,sum,sum] [catalog_sales_profit,store_returns_loss,store_sales_profit,sum,sum,sum,sum(UnscaledValue(cs_net_profit)),sum(UnscaledValue(sr_net_loss)),sum(UnscaledValue(ss_net_profit))] + InputAdapter + Exchange [i_item_desc,i_item_id,s_store_id,s_store_name] #1 + WholeStageCodegen (17) + HashAggregate 
[cs_net_profit,i_item_desc,i_item_id,s_store_id,s_store_name,sr_net_loss,ss_net_profit] [sum,sum,sum,sum,sum,sum] + Project [cs_net_profit,i_item_desc,i_item_id,s_store_id,s_store_name,sr_net_loss,ss_net_profit] + SortMergeJoin [cs_bill_customer_sk,cs_item_sk,sr_customer_sk,sr_item_sk] + InputAdapter + WholeStageCodegen (13) + Sort [sr_customer_sk,sr_item_sk] + InputAdapter + Exchange [sr_customer_sk,sr_item_sk] #2 + WholeStageCodegen (12) + Project [i_item_desc,i_item_id,s_store_id,s_store_name,sr_customer_sk,sr_item_sk,sr_net_loss,ss_net_profit] + SortMergeJoin [sr_customer_sk,sr_item_sk,sr_ticket_number,ss_customer_sk,ss_item_sk,ss_ticket_number] + InputAdapter + WholeStageCodegen (8) + Sort [ss_customer_sk,ss_item_sk,ss_ticket_number] + InputAdapter + Exchange [ss_customer_sk,ss_item_sk,ss_ticket_number] #3 + WholeStageCodegen (7) + Project [i_item_desc,i_item_id,s_store_id,s_store_name,ss_customer_sk,ss_item_sk,ss_net_profit,ss_ticket_number] + SortMergeJoin [i_item_sk,ss_item_sk] + InputAdapter + WholeStageCodegen (4) + Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #4 + WholeStageCodegen (3) + Project [s_store_id,s_store_name,ss_customer_sk,ss_item_sk,ss_net_profit,ss_ticket_number] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_customer_sk,ss_item_sk,ss_net_profit,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store 
[s_store_id,s_store_name,s_store_sk] + InputAdapter + WholeStageCodegen (6) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #7 + WholeStageCodegen (5) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_desc,i_item_id,i_item_sk] + InputAdapter + WholeStageCodegen (11) + Sort [sr_customer_sk,sr_item_sk,sr_ticket_number] + InputAdapter + Exchange [sr_customer_sk,sr_item_sk,sr_ticket_number] #8 + WholeStageCodegen (10) + Project [sr_customer_sk,sr_item_sk,sr_net_loss,sr_ticket_number] + BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (9) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + Filter [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_customer_sk,sr_item_sk,sr_net_loss,sr_returned_date_sk,sr_ticket_number] + InputAdapter + WholeStageCodegen (16) + Sort [cs_bill_customer_sk,cs_item_sk] + InputAdapter + Exchange [cs_bill_customer_sk,cs_item_sk] #10 + WholeStageCodegen (15) + Project [cs_bill_customer_sk,cs_item_sk,cs_net_profit] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_net_profit,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #9 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/explain.txt new file mode 100644 index 0000000000000..b749a37913efb --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/explain.txt @@ -0,0 +1,269 @@ +== Physical Plan == +TakeOrderedAndProject (48) ++- * HashAggregate (47) + +- Exchange (46) + +- * HashAggregate 
(45) + +- * Project (44) + +- * BroadcastHashJoin Inner BuildRight (43) + :- * Project (38) + : +- * BroadcastHashJoin Inner BuildRight (37) + : :- * Project (32) + : : +- * BroadcastHashJoin Inner BuildRight (31) + : : :- * Project (29) + : : : +- * BroadcastHashJoin Inner BuildRight (28) + : : : :- * Project (22) + : : : : +- * BroadcastHashJoin Inner BuildRight (21) + : : : : :- * Project (15) + : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : :- * Project (9) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : : : :- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet default.store_sales (1) + : : : : : : +- BroadcastExchange (7) + : : : : : : +- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : +- Scan parquet default.store_returns (4) + : : : : : +- BroadcastExchange (13) + : : : : : +- * Filter (12) + : : : : : +- * ColumnarToRow (11) + : : : : : +- Scan parquet default.catalog_sales (10) + : : : : +- BroadcastExchange (20) + : : : : +- * Project (19) + : : : : +- * Filter (18) + : : : : +- * ColumnarToRow (17) + : : : : +- Scan parquet default.date_dim (16) + : : : +- BroadcastExchange (27) + : : : +- * Project (26) + : : : +- * Filter (25) + : : : +- * ColumnarToRow (24) + : : : +- Scan parquet default.date_dim (23) + : : +- ReusedExchange (30) + : +- BroadcastExchange (36) + : +- * Filter (35) + : +- * ColumnarToRow (34) + : +- Scan parquet default.store (33) + +- BroadcastExchange (42) + +- * Filter (41) + +- * ColumnarToRow (40) + +- Scan parquet default.item (39) + + +(1) Scan parquet default.store_sales +Output [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_net_profit#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), 
IsNotNull(ss_ticket_number), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 8] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_net_profit#6] + +(3) Filter [codegen id : 8] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_net_profit#6] +Condition : ((((isnotnull(ss_item_sk#2) AND isnotnull(ss_customer_sk#3)) AND isnotnull(ss_ticket_number#5)) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#4)) + +(4) Scan parquet default.store_returns +Output [5]: [sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_net_loss#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_returned_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [5]: [sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_net_loss#11] + +(6) Filter [codegen id : 1] +Input [5]: [sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_net_loss#11] +Condition : (((isnotnull(sr_ticket_number#10) AND isnotnull(sr_customer_sk#9)) AND isnotnull(sr_item_sk#8)) AND isnotnull(sr_returned_date_sk#7)) + +(7) BroadcastExchange +Input [5]: [sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_net_loss#11] +Arguments: HashedRelationBroadcastMode(List(input[2, bigint, false], input[1, bigint, false], input[3, bigint, false]),false), [id=#12] + +(8) BroadcastHashJoin [codegen id : 8] +Left keys [3]: [cast(ss_customer_sk#3 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint)] +Right keys [3]: [sr_customer_sk#9, sr_item_sk#8, sr_ticket_number#10] +Join condition: None + +(9) 
Project [codegen id : 8] +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_net_profit#6, sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_net_loss#11] +Input [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_net_profit#6, sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_net_loss#11] + +(10) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_net_profit#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_net_profit#16] + +(12) Filter [codegen id : 2] +Input [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_net_profit#16] +Condition : ((isnotnull(cs_bill_customer_sk#14) AND isnotnull(cs_item_sk#15)) AND isnotnull(cs_sold_date_sk#13)) + +(13) BroadcastExchange +Input [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_net_profit#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint), cast(input[2, int, false] as bigint)),false), [id=#17] + +(14) BroadcastHashJoin [codegen id : 8] +Left keys [2]: [sr_customer_sk#9, sr_item_sk#8] +Right keys [2]: [cast(cs_bill_customer_sk#14 as bigint), cast(cs_item_sk#15 as bigint)] +Join condition: None + +(15) Project [codegen id : 8] +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_net_profit#6, sr_returned_date_sk#7, sr_net_loss#11, cs_sold_date_sk#13, cs_net_profit#16] +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_net_profit#6, sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_net_loss#11, cs_sold_date_sk#13, 
cs_bill_customer_sk#14, cs_item_sk#15, cs_net_profit#16] + +(16) Scan parquet default.date_dim +Output [3]: [d_date_sk#18, d_year#19, d_moy#20] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,4), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#18, d_year#19, d_moy#20] + +(18) Filter [codegen id : 3] +Input [3]: [d_date_sk#18, d_year#19, d_moy#20] +Condition : ((((isnotnull(d_moy#20) AND isnotnull(d_year#19)) AND (d_moy#20 = 4)) AND (d_year#19 = 2001)) AND isnotnull(d_date_sk#18)) + +(19) Project [codegen id : 3] +Output [1]: [d_date_sk#18] +Input [3]: [d_date_sk#18, d_year#19, d_moy#20] + +(20) BroadcastExchange +Input [1]: [d_date_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] + +(21) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#18] +Join condition: None + +(22) Project [codegen id : 8] +Output [7]: [ss_item_sk#2, ss_store_sk#4, ss_net_profit#6, sr_returned_date_sk#7, sr_net_loss#11, cs_sold_date_sk#13, cs_net_profit#16] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_net_profit#6, sr_returned_date_sk#7, sr_net_loss#11, cs_sold_date_sk#13, cs_net_profit#16, d_date_sk#18] + +(23) Scan parquet default.date_dim +Output [3]: [d_date_sk#22, d_year#23, d_moy#24] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,10), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [3]: [d_date_sk#22, d_year#23, d_moy#24] + 
+(25) Filter [codegen id : 4] +Input [3]: [d_date_sk#22, d_year#23, d_moy#24] +Condition : (((((isnotnull(d_moy#24) AND isnotnull(d_year#23)) AND (d_moy#24 >= 4)) AND (d_moy#24 <= 10)) AND (d_year#23 = 2001)) AND isnotnull(d_date_sk#22)) + +(26) Project [codegen id : 4] +Output [1]: [d_date_sk#22] +Input [3]: [d_date_sk#22, d_year#23, d_moy#24] + +(27) BroadcastExchange +Input [1]: [d_date_sk#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] + +(28) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [sr_returned_date_sk#7] +Right keys [1]: [cast(d_date_sk#22 as bigint)] +Join condition: None + +(29) Project [codegen id : 8] +Output [6]: [ss_item_sk#2, ss_store_sk#4, ss_net_profit#6, sr_net_loss#11, cs_sold_date_sk#13, cs_net_profit#16] +Input [8]: [ss_item_sk#2, ss_store_sk#4, ss_net_profit#6, sr_returned_date_sk#7, sr_net_loss#11, cs_sold_date_sk#13, cs_net_profit#16, d_date_sk#22] + +(30) ReusedExchange [Reuses operator id: 27] +Output [1]: [d_date_sk#26] + +(31) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [cs_sold_date_sk#13] +Right keys [1]: [d_date_sk#26] +Join condition: None + +(32) Project [codegen id : 8] +Output [5]: [ss_item_sk#2, ss_store_sk#4, ss_net_profit#6, sr_net_loss#11, cs_net_profit#16] +Input [7]: [ss_item_sk#2, ss_store_sk#4, ss_net_profit#6, sr_net_loss#11, cs_sold_date_sk#13, cs_net_profit#16, d_date_sk#26] + +(33) Scan parquet default.store +Output [3]: [s_store_sk#27, s_store_id#28, s_store_name#29] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 6] +Input [3]: [s_store_sk#27, s_store_id#28, s_store_name#29] + +(35) Filter [codegen id : 6] +Input [3]: [s_store_sk#27, s_store_id#28, s_store_name#29] +Condition : isnotnull(s_store_sk#27) + +(36) BroadcastExchange +Input [3]: 
[s_store_sk#27, s_store_id#28, s_store_name#29] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#30] + +(37) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#27] +Join condition: None + +(38) Project [codegen id : 8] +Output [6]: [ss_item_sk#2, ss_net_profit#6, sr_net_loss#11, cs_net_profit#16, s_store_id#28, s_store_name#29] +Input [8]: [ss_item_sk#2, ss_store_sk#4, ss_net_profit#6, sr_net_loss#11, cs_net_profit#16, s_store_sk#27, s_store_id#28, s_store_name#29] + +(39) Scan parquet default.item +Output [3]: [i_item_sk#31, i_item_id#32, i_item_desc#33] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(40) ColumnarToRow [codegen id : 7] +Input [3]: [i_item_sk#31, i_item_id#32, i_item_desc#33] + +(41) Filter [codegen id : 7] +Input [3]: [i_item_sk#31, i_item_id#32, i_item_desc#33] +Condition : isnotnull(i_item_sk#31) + +(42) BroadcastExchange +Input [3]: [i_item_sk#31, i_item_id#32, i_item_desc#33] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#34] + +(43) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#31] +Join condition: None + +(44) Project [codegen id : 8] +Output [7]: [ss_net_profit#6, sr_net_loss#11, cs_net_profit#16, s_store_id#28, s_store_name#29, i_item_id#32, i_item_desc#33] +Input [9]: [ss_item_sk#2, ss_net_profit#6, sr_net_loss#11, cs_net_profit#16, s_store_id#28, s_store_name#29, i_item_sk#31, i_item_id#32, i_item_desc#33] + +(45) HashAggregate [codegen id : 8] +Input [7]: [ss_net_profit#6, sr_net_loss#11, cs_net_profit#16, s_store_id#28, s_store_name#29, i_item_id#32, i_item_desc#33] +Keys [4]: [i_item_id#32, i_item_desc#33, s_store_id#28, s_store_name#29] +Functions [3]: 
[partial_sum(UnscaledValue(ss_net_profit#6)), partial_sum(UnscaledValue(sr_net_loss#11)), partial_sum(UnscaledValue(cs_net_profit#16))] +Aggregate Attributes [3]: [sum#35, sum#36, sum#37] +Results [7]: [i_item_id#32, i_item_desc#33, s_store_id#28, s_store_name#29, sum#38, sum#39, sum#40] + +(46) Exchange +Input [7]: [i_item_id#32, i_item_desc#33, s_store_id#28, s_store_name#29, sum#38, sum#39, sum#40] +Arguments: hashpartitioning(i_item_id#32, i_item_desc#33, s_store_id#28, s_store_name#29, 5), true, [id=#41] + +(47) HashAggregate [codegen id : 9] +Input [7]: [i_item_id#32, i_item_desc#33, s_store_id#28, s_store_name#29, sum#38, sum#39, sum#40] +Keys [4]: [i_item_id#32, i_item_desc#33, s_store_id#28, s_store_name#29] +Functions [3]: [sum(UnscaledValue(ss_net_profit#6)), sum(UnscaledValue(sr_net_loss#11)), sum(UnscaledValue(cs_net_profit#16))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_net_profit#6))#42, sum(UnscaledValue(sr_net_loss#11))#43, sum(UnscaledValue(cs_net_profit#16))#44] +Results [7]: [i_item_id#32, i_item_desc#33, s_store_id#28, s_store_name#29, MakeDecimal(sum(UnscaledValue(ss_net_profit#6))#42,17,2) AS store_sales_profit#45, MakeDecimal(sum(UnscaledValue(sr_net_loss#11))#43,17,2) AS store_returns_loss#46, MakeDecimal(sum(UnscaledValue(cs_net_profit#16))#44,17,2) AS catalog_sales_profit#47] + +(48) TakeOrderedAndProject +Input [7]: [i_item_id#32, i_item_desc#33, s_store_id#28, s_store_name#29, store_sales_profit#45, store_returns_loss#46, catalog_sales_profit#47] +Arguments: 100, [i_item_id#32 ASC NULLS FIRST, i_item_desc#33 ASC NULLS FIRST, s_store_id#28 ASC NULLS FIRST, s_store_name#29 ASC NULLS FIRST], [i_item_id#32, i_item_desc#33, s_store_id#28, s_store_name#29, store_sales_profit#45, store_returns_loss#46, catalog_sales_profit#47] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/simplified.txt new file mode 100644 
index 0000000000000..3de0f3d20ae08 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/simplified.txt @@ -0,0 +1,71 @@ +TakeOrderedAndProject [catalog_sales_profit,i_item_desc,i_item_id,s_store_id,s_store_name,store_returns_loss,store_sales_profit] + WholeStageCodegen (9) + HashAggregate [i_item_desc,i_item_id,s_store_id,s_store_name,sum,sum,sum] [catalog_sales_profit,store_returns_loss,store_sales_profit,sum,sum,sum,sum(UnscaledValue(cs_net_profit)),sum(UnscaledValue(sr_net_loss)),sum(UnscaledValue(ss_net_profit))] + InputAdapter + Exchange [i_item_desc,i_item_id,s_store_id,s_store_name] #1 + WholeStageCodegen (8) + HashAggregate [cs_net_profit,i_item_desc,i_item_id,s_store_id,s_store_name,sr_net_loss,ss_net_profit] [sum,sum,sum,sum,sum,sum] + Project [cs_net_profit,i_item_desc,i_item_id,s_store_id,s_store_name,sr_net_loss,ss_net_profit] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [cs_net_profit,s_store_id,s_store_name,sr_net_loss,ss_item_sk,ss_net_profit] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [cs_net_profit,sr_net_loss,ss_item_sk,ss_net_profit,ss_store_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_net_profit,cs_sold_date_sk,sr_net_loss,ss_item_sk,ss_net_profit,ss_store_sk] + BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + Project [cs_net_profit,cs_sold_date_sk,sr_net_loss,sr_returned_date_sk,ss_item_sk,ss_net_profit,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [cs_net_profit,cs_sold_date_sk,sr_net_loss,sr_returned_date_sk,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,sr_customer_sk,sr_item_sk] + Project [sr_customer_sk,sr_item_sk,sr_net_loss,sr_returned_date_sk,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [sr_customer_sk,sr_item_sk,sr_ticket_number,ss_customer_sk,ss_item_sk,ss_ticket_number] + Filter 
[ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_customer_sk,sr_item_sk,sr_net_loss,sr_returned_date_sk,sr_ticket_number] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_net_profit,cs_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + ReusedExchange [d_date_sk] #5 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (6) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_id,s_store_name,s_store_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (7) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_desc,i_item_id,i_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26.sf100/explain.txt new file mode 100644 index 0000000000000..671ce981abf6c --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26.sf100/explain.txt @@ -0,0 +1,193 @@ +== Physical Plan == +TakeOrderedAndProject 
(34) ++- * HashAggregate (33) + +- Exchange (32) + +- * HashAggregate (31) + +- * Project (30) + +- * BroadcastHashJoin Inner BuildRight (29) + :- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.catalog_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.customer_demographics (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.promotion (11) + : +- BroadcastExchange (22) + : +- * Project (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.date_dim (18) + +- BroadcastExchange (28) + +- * Filter (27) + +- * ColumnarToRow (26) + +- Scan parquet default.item (25) + + +(1) Scan parquet default.catalog_sales +Output [8]: [cs_sold_date_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_promo_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk), IsNotNull(cs_promo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [8]: [cs_sold_date_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_promo_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8] + +(3) Filter [codegen id : 5] +Input [8]: [cs_sold_date_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_promo_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8] +Condition : (((isnotnull(cs_bill_cdemo_sk#2) AND 
isnotnull(cs_sold_date_sk#1)) AND isnotnull(cs_item_sk#3)) AND isnotnull(cs_promo_sk#4)) + +(4) Scan parquet default.customer_demographics +Output [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_demographics] +PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_education_status), IsNotNull(cd_marital_status), EqualTo(cd_gender,M), EqualTo(cd_marital_status,S), EqualTo(cd_education_status,College), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(6) Filter [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Condition : ((((((isnotnull(cd_gender#10) AND isnotnull(cd_education_status#12)) AND isnotnull(cd_marital_status#11)) AND (cd_gender#10 = M)) AND (cd_marital_status#11 = S)) AND (cd_education_status#12 = College)) AND isnotnull(cd_demo_sk#9)) + +(7) Project [codegen id : 1] +Output [1]: [cd_demo_sk#9] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(8) BroadcastExchange +Input [1]: [cd_demo_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_bill_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#9] +Join condition: None + +(10) Project [codegen id : 5] +Output [7]: [cs_sold_date_sk#1, cs_item_sk#3, cs_promo_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8] +Input [9]: [cs_sold_date_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_promo_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cd_demo_sk#9] + +(11) Scan parquet default.promotion +Output [3]: [p_promo_sk#14, p_channel_email#15, p_channel_event#16] 
+Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/promotion] +PushedFilters: [Or(EqualTo(p_channel_email,N),EqualTo(p_channel_event,N)), IsNotNull(p_promo_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [p_promo_sk#14, p_channel_email#15, p_channel_event#16] + +(13) Filter [codegen id : 2] +Input [3]: [p_promo_sk#14, p_channel_email#15, p_channel_event#16] +Condition : (((p_channel_email#15 = N) OR (p_channel_event#16 = N)) AND isnotnull(p_promo_sk#14)) + +(14) Project [codegen id : 2] +Output [1]: [p_promo_sk#14] +Input [3]: [p_promo_sk#14, p_channel_email#15, p_channel_event#16] + +(15) BroadcastExchange +Input [1]: [p_promo_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_promo_sk#4] +Right keys [1]: [p_promo_sk#14] +Join condition: None + +(17) Project [codegen id : 5] +Output [6]: [cs_sold_date_sk#1, cs_item_sk#3, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8] +Input [8]: [cs_sold_date_sk#1, cs_item_sk#3, cs_promo_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, p_promo_sk#14] + +(18) Scan parquet default.date_dim +Output [2]: [d_date_sk#18, d_year#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#18, d_year#19] + +(20) Filter [codegen id : 3] +Input [2]: [d_date_sk#18, d_year#19] +Condition : ((isnotnull(d_year#19) AND (d_year#19 = 2000)) AND isnotnull(d_date_sk#18)) + +(21) Project [codegen id : 3] +Output [1]: [d_date_sk#18] +Input [2]: [d_date_sk#18, d_year#19] + 
+(22) BroadcastExchange +Input [1]: [d_date_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(23) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#18] +Join condition: None + +(24) Project [codegen id : 5] +Output [5]: [cs_item_sk#3, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8] +Input [7]: [cs_sold_date_sk#1, cs_item_sk#3, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, d_date_sk#18] + +(25) Scan parquet default.item +Output [2]: [i_item_sk#21, i_item_id#22] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(26) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#21, i_item_id#22] + +(27) Filter [codegen id : 4] +Input [2]: [i_item_sk#21, i_item_id#22] +Condition : isnotnull(i_item_sk#21) + +(28) BroadcastExchange +Input [2]: [i_item_sk#21, i_item_id#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] + +(29) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_item_sk#3] +Right keys [1]: [i_item_sk#21] +Join condition: None + +(30) Project [codegen id : 5] +Output [5]: [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, i_item_id#22] +Input [7]: [cs_item_sk#3, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, i_item_sk#21, i_item_id#22] + +(31) HashAggregate [codegen id : 5] +Input [5]: [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, i_item_id#22] +Keys [1]: [i_item_id#22] +Functions [4]: [partial_avg(cast(cs_quantity#5 as bigint)), partial_avg(UnscaledValue(cs_list_price#6)), partial_avg(UnscaledValue(cs_coupon_amt#8)), partial_avg(UnscaledValue(cs_sales_price#7))] +Aggregate Attributes [8]: [sum#24, count#25, sum#26, count#27, 
sum#28, count#29, sum#30, count#31] +Results [9]: [i_item_id#22, sum#32, count#33, sum#34, count#35, sum#36, count#37, sum#38, count#39] + +(32) Exchange +Input [9]: [i_item_id#22, sum#32, count#33, sum#34, count#35, sum#36, count#37, sum#38, count#39] +Arguments: hashpartitioning(i_item_id#22, 5), true, [id=#40] + +(33) HashAggregate [codegen id : 6] +Input [9]: [i_item_id#22, sum#32, count#33, sum#34, count#35, sum#36, count#37, sum#38, count#39] +Keys [1]: [i_item_id#22] +Functions [4]: [avg(cast(cs_quantity#5 as bigint)), avg(UnscaledValue(cs_list_price#6)), avg(UnscaledValue(cs_coupon_amt#8)), avg(UnscaledValue(cs_sales_price#7))] +Aggregate Attributes [4]: [avg(cast(cs_quantity#5 as bigint))#41, avg(UnscaledValue(cs_list_price#6))#42, avg(UnscaledValue(cs_coupon_amt#8))#43, avg(UnscaledValue(cs_sales_price#7))#44] +Results [5]: [i_item_id#22, avg(cast(cs_quantity#5 as bigint))#41 AS agg1#45, cast((avg(UnscaledValue(cs_list_price#6))#42 / 100.0) as decimal(11,6)) AS agg2#46, cast((avg(UnscaledValue(cs_coupon_amt#8))#43 / 100.0) as decimal(11,6)) AS agg3#47, cast((avg(UnscaledValue(cs_sales_price#7))#44 / 100.0) as decimal(11,6)) AS agg4#48] + +(34) TakeOrderedAndProject +Input [5]: [i_item_id#22, agg1#45, agg2#46, agg3#47, agg4#48] +Arguments: 100, [i_item_id#22 ASC NULLS FIRST], [i_item_id#22, agg1#45, agg2#46, agg3#47, agg4#48] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26.sf100/simplified.txt new file mode 100644 index 0000000000000..9ce1856692adb --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26.sf100/simplified.txt @@ -0,0 +1,50 @@ +TakeOrderedAndProject [agg1,agg2,agg3,agg4,i_item_id] + WholeStageCodegen (6) + HashAggregate [count,count,count,count,i_item_id,sum,sum,sum,sum] 
[agg1,agg2,agg3,agg4,avg(UnscaledValue(cs_coupon_amt)),avg(UnscaledValue(cs_list_price)),avg(UnscaledValue(cs_sales_price)),avg(cast(cs_quantity as bigint)),count,count,count,count,sum,sum,sum,sum] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen (5) + HashAggregate [cs_coupon_amt,cs_list_price,cs_quantity,cs_sales_price,i_item_id] [count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] + Project [cs_coupon_amt,cs_list_price,cs_quantity,cs_sales_price,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_coupon_amt,cs_item_sk,cs_list_price,cs_quantity,cs_sales_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_coupon_amt,cs_item_sk,cs_list_price,cs_quantity,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [cs_promo_sk,p_promo_sk] + Project [cs_coupon_amt,cs_item_sk,cs_list_price,cs_promo_sk,cs_quantity,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [cd_demo_sk,cs_bill_cdemo_sk] + Filter [cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_cdemo_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_promo_sk,cs_quantity,cs_sales_price,cs_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [cd_demo_sk] + Filter [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [p_promo_sk] + Filter [p_channel_email,p_channel_event,p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.promotion [p_channel_email,p_channel_event,p_promo_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #5 + 
WholeStageCodegen (4) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_id,i_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/explain.txt new file mode 100644 index 0000000000000..fb76c3804a462 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/explain.txt @@ -0,0 +1,193 @@ +== Physical Plan == +TakeOrderedAndProject (34) ++- * HashAggregate (33) + +- Exchange (32) + +- * HashAggregate (31) + +- * Project (30) + +- * BroadcastHashJoin Inner BuildRight (29) + :- * Project (23) + : +- * BroadcastHashJoin Inner BuildRight (22) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.catalog_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.customer_demographics (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.date_dim (11) + : +- BroadcastExchange (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.item (18) + +- BroadcastExchange (28) + +- * Project (27) + +- * Filter (26) + +- * ColumnarToRow (25) + +- Scan parquet default.promotion (24) + + +(1) Scan parquet default.catalog_sales +Output [8]: [cs_sold_date_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_promo_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_bill_cdemo_sk), 
IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk), IsNotNull(cs_promo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [8]: [cs_sold_date_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_promo_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8] + +(3) Filter [codegen id : 5] +Input [8]: [cs_sold_date_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_promo_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8] +Condition : (((isnotnull(cs_bill_cdemo_sk#2) AND isnotnull(cs_sold_date_sk#1)) AND isnotnull(cs_item_sk#3)) AND isnotnull(cs_promo_sk#4)) + +(4) Scan parquet default.customer_demographics +Output [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_demographics] +PushedFilters: [IsNotNull(cd_marital_status), IsNotNull(cd_gender), IsNotNull(cd_education_status), EqualTo(cd_gender,M), EqualTo(cd_marital_status,S), EqualTo(cd_education_status,College), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(6) Filter [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Condition : ((((((isnotnull(cd_marital_status#11) AND isnotnull(cd_gender#10)) AND isnotnull(cd_education_status#12)) AND (cd_gender#10 = M)) AND (cd_marital_status#11 = S)) AND (cd_education_status#12 = College)) AND isnotnull(cd_demo_sk#9)) + +(7) Project [codegen id : 1] +Output [1]: [cd_demo_sk#9] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(8) BroadcastExchange +Input [1]: [cd_demo_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: 
[cs_bill_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#9] +Join condition: None + +(10) Project [codegen id : 5] +Output [7]: [cs_sold_date_sk#1, cs_item_sk#3, cs_promo_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8] +Input [9]: [cs_sold_date_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_promo_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cd_demo_sk#9] + +(11) Scan parquet default.date_dim +Output [2]: [d_date_sk#14, d_year#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#14, d_year#15] + +(13) Filter [codegen id : 2] +Input [2]: [d_date_sk#14, d_year#15] +Condition : ((isnotnull(d_year#15) AND (d_year#15 = 2000)) AND isnotnull(d_date_sk#14)) + +(14) Project [codegen id : 2] +Output [1]: [d_date_sk#14] +Input [2]: [d_date_sk#14, d_year#15] + +(15) BroadcastExchange +Input [1]: [d_date_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#14] +Join condition: None + +(17) Project [codegen id : 5] +Output [6]: [cs_item_sk#3, cs_promo_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8] +Input [8]: [cs_sold_date_sk#1, cs_item_sk#3, cs_promo_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, d_date_sk#14] + +(18) Scan parquet default.item +Output [2]: [i_item_sk#17, i_item_id#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: 
[i_item_sk#17, i_item_id#18] + +(20) Filter [codegen id : 3] +Input [2]: [i_item_sk#17, i_item_id#18] +Condition : isnotnull(i_item_sk#17) + +(21) BroadcastExchange +Input [2]: [i_item_sk#17, i_item_id#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] + +(22) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_item_sk#3] +Right keys [1]: [i_item_sk#17] +Join condition: None + +(23) Project [codegen id : 5] +Output [6]: [cs_promo_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, i_item_id#18] +Input [8]: [cs_item_sk#3, cs_promo_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, i_item_sk#17, i_item_id#18] + +(24) Scan parquet default.promotion +Output [3]: [p_promo_sk#20, p_channel_email#21, p_channel_event#22] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/promotion] +PushedFilters: [Or(EqualTo(p_channel_email,N),EqualTo(p_channel_event,N)), IsNotNull(p_promo_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 4] +Input [3]: [p_promo_sk#20, p_channel_email#21, p_channel_event#22] + +(26) Filter [codegen id : 4] +Input [3]: [p_promo_sk#20, p_channel_email#21, p_channel_event#22] +Condition : (((p_channel_email#21 = N) OR (p_channel_event#22 = N)) AND isnotnull(p_promo_sk#20)) + +(27) Project [codegen id : 4] +Output [1]: [p_promo_sk#20] +Input [3]: [p_promo_sk#20, p_channel_email#21, p_channel_event#22] + +(28) BroadcastExchange +Input [1]: [p_promo_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] + +(29) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_promo_sk#4] +Right keys [1]: [p_promo_sk#20] +Join condition: None + +(30) Project [codegen id : 5] +Output [5]: [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, i_item_id#18] +Input [7]: [cs_promo_sk#4, cs_quantity#5, 
cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, i_item_id#18, p_promo_sk#20] + +(31) HashAggregate [codegen id : 5] +Input [5]: [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, i_item_id#18] +Keys [1]: [i_item_id#18] +Functions [4]: [partial_avg(cast(cs_quantity#5 as bigint)), partial_avg(UnscaledValue(cs_list_price#6)), partial_avg(UnscaledValue(cs_coupon_amt#8)), partial_avg(UnscaledValue(cs_sales_price#7))] +Aggregate Attributes [8]: [sum#24, count#25, sum#26, count#27, sum#28, count#29, sum#30, count#31] +Results [9]: [i_item_id#18, sum#32, count#33, sum#34, count#35, sum#36, count#37, sum#38, count#39] + +(32) Exchange +Input [9]: [i_item_id#18, sum#32, count#33, sum#34, count#35, sum#36, count#37, sum#38, count#39] +Arguments: hashpartitioning(i_item_id#18, 5), true, [id=#40] + +(33) HashAggregate [codegen id : 6] +Input [9]: [i_item_id#18, sum#32, count#33, sum#34, count#35, sum#36, count#37, sum#38, count#39] +Keys [1]: [i_item_id#18] +Functions [4]: [avg(cast(cs_quantity#5 as bigint)), avg(UnscaledValue(cs_list_price#6)), avg(UnscaledValue(cs_coupon_amt#8)), avg(UnscaledValue(cs_sales_price#7))] +Aggregate Attributes [4]: [avg(cast(cs_quantity#5 as bigint))#41, avg(UnscaledValue(cs_list_price#6))#42, avg(UnscaledValue(cs_coupon_amt#8))#43, avg(UnscaledValue(cs_sales_price#7))#44] +Results [5]: [i_item_id#18, avg(cast(cs_quantity#5 as bigint))#41 AS agg1#45, cast((avg(UnscaledValue(cs_list_price#6))#42 / 100.0) as decimal(11,6)) AS agg2#46, cast((avg(UnscaledValue(cs_coupon_amt#8))#43 / 100.0) as decimal(11,6)) AS agg3#47, cast((avg(UnscaledValue(cs_sales_price#7))#44 / 100.0) as decimal(11,6)) AS agg4#48] + +(34) TakeOrderedAndProject +Input [5]: [i_item_id#18, agg1#45, agg2#46, agg3#47, agg4#48] +Arguments: 100, [i_item_id#18 ASC NULLS FIRST], [i_item_id#18, agg1#45, agg2#46, agg3#47, agg4#48] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/simplified.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/simplified.txt new file mode 100644 index 0000000000000..cd4705f97a303 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/simplified.txt @@ -0,0 +1,50 @@ +TakeOrderedAndProject [agg1,agg2,agg3,agg4,i_item_id] + WholeStageCodegen (6) + HashAggregate [count,count,count,count,i_item_id,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,avg(UnscaledValue(cs_coupon_amt)),avg(UnscaledValue(cs_list_price)),avg(UnscaledValue(cs_sales_price)),avg(cast(cs_quantity as bigint)),count,count,count,count,sum,sum,sum,sum] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen (5) + HashAggregate [cs_coupon_amt,cs_list_price,cs_quantity,cs_sales_price,i_item_id] [count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] + Project [cs_coupon_amt,cs_list_price,cs_quantity,cs_sales_price,i_item_id] + BroadcastHashJoin [cs_promo_sk,p_promo_sk] + Project [cs_coupon_amt,cs_list_price,cs_promo_sk,cs_quantity,cs_sales_price,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_coupon_amt,cs_item_sk,cs_list_price,cs_promo_sk,cs_quantity,cs_sales_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_coupon_amt,cs_item_sk,cs_list_price,cs_promo_sk,cs_quantity,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [cd_demo_sk,cs_bill_cdemo_sk] + Filter [cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_cdemo_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_promo_sk,cs_quantity,cs_sales_price,cs_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [cd_demo_sk] + Filter [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + 
Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_id,i_item_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [p_promo_sk] + Filter [p_channel_email,p_channel_event,p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.promotion [p_channel_email,p_channel_event,p_promo_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27.sf100/explain.txt new file mode 100644 index 0000000000000..07e50e7a65c57 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27.sf100/explain.txt @@ -0,0 +1,193 @@ +== Physical Plan == +TakeOrderedAndProject (34) ++- * HashAggregate (33) + +- Exchange (32) + +- * HashAggregate (31) + +- * Expand (30) + +- * Project (29) + +- * BroadcastHashJoin Inner BuildRight (28) + :- * Project (23) + : +- * BroadcastHashJoin Inner BuildRight (22) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.customer_demographics (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.date_dim (11) + : +- BroadcastExchange (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.store (18) + +- BroadcastExchange (27) + +- * Filter (26) + +- * ColumnarToRow (25) + +- Scan parquet default.item 
(24) + + +(1) Scan parquet default.store_sales +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] + +(3) Filter [codegen id : 5] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Condition : (((isnotnull(ss_cdemo_sk#3) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#4)) AND isnotnull(ss_item_sk#2)) + +(4) Scan parquet default.customer_demographics +Output [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_demographics] +PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_education_status), IsNotNull(cd_marital_status), EqualTo(cd_gender,M), EqualTo(cd_marital_status,S), EqualTo(cd_education_status,College), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(6) Filter [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Condition : ((((((isnotnull(cd_gender#10) AND isnotnull(cd_education_status#12)) AND isnotnull(cd_marital_status#11)) AND (cd_gender#10 = M)) AND (cd_marital_status#11 = S)) AND 
(cd_education_status#12 = College)) AND isnotnull(cd_demo_sk#9)) + +(7) Project [codegen id : 1] +Output [1]: [cd_demo_sk#9] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(8) BroadcastExchange +Input [1]: [cd_demo_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_cdemo_sk#3] +Right keys [1]: [cd_demo_sk#9] +Join condition: None + +(10) Project [codegen id : 5] +Output [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, cd_demo_sk#9] + +(11) Scan parquet default.date_dim +Output [2]: [d_date_sk#14, d_year#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#14, d_year#15] + +(13) Filter [codegen id : 2] +Input [2]: [d_date_sk#14, d_year#15] +Condition : ((isnotnull(d_year#15) AND (d_year#15 = 2002)) AND isnotnull(d_date_sk#14)) + +(14) Project [codegen id : 2] +Output [1]: [d_date_sk#14] +Input [2]: [d_date_sk#14, d_year#15] + +(15) BroadcastExchange +Input [1]: [d_date_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#14] +Join condition: None + +(17) Project [codegen id : 5] +Output [6]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, 
ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, d_date_sk#14] + +(18) Scan parquet default.store +Output [2]: [s_store_sk#17, s_state#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [s_store_sk#17, s_state#18] + +(20) Filter [codegen id : 3] +Input [2]: [s_store_sk#17, s_state#18] +Condition : ((isnotnull(s_state#18) AND (s_state#18 = TN)) AND isnotnull(s_store_sk#17)) + +(21) BroadcastExchange +Input [2]: [s_store_sk#17, s_state#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] + +(22) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#17] +Join condition: None + +(23) Project [codegen id : 5] +Output [6]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, s_state#18] +Input [8]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, s_store_sk#17, s_state#18] + +(24) Scan parquet default.item +Output [2]: [i_item_sk#20, i_item_id#21] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#20, i_item_id#21] + +(26) Filter [codegen id : 4] +Input [2]: [i_item_sk#20, i_item_id#21] +Condition : isnotnull(i_item_sk#20) + +(27) BroadcastExchange +Input [2]: [i_item_sk#20, i_item_id#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: 
[i_item_sk#20] +Join condition: None + +(29) Project [codegen id : 5] +Output [6]: [ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#21, s_state#18] +Input [8]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, s_state#18, i_item_sk#20, i_item_id#21] + +(30) Expand [codegen id : 5] +Input [6]: [ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#21, s_state#18] +Arguments: [List(ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#21, s_state#18, 0), List(ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#21, null, 1), List(ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, null, null, 3)], [ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#23, s_state#24, spark_grouping_id#25] + +(31) HashAggregate [codegen id : 5] +Input [7]: [ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#23, s_state#24, spark_grouping_id#25] +Keys [3]: [i_item_id#23, s_state#24, spark_grouping_id#25] +Functions [4]: [partial_avg(cast(ss_quantity#5 as bigint)), partial_avg(UnscaledValue(ss_list_price#6)), partial_avg(UnscaledValue(ss_coupon_amt#8)), partial_avg(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [8]: [sum#26, count#27, sum#28, count#29, sum#30, count#31, sum#32, count#33] +Results [11]: [i_item_id#23, s_state#24, spark_grouping_id#25, sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40, count#41] + +(32) Exchange +Input [11]: [i_item_id#23, s_state#24, spark_grouping_id#25, sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40, count#41] +Arguments: hashpartitioning(i_item_id#23, s_state#24, spark_grouping_id#25, 5), true, [id=#42] + +(33) HashAggregate [codegen id : 6] +Input [11]: [i_item_id#23, s_state#24, spark_grouping_id#25, sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40, count#41] +Keys [3]: [i_item_id#23, s_state#24, 
spark_grouping_id#25] +Functions [4]: [avg(cast(ss_quantity#5 as bigint)), avg(UnscaledValue(ss_list_price#6)), avg(UnscaledValue(ss_coupon_amt#8)), avg(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [4]: [avg(cast(ss_quantity#5 as bigint))#43, avg(UnscaledValue(ss_list_price#6))#44, avg(UnscaledValue(ss_coupon_amt#8))#45, avg(UnscaledValue(ss_sales_price#7))#46] +Results [7]: [i_item_id#23, s_state#24, cast((shiftright(spark_grouping_id#25, 0) & 1) as tinyint) AS g_state#47, avg(cast(ss_quantity#5 as bigint))#43 AS agg1#48, cast((avg(UnscaledValue(ss_list_price#6))#44 / 100.0) as decimal(11,6)) AS agg2#49, cast((avg(UnscaledValue(ss_coupon_amt#8))#45 / 100.0) as decimal(11,6)) AS agg3#50, cast((avg(UnscaledValue(ss_sales_price#7))#46 / 100.0) as decimal(11,6)) AS agg4#51] + +(34) TakeOrderedAndProject +Input [7]: [i_item_id#23, s_state#24, g_state#47, agg1#48, agg2#49, agg3#50, agg4#51] +Arguments: 100, [i_item_id#23 ASC NULLS FIRST, s_state#24 ASC NULLS FIRST], [i_item_id#23, s_state#24, g_state#47, agg1#48, agg2#49, agg3#50, agg4#51] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27.sf100/simplified.txt new file mode 100644 index 0000000000000..df5dc914d869b --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27.sf100/simplified.txt @@ -0,0 +1,50 @@ +TakeOrderedAndProject [agg1,agg2,agg3,agg4,g_state,i_item_id,s_state] + WholeStageCodegen (6) + HashAggregate [count,count,count,count,i_item_id,s_state,spark_grouping_id,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_list_price)),avg(UnscaledValue(ss_sales_price)),avg(cast(ss_quantity as bigint)),count,count,count,count,g_state,sum,sum,sum,sum] + InputAdapter + Exchange [i_item_id,s_state,spark_grouping_id] #1 + WholeStageCodegen (5) + HashAggregate 
[i_item_id,s_state,spark_grouping_id,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] [count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] + Expand [i_item_id,s_state,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] + Project [i_item_id,s_state,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [s_state,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] + Filter [ss_cdemo_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [cd_demo_sk] + Filter [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [s_state,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_state,s_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_id,i_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/explain.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/explain.txt new file mode 100644 index 0000000000000..8258588e4546a --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/explain.txt @@ -0,0 +1,193 @@ +== Physical Plan == +TakeOrderedAndProject (34) ++- * HashAggregate (33) + +- Exchange (32) + +- * HashAggregate (31) + +- * Expand (30) + +- * Project (29) + +- * BroadcastHashJoin Inner BuildRight (28) + :- * Project (23) + : +- * BroadcastHashJoin Inner BuildRight (22) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.customer_demographics (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.date_dim (11) + : +- BroadcastExchange (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.store (18) + +- BroadcastExchange (27) + +- * Filter (26) + +- * ColumnarToRow (25) + +- Scan parquet default.item (24) + + +(1) Scan parquet default.store_sales +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, 
ss_sales_price#7, ss_coupon_amt#8] + +(3) Filter [codegen id : 5] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Condition : (((isnotnull(ss_cdemo_sk#3) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#4)) AND isnotnull(ss_item_sk#2)) + +(4) Scan parquet default.customer_demographics +Output [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_demographics] +PushedFilters: [IsNotNull(cd_marital_status), IsNotNull(cd_gender), IsNotNull(cd_education_status), EqualTo(cd_gender,M), EqualTo(cd_marital_status,S), EqualTo(cd_education_status,College), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(6) Filter [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Condition : ((((((isnotnull(cd_marital_status#11) AND isnotnull(cd_gender#10)) AND isnotnull(cd_education_status#12)) AND (cd_gender#10 = M)) AND (cd_marital_status#11 = S)) AND (cd_education_status#12 = College)) AND isnotnull(cd_demo_sk#9)) + +(7) Project [codegen id : 1] +Output [1]: [cd_demo_sk#9] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(8) BroadcastExchange +Input [1]: [cd_demo_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_cdemo_sk#3] +Right keys [1]: [cd_demo_sk#9] +Join condition: None + +(10) Project [codegen id : 5] +Output [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [9]: 
[ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, cd_demo_sk#9] + +(11) Scan parquet default.date_dim +Output [2]: [d_date_sk#14, d_year#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#14, d_year#15] + +(13) Filter [codegen id : 2] +Input [2]: [d_date_sk#14, d_year#15] +Condition : ((isnotnull(d_year#15) AND (d_year#15 = 2002)) AND isnotnull(d_date_sk#14)) + +(14) Project [codegen id : 2] +Output [1]: [d_date_sk#14] +Input [2]: [d_date_sk#14, d_year#15] + +(15) BroadcastExchange +Input [1]: [d_date_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#14] +Join condition: None + +(17) Project [codegen id : 5] +Output [6]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, d_date_sk#14] + +(18) Scan parquet default.store +Output [2]: [s_store_sk#17, s_state#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [s_store_sk#17, s_state#18] + +(20) Filter [codegen id : 3] +Input [2]: [s_store_sk#17, s_state#18] +Condition : ((isnotnull(s_state#18) AND (s_state#18 = TN)) AND isnotnull(s_store_sk#17)) + +(21) 
BroadcastExchange +Input [2]: [s_store_sk#17, s_state#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] + +(22) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#17] +Join condition: None + +(23) Project [codegen id : 5] +Output [6]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, s_state#18] +Input [8]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, s_store_sk#17, s_state#18] + +(24) Scan parquet default.item +Output [2]: [i_item_sk#20, i_item_id#21] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#20, i_item_id#21] + +(26) Filter [codegen id : 4] +Input [2]: [i_item_sk#20, i_item_id#21] +Condition : isnotnull(i_item_sk#20) + +(27) BroadcastExchange +Input [2]: [i_item_sk#20, i_item_id#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#20] +Join condition: None + +(29) Project [codegen id : 5] +Output [6]: [ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#21, s_state#18] +Input [8]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, s_state#18, i_item_sk#20, i_item_id#21] + +(30) Expand [codegen id : 5] +Input [6]: [ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#21, s_state#18] +Arguments: [List(ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#21, s_state#18, 0), List(ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#21, null, 1), List(ss_quantity#5, 
ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, null, null, 3)], [ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#23, s_state#24, spark_grouping_id#25] + +(31) HashAggregate [codegen id : 5] +Input [7]: [ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#23, s_state#24, spark_grouping_id#25] +Keys [3]: [i_item_id#23, s_state#24, spark_grouping_id#25] +Functions [4]: [partial_avg(cast(ss_quantity#5 as bigint)), partial_avg(UnscaledValue(ss_list_price#6)), partial_avg(UnscaledValue(ss_coupon_amt#8)), partial_avg(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [8]: [sum#26, count#27, sum#28, count#29, sum#30, count#31, sum#32, count#33] +Results [11]: [i_item_id#23, s_state#24, spark_grouping_id#25, sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40, count#41] + +(32) Exchange +Input [11]: [i_item_id#23, s_state#24, spark_grouping_id#25, sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40, count#41] +Arguments: hashpartitioning(i_item_id#23, s_state#24, spark_grouping_id#25, 5), true, [id=#42] + +(33) HashAggregate [codegen id : 6] +Input [11]: [i_item_id#23, s_state#24, spark_grouping_id#25, sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40, count#41] +Keys [3]: [i_item_id#23, s_state#24, spark_grouping_id#25] +Functions [4]: [avg(cast(ss_quantity#5 as bigint)), avg(UnscaledValue(ss_list_price#6)), avg(UnscaledValue(ss_coupon_amt#8)), avg(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [4]: [avg(cast(ss_quantity#5 as bigint))#43, avg(UnscaledValue(ss_list_price#6))#44, avg(UnscaledValue(ss_coupon_amt#8))#45, avg(UnscaledValue(ss_sales_price#7))#46] +Results [7]: [i_item_id#23, s_state#24, cast((shiftright(spark_grouping_id#25, 0) & 1) as tinyint) AS g_state#47, avg(cast(ss_quantity#5 as bigint))#43 AS agg1#48, cast((avg(UnscaledValue(ss_list_price#6))#44 / 100.0) as decimal(11,6)) AS agg2#49, cast((avg(UnscaledValue(ss_coupon_amt#8))#45 / 100.0) as decimal(11,6)) 
AS agg3#50, cast((avg(UnscaledValue(ss_sales_price#7))#46 / 100.0) as decimal(11,6)) AS agg4#51] + +(34) TakeOrderedAndProject +Input [7]: [i_item_id#23, s_state#24, g_state#47, agg1#48, agg2#49, agg3#50, agg4#51] +Arguments: 100, [i_item_id#23 ASC NULLS FIRST, s_state#24 ASC NULLS FIRST], [i_item_id#23, s_state#24, g_state#47, agg1#48, agg2#49, agg3#50, agg4#51] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/simplified.txt new file mode 100644 index 0000000000000..df5dc914d869b --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/simplified.txt @@ -0,0 +1,50 @@ +TakeOrderedAndProject [agg1,agg2,agg3,agg4,g_state,i_item_id,s_state] + WholeStageCodegen (6) + HashAggregate [count,count,count,count,i_item_id,s_state,spark_grouping_id,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_list_price)),avg(UnscaledValue(ss_sales_price)),avg(cast(ss_quantity as bigint)),count,count,count,count,g_state,sum,sum,sum,sum] + InputAdapter + Exchange [i_item_id,s_state,spark_grouping_id] #1 + WholeStageCodegen (5) + HashAggregate [i_item_id,s_state,spark_grouping_id,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] [count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] + Expand [i_item_id,s_state,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] + Project [i_item_id,s_state,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [s_state,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project 
[ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] + Filter [ss_cdemo_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [cd_demo_sk] + Filter [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [s_state,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_state,s_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_id,i_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.sf100/explain.txt new file mode 100644 index 0000000000000..bc499294a413e --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.sf100/explain.txt @@ -0,0 +1,437 @@ +== Physical Plan == +CollectLimit (71) ++- BroadcastNestedLoopJoin Inner BuildRight (70) + :- BroadcastNestedLoopJoin Inner BuildRight (58) + : :- BroadcastNestedLoopJoin Inner BuildRight (46) + : : :- BroadcastNestedLoopJoin Inner BuildRight (34) + : : : :- BroadcastNestedLoopJoin Inner BuildRight (22) + : : : : :- * HashAggregate (10) + : : : : : +- Exchange (9) + : : : : : +- * HashAggregate (8) + : : : : : +- 
* HashAggregate (7) + : : : : : +- Exchange (6) + : : : : : +- * HashAggregate (5) + : : : : : +- * Project (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- BroadcastExchange (21) + : : : : +- * HashAggregate (20) + : : : : +- Exchange (19) + : : : : +- * HashAggregate (18) + : : : : +- * HashAggregate (17) + : : : : +- Exchange (16) + : : : : +- * HashAggregate (15) + : : : : +- * Project (14) + : : : : +- * Filter (13) + : : : : +- * ColumnarToRow (12) + : : : : +- Scan parquet default.store_sales (11) + : : : +- BroadcastExchange (33) + : : : +- * HashAggregate (32) + : : : +- Exchange (31) + : : : +- * HashAggregate (30) + : : : +- * HashAggregate (29) + : : : +- Exchange (28) + : : : +- * HashAggregate (27) + : : : +- * Project (26) + : : : +- * Filter (25) + : : : +- * ColumnarToRow (24) + : : : +- Scan parquet default.store_sales (23) + : : +- BroadcastExchange (45) + : : +- * HashAggregate (44) + : : +- Exchange (43) + : : +- * HashAggregate (42) + : : +- * HashAggregate (41) + : : +- Exchange (40) + : : +- * HashAggregate (39) + : : +- * Project (38) + : : +- * Filter (37) + : : +- * ColumnarToRow (36) + : : +- Scan parquet default.store_sales (35) + : +- BroadcastExchange (57) + : +- * HashAggregate (56) + : +- Exchange (55) + : +- * HashAggregate (54) + : +- * HashAggregate (53) + : +- Exchange (52) + : +- * HashAggregate (51) + : +- * Project (50) + : +- * Filter (49) + : +- * ColumnarToRow (48) + : +- Scan parquet default.store_sales (47) + +- BroadcastExchange (69) + +- * HashAggregate (68) + +- Exchange (67) + +- * HashAggregate (66) + +- * HashAggregate (65) + +- Exchange (64) + +- * HashAggregate (63) + +- * Project (62) + +- * Filter (61) + +- * ColumnarToRow (60) + +- Scan parquet default.store_sales (59) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] +Batched: true +Location: 
InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,0), LessThanOrEqual(ss_quantity,5)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] + +(3) Filter [codegen id : 1] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 0)) AND (ss_quantity#1 <= 5)) AND ((((ss_list_price#3 >= 8.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 18.00)) OR ((ss_coupon_amt#4 >= 459.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 1459.00))) OR ((ss_wholesale_cost#2 >= 57.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 77.00)))) + +(4) Project [codegen id : 1] +Output [1]: [ss_list_price#3] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] + +(5) HashAggregate [codegen id : 1] +Input [1]: [ss_list_price#3] +Keys [1]: [ss_list_price#3] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#3)), partial_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#5, count(ss_list_price#3)#6] +Results [4]: [ss_list_price#3, sum#7, count#8, count#9] + +(6) Exchange +Input [4]: [ss_list_price#3, sum#7, count#8, count#9] +Arguments: hashpartitioning(ss_list_price#3, 5), true, [id=#10] + +(7) HashAggregate [codegen id : 2] +Input [4]: [ss_list_price#3, sum#7, count#8, count#9] +Keys [1]: [ss_list_price#3] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#5, count(ss_list_price#3)#6] +Results [4]: [ss_list_price#3, sum#7, count#8, count#9] + +(8) HashAggregate [codegen id : 2] +Input [4]: [ss_list_price#3, sum#7, count#8, count#9] +Keys: [] +Functions [3]: 
[merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3), partial_count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#5, count(ss_list_price#3)#6, count(ss_list_price#3)#11] +Results [4]: [sum#7, count#8, count#9, count#12] + +(9) Exchange +Input [4]: [sum#7, count#8, count#9, count#12] +Arguments: SinglePartition, true, [id=#13] + +(10) HashAggregate [codegen id : 3] +Input [4]: [sum#7, count#8, count#9, count#12] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#3)), count(ss_list_price#3), count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#5, count(ss_list_price#3)#6, count(ss_list_price#3)#11] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#5 / 100.0) as decimal(11,6)) AS B1_LP#14, count(ss_list_price#3)#6 AS B1_CNT#15, count(ss_list_price#3)#11 AS B1_CNTD#16] + +(11) Scan parquet default.store_sales +Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,6), LessThanOrEqual(ss_quantity,10)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 4] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] + +(13) Filter [codegen id : 4] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 6)) AND (ss_quantity#1 <= 10)) AND ((((ss_list_price#3 >= 90.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 100.00)) OR ((ss_coupon_amt#4 >= 2323.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 3323.00))) OR ((ss_wholesale_cost#2 >= 31.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 51.00)))) + +(14) Project [codegen id : 4] +Output [1]: 
[ss_list_price#3] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] + +(15) HashAggregate [codegen id : 4] +Input [1]: [ss_list_price#3] +Keys [1]: [ss_list_price#3] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#3)), partial_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#17, count(ss_list_price#3)#18] +Results [4]: [ss_list_price#3, sum#19, count#20, count#21] + +(16) Exchange +Input [4]: [ss_list_price#3, sum#19, count#20, count#21] +Arguments: hashpartitioning(ss_list_price#3, 5), true, [id=#22] + +(17) HashAggregate [codegen id : 5] +Input [4]: [ss_list_price#3, sum#19, count#20, count#21] +Keys [1]: [ss_list_price#3] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#17, count(ss_list_price#3)#18] +Results [4]: [ss_list_price#3, sum#19, count#20, count#21] + +(18) HashAggregate [codegen id : 5] +Input [4]: [ss_list_price#3, sum#19, count#20, count#21] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3), partial_count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#17, count(ss_list_price#3)#18, count(ss_list_price#3)#23] +Results [4]: [sum#19, count#20, count#21, count#24] + +(19) Exchange +Input [4]: [sum#19, count#20, count#21, count#24] +Arguments: SinglePartition, true, [id=#25] + +(20) HashAggregate [codegen id : 6] +Input [4]: [sum#19, count#20, count#21, count#24] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#3)), count(ss_list_price#3), count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#17, count(ss_list_price#3)#18, count(ss_list_price#3)#23] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#17 / 100.0) as decimal(11,6)) AS B2_LP#26, count(ss_list_price#3)#18 AS B2_CNT#27, count(ss_list_price#3)#23 AS B2_CNTD#28] + 
+(21) BroadcastExchange +Input [3]: [B2_LP#26, B2_CNT#27, B2_CNTD#28] +Arguments: IdentityBroadcastMode, [id=#29] + +(22) BroadcastNestedLoopJoin +Join condition: None + +(23) Scan parquet default.store_sales +Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,11), LessThanOrEqual(ss_quantity,15)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 7] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] + +(25) Filter [codegen id : 7] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 11)) AND (ss_quantity#1 <= 15)) AND ((((ss_list_price#3 >= 142.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 152.00)) OR ((ss_coupon_amt#4 >= 12214.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 13214.00))) OR ((ss_wholesale_cost#2 >= 79.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 99.00)))) + +(26) Project [codegen id : 7] +Output [1]: [ss_list_price#3] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] + +(27) HashAggregate [codegen id : 7] +Input [1]: [ss_list_price#3] +Keys [1]: [ss_list_price#3] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#3)), partial_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#30, count(ss_list_price#3)#31] +Results [4]: [ss_list_price#3, sum#32, count#33, count#34] + +(28) Exchange +Input [4]: [ss_list_price#3, sum#32, count#33, count#34] +Arguments: hashpartitioning(ss_list_price#3, 5), true, [id=#35] + +(29) HashAggregate [codegen id : 8] +Input [4]: [ss_list_price#3, sum#32, count#33, count#34] +Keys [1]: [ss_list_price#3] +Functions [2]: 
[merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#30, count(ss_list_price#3)#31] +Results [4]: [ss_list_price#3, sum#32, count#33, count#34] + +(30) HashAggregate [codegen id : 8] +Input [4]: [ss_list_price#3, sum#32, count#33, count#34] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3), partial_count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#30, count(ss_list_price#3)#31, count(ss_list_price#3)#36] +Results [4]: [sum#32, count#33, count#34, count#37] + +(31) Exchange +Input [4]: [sum#32, count#33, count#34, count#37] +Arguments: SinglePartition, true, [id=#38] + +(32) HashAggregate [codegen id : 9] +Input [4]: [sum#32, count#33, count#34, count#37] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#3)), count(ss_list_price#3), count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#30, count(ss_list_price#3)#31, count(ss_list_price#3)#36] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#30 / 100.0) as decimal(11,6)) AS B3_LP#39, count(ss_list_price#3)#31 AS B3_CNT#40, count(ss_list_price#3)#36 AS B3_CNTD#41] + +(33) BroadcastExchange +Input [3]: [B3_LP#39, B3_CNT#40, B3_CNTD#41] +Arguments: IdentityBroadcastMode, [id=#42] + +(34) BroadcastNestedLoopJoin +Join condition: None + +(35) Scan parquet default.store_sales +Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,16), LessThanOrEqual(ss_quantity,20)] +ReadSchema: struct + +(36) ColumnarToRow [codegen id : 10] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] + 
+(37) Filter [codegen id : 10] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 16)) AND (ss_quantity#1 <= 20)) AND ((((ss_list_price#3 >= 135.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 145.00)) OR ((ss_coupon_amt#4 >= 6071.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 7071.00))) OR ((ss_wholesale_cost#2 >= 38.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 58.00)))) + +(38) Project [codegen id : 10] +Output [1]: [ss_list_price#3] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] + +(39) HashAggregate [codegen id : 10] +Input [1]: [ss_list_price#3] +Keys [1]: [ss_list_price#3] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#3)), partial_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#43, count(ss_list_price#3)#44] +Results [4]: [ss_list_price#3, sum#45, count#46, count#47] + +(40) Exchange +Input [4]: [ss_list_price#3, sum#45, count#46, count#47] +Arguments: hashpartitioning(ss_list_price#3, 5), true, [id=#48] + +(41) HashAggregate [codegen id : 11] +Input [4]: [ss_list_price#3, sum#45, count#46, count#47] +Keys [1]: [ss_list_price#3] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#43, count(ss_list_price#3)#44] +Results [4]: [ss_list_price#3, sum#45, count#46, count#47] + +(42) HashAggregate [codegen id : 11] +Input [4]: [ss_list_price#3, sum#45, count#46, count#47] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3), partial_count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#43, count(ss_list_price#3)#44, count(ss_list_price#3)#49] +Results [4]: [sum#45, count#46, count#47, count#50] + +(43) Exchange +Input [4]: [sum#45, count#46, count#47, count#50] +Arguments: 
SinglePartition, true, [id=#51] + +(44) HashAggregate [codegen id : 12] +Input [4]: [sum#45, count#46, count#47, count#50] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#3)), count(ss_list_price#3), count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#43, count(ss_list_price#3)#44, count(ss_list_price#3)#49] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#43 / 100.0) as decimal(11,6)) AS B4_LP#52, count(ss_list_price#3)#44 AS B4_CNT#53, count(ss_list_price#3)#49 AS B4_CNTD#54] + +(45) BroadcastExchange +Input [3]: [B4_LP#52, B4_CNT#53, B4_CNTD#54] +Arguments: IdentityBroadcastMode, [id=#55] + +(46) BroadcastNestedLoopJoin +Join condition: None + +(47) Scan parquet default.store_sales +Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,25)] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 13] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] + +(49) Filter [codegen id : 13] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 21)) AND (ss_quantity#1 <= 25)) AND ((((ss_list_price#3 >= 122.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 132.00)) OR ((ss_coupon_amt#4 >= 836.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 1836.00))) OR ((ss_wholesale_cost#2 >= 17.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 37.00)))) + +(50) Project [codegen id : 13] +Output [1]: [ss_list_price#3] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] + +(51) HashAggregate [codegen id : 13] +Input [1]: [ss_list_price#3] +Keys [1]: 
[ss_list_price#3] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#3)), partial_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#56, count(ss_list_price#3)#57] +Results [4]: [ss_list_price#3, sum#58, count#59, count#60] + +(52) Exchange +Input [4]: [ss_list_price#3, sum#58, count#59, count#60] +Arguments: hashpartitioning(ss_list_price#3, 5), true, [id=#61] + +(53) HashAggregate [codegen id : 14] +Input [4]: [ss_list_price#3, sum#58, count#59, count#60] +Keys [1]: [ss_list_price#3] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#56, count(ss_list_price#3)#57] +Results [4]: [ss_list_price#3, sum#58, count#59, count#60] + +(54) HashAggregate [codegen id : 14] +Input [4]: [ss_list_price#3, sum#58, count#59, count#60] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3), partial_count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#56, count(ss_list_price#3)#57, count(ss_list_price#3)#62] +Results [4]: [sum#58, count#59, count#60, count#63] + +(55) Exchange +Input [4]: [sum#58, count#59, count#60, count#63] +Arguments: SinglePartition, true, [id=#64] + +(56) HashAggregate [codegen id : 15] +Input [4]: [sum#58, count#59, count#60, count#63] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#3)), count(ss_list_price#3), count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#56, count(ss_list_price#3)#57, count(ss_list_price#3)#62] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#56 / 100.0) as decimal(11,6)) AS B5_LP#65, count(ss_list_price#3)#57 AS B5_CNT#66, count(ss_list_price#3)#62 AS B5_CNTD#67] + +(57) BroadcastExchange +Input [3]: [B5_LP#65, B5_CNT#66, B5_CNTD#67] +Arguments: IdentityBroadcastMode, [id=#68] + +(58) BroadcastNestedLoopJoin +Join condition: None + +(59) 
Scan parquet default.store_sales +Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,26), LessThanOrEqual(ss_quantity,30)] +ReadSchema: struct + +(60) ColumnarToRow [codegen id : 16] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] + +(61) Filter [codegen id : 16] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 26)) AND (ss_quantity#1 <= 30)) AND ((((ss_list_price#3 >= 154.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 164.00)) OR ((ss_coupon_amt#4 >= 7326.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 8326.00))) OR ((ss_wholesale_cost#2 >= 7.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 27.00)))) + +(62) Project [codegen id : 16] +Output [1]: [ss_list_price#3] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] + +(63) HashAggregate [codegen id : 16] +Input [1]: [ss_list_price#3] +Keys [1]: [ss_list_price#3] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#3)), partial_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#69, count(ss_list_price#3)#70] +Results [4]: [ss_list_price#3, sum#71, count#72, count#73] + +(64) Exchange +Input [4]: [ss_list_price#3, sum#71, count#72, count#73] +Arguments: hashpartitioning(ss_list_price#3, 5), true, [id=#74] + +(65) HashAggregate [codegen id : 17] +Input [4]: [ss_list_price#3, sum#71, count#72, count#73] +Keys [1]: [ss_list_price#3] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#69, count(ss_list_price#3)#70] +Results [4]: 
[ss_list_price#3, sum#71, count#72, count#73] + +(66) HashAggregate [codegen id : 17] +Input [4]: [ss_list_price#3, sum#71, count#72, count#73] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3), partial_count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#69, count(ss_list_price#3)#70, count(ss_list_price#3)#75] +Results [4]: [sum#71, count#72, count#73, count#76] + +(67) Exchange +Input [4]: [sum#71, count#72, count#73, count#76] +Arguments: SinglePartition, true, [id=#77] + +(68) HashAggregate [codegen id : 18] +Input [4]: [sum#71, count#72, count#73, count#76] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#3)), count(ss_list_price#3), count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#69, count(ss_list_price#3)#70, count(ss_list_price#3)#75] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#69 / 100.0) as decimal(11,6)) AS B6_LP#78, count(ss_list_price#3)#70 AS B6_CNT#79, count(ss_list_price#3)#75 AS B6_CNTD#80] + +(69) BroadcastExchange +Input [3]: [B6_LP#78, B6_CNT#79, B6_CNTD#80] +Arguments: IdentityBroadcastMode, [id=#81] + +(70) BroadcastNestedLoopJoin +Join condition: None + +(71) CollectLimit +Input [18]: [B1_LP#14, B1_CNT#15, B1_CNTD#16, B2_LP#26, B2_CNT#27, B2_CNTD#28, B3_LP#39, B3_CNT#40, B3_CNTD#41, B4_LP#52, B4_CNT#53, B4_CNTD#54, B5_LP#65, B5_CNT#66, B5_CNTD#67, B6_LP#78, B6_CNT#79, B6_CNTD#80] +Arguments: 100 + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.sf100/simplified.txt new file mode 100644 index 0000000000000..7bd64fdc41260 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.sf100/simplified.txt @@ -0,0 +1,107 @@ +CollectLimit + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + 
BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + WholeStageCodegen (3) + HashAggregate [count,count,count,sum] [B1_CNT,B1_CNTD,B1_LP,avg(UnscaledValue(ss_list_price)),count,count,count,count(ss_list_price),count(ss_list_price),sum] + InputAdapter + Exchange #1 + WholeStageCodegen (2) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count,count,count(ss_list_price),count(ss_list_price),sum,sum] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] + InputAdapter + Exchange [ss_list_price] #2 + WholeStageCodegen (1) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] + Project [ss_list_price] + Filter [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] + BroadcastExchange #3 + WholeStageCodegen (6) + HashAggregate [count,count,count,sum] [B2_CNT,B2_CNTD,B2_LP,avg(UnscaledValue(ss_list_price)),count,count,count,count(ss_list_price),count(ss_list_price),sum] + InputAdapter + Exchange #4 + WholeStageCodegen (5) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count,count,count(ss_list_price),count(ss_list_price),sum,sum] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] + InputAdapter + Exchange [ss_list_price] #5 + WholeStageCodegen (4) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] + Project [ss_list_price] + Filter [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] + BroadcastExchange #6 + WholeStageCodegen (9) + HashAggregate 
[count,count,count,sum] [B3_CNT,B3_CNTD,B3_LP,avg(UnscaledValue(ss_list_price)),count,count,count,count(ss_list_price),count(ss_list_price),sum] + InputAdapter + Exchange #7 + WholeStageCodegen (8) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count,count,count(ss_list_price),count(ss_list_price),sum,sum] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] + InputAdapter + Exchange [ss_list_price] #8 + WholeStageCodegen (7) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] + Project [ss_list_price] + Filter [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] + BroadcastExchange #9 + WholeStageCodegen (12) + HashAggregate [count,count,count,sum] [B4_CNT,B4_CNTD,B4_LP,avg(UnscaledValue(ss_list_price)),count,count,count,count(ss_list_price),count(ss_list_price),sum] + InputAdapter + Exchange #10 + WholeStageCodegen (11) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count,count,count(ss_list_price),count(ss_list_price),sum,sum] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] + InputAdapter + Exchange [ss_list_price] #11 + WholeStageCodegen (10) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] + Project [ss_list_price] + Filter [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] + BroadcastExchange #12 + WholeStageCodegen (15) + HashAggregate [count,count,count,sum] 
[B5_CNT,B5_CNTD,B5_LP,avg(UnscaledValue(ss_list_price)),count,count,count,count(ss_list_price),count(ss_list_price),sum] + InputAdapter + Exchange #13 + WholeStageCodegen (14) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count,count,count(ss_list_price),count(ss_list_price),sum,sum] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] + InputAdapter + Exchange [ss_list_price] #14 + WholeStageCodegen (13) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] + Project [ss_list_price] + Filter [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] + BroadcastExchange #15 + WholeStageCodegen (18) + HashAggregate [count,count,count,sum] [B6_CNT,B6_CNTD,B6_LP,avg(UnscaledValue(ss_list_price)),count,count,count,count(ss_list_price),count(ss_list_price),sum] + InputAdapter + Exchange #16 + WholeStageCodegen (17) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count,count,count(ss_list_price),count(ss_list_price),sum,sum] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] + InputAdapter + Exchange [ss_list_price] #17 + WholeStageCodegen (16) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] + Project [ss_list_price] + Filter [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/explain.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/explain.txt new file mode 100644 index 0000000000000..4169644f231c8 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/explain.txt @@ -0,0 +1,437 @@ +== Physical Plan == +CollectLimit (71) ++- BroadcastNestedLoopJoin Inner BuildRight (70) + :- BroadcastNestedLoopJoin Inner BuildRight (58) + : :- BroadcastNestedLoopJoin Inner BuildRight (46) + : : :- BroadcastNestedLoopJoin Inner BuildRight (34) + : : : :- BroadcastNestedLoopJoin Inner BuildRight (22) + : : : : :- * HashAggregate (10) + : : : : : +- Exchange (9) + : : : : : +- * HashAggregate (8) + : : : : : +- * HashAggregate (7) + : : : : : +- Exchange (6) + : : : : : +- * HashAggregate (5) + : : : : : +- * Project (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- BroadcastExchange (21) + : : : : +- * HashAggregate (20) + : : : : +- Exchange (19) + : : : : +- * HashAggregate (18) + : : : : +- * HashAggregate (17) + : : : : +- Exchange (16) + : : : : +- * HashAggregate (15) + : : : : +- * Project (14) + : : : : +- * Filter (13) + : : : : +- * ColumnarToRow (12) + : : : : +- Scan parquet default.store_sales (11) + : : : +- BroadcastExchange (33) + : : : +- * HashAggregate (32) + : : : +- Exchange (31) + : : : +- * HashAggregate (30) + : : : +- * HashAggregate (29) + : : : +- Exchange (28) + : : : +- * HashAggregate (27) + : : : +- * Project (26) + : : : +- * Filter (25) + : : : +- * ColumnarToRow (24) + : : : +- Scan parquet default.store_sales (23) + : : +- BroadcastExchange (45) + : : +- * HashAggregate (44) + : : +- Exchange (43) + : : +- * HashAggregate (42) + : : +- * HashAggregate (41) + : : +- Exchange (40) + : : +- * HashAggregate (39) + : : +- * Project (38) + : : +- * Filter (37) + : : +- * ColumnarToRow (36) + : : +- Scan parquet default.store_sales (35) + : +- BroadcastExchange (57) + : +- * HashAggregate 
(56) + : +- Exchange (55) + : +- * HashAggregate (54) + : +- * HashAggregate (53) + : +- Exchange (52) + : +- * HashAggregate (51) + : +- * Project (50) + : +- * Filter (49) + : +- * ColumnarToRow (48) + : +- Scan parquet default.store_sales (47) + +- BroadcastExchange (69) + +- * HashAggregate (68) + +- Exchange (67) + +- * HashAggregate (66) + +- * HashAggregate (65) + +- Exchange (64) + +- * HashAggregate (63) + +- * Project (62) + +- * Filter (61) + +- * ColumnarToRow (60) + +- Scan parquet default.store_sales (59) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,0), LessThanOrEqual(ss_quantity,5)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] + +(3) Filter [codegen id : 1] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 0)) AND (ss_quantity#1 <= 5)) AND ((((ss_list_price#3 >= 8.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 18.00)) OR ((ss_coupon_amt#4 >= 459.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 1459.00))) OR ((ss_wholesale_cost#2 >= 57.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 77.00)))) + +(4) Project [codegen id : 1] +Output [1]: [ss_list_price#3] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] + +(5) HashAggregate [codegen id : 1] +Input [1]: [ss_list_price#3] +Keys [1]: [ss_list_price#3] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#3)), partial_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#5, count(ss_list_price#3)#6] +Results [4]: 
[ss_list_price#3, sum#7, count#8, count#9] + +(6) Exchange +Input [4]: [ss_list_price#3, sum#7, count#8, count#9] +Arguments: hashpartitioning(ss_list_price#3, 5), true, [id=#10] + +(7) HashAggregate [codegen id : 2] +Input [4]: [ss_list_price#3, sum#7, count#8, count#9] +Keys [1]: [ss_list_price#3] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#5, count(ss_list_price#3)#6] +Results [4]: [ss_list_price#3, sum#7, count#8, count#9] + +(8) HashAggregate [codegen id : 2] +Input [4]: [ss_list_price#3, sum#7, count#8, count#9] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3), partial_count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#5, count(ss_list_price#3)#6, count(ss_list_price#3)#11] +Results [4]: [sum#7, count#8, count#9, count#12] + +(9) Exchange +Input [4]: [sum#7, count#8, count#9, count#12] +Arguments: SinglePartition, true, [id=#13] + +(10) HashAggregate [codegen id : 3] +Input [4]: [sum#7, count#8, count#9, count#12] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#3)), count(ss_list_price#3), count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#5, count(ss_list_price#3)#6, count(ss_list_price#3)#11] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#5 / 100.0) as decimal(11,6)) AS B1_LP#14, count(ss_list_price#3)#6 AS B1_CNT#15, count(ss_list_price#3)#11 AS B1_CNTD#16] + +(11) Scan parquet default.store_sales +Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,6), LessThanOrEqual(ss_quantity,10)] +ReadSchema: struct + +(12) 
ColumnarToRow [codegen id : 4] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] + +(13) Filter [codegen id : 4] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 6)) AND (ss_quantity#1 <= 10)) AND ((((ss_list_price#3 >= 90.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 100.00)) OR ((ss_coupon_amt#4 >= 2323.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 3323.00))) OR ((ss_wholesale_cost#2 >= 31.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 51.00)))) + +(14) Project [codegen id : 4] +Output [1]: [ss_list_price#3] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] + +(15) HashAggregate [codegen id : 4] +Input [1]: [ss_list_price#3] +Keys [1]: [ss_list_price#3] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#3)), partial_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#17, count(ss_list_price#3)#18] +Results [4]: [ss_list_price#3, sum#19, count#20, count#21] + +(16) Exchange +Input [4]: [ss_list_price#3, sum#19, count#20, count#21] +Arguments: hashpartitioning(ss_list_price#3, 5), true, [id=#22] + +(17) HashAggregate [codegen id : 5] +Input [4]: [ss_list_price#3, sum#19, count#20, count#21] +Keys [1]: [ss_list_price#3] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#17, count(ss_list_price#3)#18] +Results [4]: [ss_list_price#3, sum#19, count#20, count#21] + +(18) HashAggregate [codegen id : 5] +Input [4]: [ss_list_price#3, sum#19, count#20, count#21] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3), partial_count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#17, count(ss_list_price#3)#18, count(ss_list_price#3)#23] +Results [4]: [sum#19, count#20, 
count#21, count#24] + +(19) Exchange +Input [4]: [sum#19, count#20, count#21, count#24] +Arguments: SinglePartition, true, [id=#25] + +(20) HashAggregate [codegen id : 6] +Input [4]: [sum#19, count#20, count#21, count#24] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#3)), count(ss_list_price#3), count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#17, count(ss_list_price#3)#18, count(ss_list_price#3)#23] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#17 / 100.0) as decimal(11,6)) AS B2_LP#26, count(ss_list_price#3)#18 AS B2_CNT#27, count(ss_list_price#3)#23 AS B2_CNTD#28] + +(21) BroadcastExchange +Input [3]: [B2_LP#26, B2_CNT#27, B2_CNTD#28] +Arguments: IdentityBroadcastMode, [id=#29] + +(22) BroadcastNestedLoopJoin +Join condition: None + +(23) Scan parquet default.store_sales +Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,11), LessThanOrEqual(ss_quantity,15)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 7] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] + +(25) Filter [codegen id : 7] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 11)) AND (ss_quantity#1 <= 15)) AND ((((ss_list_price#3 >= 142.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 152.00)) OR ((ss_coupon_amt#4 >= 12214.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 13214.00))) OR ((ss_wholesale_cost#2 >= 79.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 99.00)))) + +(26) Project [codegen id : 7] +Output [1]: [ss_list_price#3] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] 
+ +(27) HashAggregate [codegen id : 7] +Input [1]: [ss_list_price#3] +Keys [1]: [ss_list_price#3] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#3)), partial_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#30, count(ss_list_price#3)#31] +Results [4]: [ss_list_price#3, sum#32, count#33, count#34] + +(28) Exchange +Input [4]: [ss_list_price#3, sum#32, count#33, count#34] +Arguments: hashpartitioning(ss_list_price#3, 5), true, [id=#35] + +(29) HashAggregate [codegen id : 8] +Input [4]: [ss_list_price#3, sum#32, count#33, count#34] +Keys [1]: [ss_list_price#3] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#30, count(ss_list_price#3)#31] +Results [4]: [ss_list_price#3, sum#32, count#33, count#34] + +(30) HashAggregate [codegen id : 8] +Input [4]: [ss_list_price#3, sum#32, count#33, count#34] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3), partial_count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#30, count(ss_list_price#3)#31, count(ss_list_price#3)#36] +Results [4]: [sum#32, count#33, count#34, count#37] + +(31) Exchange +Input [4]: [sum#32, count#33, count#34, count#37] +Arguments: SinglePartition, true, [id=#38] + +(32) HashAggregate [codegen id : 9] +Input [4]: [sum#32, count#33, count#34, count#37] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#3)), count(ss_list_price#3), count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#30, count(ss_list_price#3)#31, count(ss_list_price#3)#36] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#30 / 100.0) as decimal(11,6)) AS B3_LP#39, count(ss_list_price#3)#31 AS B3_CNT#40, count(ss_list_price#3)#36 AS B3_CNTD#41] + +(33) BroadcastExchange +Input [3]: [B3_LP#39, B3_CNT#40, B3_CNTD#41] +Arguments: 
IdentityBroadcastMode, [id=#42] + +(34) BroadcastNestedLoopJoin +Join condition: None + +(35) Scan parquet default.store_sales +Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,16), LessThanOrEqual(ss_quantity,20)] +ReadSchema: struct + +(36) ColumnarToRow [codegen id : 10] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] + +(37) Filter [codegen id : 10] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 16)) AND (ss_quantity#1 <= 20)) AND ((((ss_list_price#3 >= 135.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 145.00)) OR ((ss_coupon_amt#4 >= 6071.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 7071.00))) OR ((ss_wholesale_cost#2 >= 38.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 58.00)))) + +(38) Project [codegen id : 10] +Output [1]: [ss_list_price#3] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] + +(39) HashAggregate [codegen id : 10] +Input [1]: [ss_list_price#3] +Keys [1]: [ss_list_price#3] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#3)), partial_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#43, count(ss_list_price#3)#44] +Results [4]: [ss_list_price#3, sum#45, count#46, count#47] + +(40) Exchange +Input [4]: [ss_list_price#3, sum#45, count#46, count#47] +Arguments: hashpartitioning(ss_list_price#3, 5), true, [id=#48] + +(41) HashAggregate [codegen id : 11] +Input [4]: [ss_list_price#3, sum#45, count#46, count#47] +Keys [1]: [ss_list_price#3] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3)] +Aggregate Attributes 
[2]: [avg(UnscaledValue(ss_list_price#3))#43, count(ss_list_price#3)#44] +Results [4]: [ss_list_price#3, sum#45, count#46, count#47] + +(42) HashAggregate [codegen id : 11] +Input [4]: [ss_list_price#3, sum#45, count#46, count#47] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3), partial_count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#43, count(ss_list_price#3)#44, count(ss_list_price#3)#49] +Results [4]: [sum#45, count#46, count#47, count#50] + +(43) Exchange +Input [4]: [sum#45, count#46, count#47, count#50] +Arguments: SinglePartition, true, [id=#51] + +(44) HashAggregate [codegen id : 12] +Input [4]: [sum#45, count#46, count#47, count#50] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#3)), count(ss_list_price#3), count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#43, count(ss_list_price#3)#44, count(ss_list_price#3)#49] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#43 / 100.0) as decimal(11,6)) AS B4_LP#52, count(ss_list_price#3)#44 AS B4_CNT#53, count(ss_list_price#3)#49 AS B4_CNTD#54] + +(45) BroadcastExchange +Input [3]: [B4_LP#52, B4_CNT#53, B4_CNTD#54] +Arguments: IdentityBroadcastMode, [id=#55] + +(46) BroadcastNestedLoopJoin +Join condition: None + +(47) Scan parquet default.store_sales +Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,25)] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 13] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] + +(49) Filter [codegen id : 13] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, 
ss_coupon_amt#4] +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 21)) AND (ss_quantity#1 <= 25)) AND ((((ss_list_price#3 >= 122.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 132.00)) OR ((ss_coupon_amt#4 >= 836.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 1836.00))) OR ((ss_wholesale_cost#2 >= 17.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 37.00)))) + +(50) Project [codegen id : 13] +Output [1]: [ss_list_price#3] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] + +(51) HashAggregate [codegen id : 13] +Input [1]: [ss_list_price#3] +Keys [1]: [ss_list_price#3] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#3)), partial_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#56, count(ss_list_price#3)#57] +Results [4]: [ss_list_price#3, sum#58, count#59, count#60] + +(52) Exchange +Input [4]: [ss_list_price#3, sum#58, count#59, count#60] +Arguments: hashpartitioning(ss_list_price#3, 5), true, [id=#61] + +(53) HashAggregate [codegen id : 14] +Input [4]: [ss_list_price#3, sum#58, count#59, count#60] +Keys [1]: [ss_list_price#3] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#56, count(ss_list_price#3)#57] +Results [4]: [ss_list_price#3, sum#58, count#59, count#60] + +(54) HashAggregate [codegen id : 14] +Input [4]: [ss_list_price#3, sum#58, count#59, count#60] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3), partial_count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#56, count(ss_list_price#3)#57, count(ss_list_price#3)#62] +Results [4]: [sum#58, count#59, count#60, count#63] + +(55) Exchange +Input [4]: [sum#58, count#59, count#60, count#63] +Arguments: SinglePartition, true, [id=#64] + +(56) HashAggregate [codegen id : 15] +Input [4]: [sum#58, count#59, 
count#60, count#63] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#3)), count(ss_list_price#3), count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#56, count(ss_list_price#3)#57, count(ss_list_price#3)#62] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#56 / 100.0) as decimal(11,6)) AS B5_LP#65, count(ss_list_price#3)#57 AS B5_CNT#66, count(ss_list_price#3)#62 AS B5_CNTD#67] + +(57) BroadcastExchange +Input [3]: [B5_LP#65, B5_CNT#66, B5_CNTD#67] +Arguments: IdentityBroadcastMode, [id=#68] + +(58) BroadcastNestedLoopJoin +Join condition: None + +(59) Scan parquet default.store_sales +Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,26), LessThanOrEqual(ss_quantity,30)] +ReadSchema: struct + +(60) ColumnarToRow [codegen id : 16] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] + +(61) Filter [codegen id : 16] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 26)) AND (ss_quantity#1 <= 30)) AND ((((ss_list_price#3 >= 154.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 164.00)) OR ((ss_coupon_amt#4 >= 7326.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 8326.00))) OR ((ss_wholesale_cost#2 >= 7.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 27.00)))) + +(62) Project [codegen id : 16] +Output [1]: [ss_list_price#3] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] + +(63) HashAggregate [codegen id : 16] +Input [1]: [ss_list_price#3] +Keys [1]: [ss_list_price#3] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#3)), partial_count(ss_list_price#3)] 
+Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#69, count(ss_list_price#3)#70] +Results [4]: [ss_list_price#3, sum#71, count#72, count#73] + +(64) Exchange +Input [4]: [ss_list_price#3, sum#71, count#72, count#73] +Arguments: hashpartitioning(ss_list_price#3, 5), true, [id=#74] + +(65) HashAggregate [codegen id : 17] +Input [4]: [ss_list_price#3, sum#71, count#72, count#73] +Keys [1]: [ss_list_price#3] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#69, count(ss_list_price#3)#70] +Results [4]: [ss_list_price#3, sum#71, count#72, count#73] + +(66) HashAggregate [codegen id : 17] +Input [4]: [ss_list_price#3, sum#71, count#72, count#73] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3), partial_count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#69, count(ss_list_price#3)#70, count(ss_list_price#3)#75] +Results [4]: [sum#71, count#72, count#73, count#76] + +(67) Exchange +Input [4]: [sum#71, count#72, count#73, count#76] +Arguments: SinglePartition, true, [id=#77] + +(68) HashAggregate [codegen id : 18] +Input [4]: [sum#71, count#72, count#73, count#76] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#3)), count(ss_list_price#3), count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#69, count(ss_list_price#3)#70, count(ss_list_price#3)#75] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#69 / 100.0) as decimal(11,6)) AS B6_LP#78, count(ss_list_price#3)#70 AS B6_CNT#79, count(ss_list_price#3)#75 AS B6_CNTD#80] + +(69) BroadcastExchange +Input [3]: [B6_LP#78, B6_CNT#79, B6_CNTD#80] +Arguments: IdentityBroadcastMode, [id=#81] + +(70) BroadcastNestedLoopJoin +Join condition: None + +(71) CollectLimit +Input [18]: [B1_LP#14, B1_CNT#15, B1_CNTD#16, B2_LP#26, B2_CNT#27, B2_CNTD#28, B3_LP#39, B3_CNT#40, 
B3_CNTD#41, B4_LP#52, B4_CNT#53, B4_CNTD#54, B5_LP#65, B5_CNT#66, B5_CNTD#67, B6_LP#78, B6_CNT#79, B6_CNTD#80] +Arguments: 100 + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/simplified.txt new file mode 100644 index 0000000000000..7bd64fdc41260 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/simplified.txt @@ -0,0 +1,107 @@ +CollectLimit + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + WholeStageCodegen (3) + HashAggregate [count,count,count,sum] [B1_CNT,B1_CNTD,B1_LP,avg(UnscaledValue(ss_list_price)),count,count,count,count(ss_list_price),count(ss_list_price),sum] + InputAdapter + Exchange #1 + WholeStageCodegen (2) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count,count,count(ss_list_price),count(ss_list_price),sum,sum] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] + InputAdapter + Exchange [ss_list_price] #2 + WholeStageCodegen (1) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] + Project [ss_list_price] + Filter [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] + BroadcastExchange #3 + WholeStageCodegen (6) + HashAggregate [count,count,count,sum] [B2_CNT,B2_CNTD,B2_LP,avg(UnscaledValue(ss_list_price)),count,count,count,count(ss_list_price),count(ss_list_price),sum] + InputAdapter + Exchange #4 + WholeStageCodegen (5) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count,count,count(ss_list_price),count(ss_list_price),sum,sum] + 
HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] + InputAdapter + Exchange [ss_list_price] #5 + WholeStageCodegen (4) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] + Project [ss_list_price] + Filter [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] + BroadcastExchange #6 + WholeStageCodegen (9) + HashAggregate [count,count,count,sum] [B3_CNT,B3_CNTD,B3_LP,avg(UnscaledValue(ss_list_price)),count,count,count,count(ss_list_price),count(ss_list_price),sum] + InputAdapter + Exchange #7 + WholeStageCodegen (8) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count,count,count(ss_list_price),count(ss_list_price),sum,sum] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] + InputAdapter + Exchange [ss_list_price] #8 + WholeStageCodegen (7) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] + Project [ss_list_price] + Filter [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] + BroadcastExchange #9 + WholeStageCodegen (12) + HashAggregate [count,count,count,sum] [B4_CNT,B4_CNTD,B4_LP,avg(UnscaledValue(ss_list_price)),count,count,count,count(ss_list_price),count(ss_list_price),sum] + InputAdapter + Exchange #10 + WholeStageCodegen (11) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count,count,count(ss_list_price),count(ss_list_price),sum,sum] + HashAggregate [ss_list_price] 
[avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] + InputAdapter + Exchange [ss_list_price] #11 + WholeStageCodegen (10) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] + Project [ss_list_price] + Filter [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] + BroadcastExchange #12 + WholeStageCodegen (15) + HashAggregate [count,count,count,sum] [B5_CNT,B5_CNTD,B5_LP,avg(UnscaledValue(ss_list_price)),count,count,count,count(ss_list_price),count(ss_list_price),sum] + InputAdapter + Exchange #13 + WholeStageCodegen (14) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count,count,count(ss_list_price),count(ss_list_price),sum,sum] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] + InputAdapter + Exchange [ss_list_price] #14 + WholeStageCodegen (13) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] + Project [ss_list_price] + Filter [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] + BroadcastExchange #15 + WholeStageCodegen (18) + HashAggregate [count,count,count,sum] [B6_CNT,B6_CNTD,B6_LP,avg(UnscaledValue(ss_list_price)),count,count,count,count(ss_list_price),count(ss_list_price),sum] + InputAdapter + Exchange #16 + WholeStageCodegen (17) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count,count,count(ss_list_price),count(ss_list_price),sum,sum] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] + 
InputAdapter + Exchange [ss_list_price] #17 + WholeStageCodegen (16) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] + Project [ss_list_price] + Filter [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.sf100/explain.txt new file mode 100644 index 0000000000000..7625d9dd683e9 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.sf100/explain.txt @@ -0,0 +1,337 @@ +== Physical Plan == +TakeOrderedAndProject (61) ++- * HashAggregate (60) + +- Exchange (59) + +- * HashAggregate (58) + +- * Project (57) + +- * SortMergeJoin Inner (56) + :- * Sort (43) + : +- Exchange (42) + : +- * Project (41) + : +- * SortMergeJoin Inner (40) + : :- * Sort (27) + : : +- Exchange (26) + : : +- * Project (25) + : : +- * SortMergeJoin Inner (24) + : : :- * Sort (18) + : : : +- Exchange (17) + : : : +- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.date_dim (4) + : : : +- BroadcastExchange (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet default.store (11) + : : +- * Sort (23) + : : +- Exchange (22) + : : +- * Filter (21) + : : +- * ColumnarToRow (20) + : : +- Scan parquet default.item (19) + : +- * Sort (39) + : +- Exchange (38) + : +- * Project (37) + : +- * 
BroadcastHashJoin Inner BuildLeft (36) + : :- BroadcastExchange (32) + : : +- * Project (31) + : : +- * Filter (30) + : : +- * ColumnarToRow (29) + : : +- Scan parquet default.date_dim (28) + : +- * Filter (35) + : +- * ColumnarToRow (34) + : +- Scan parquet default.store_returns (33) + +- * Sort (55) + +- Exchange (54) + +- * Project (53) + +- * BroadcastHashJoin Inner BuildRight (52) + :- * Filter (46) + : +- * ColumnarToRow (45) + : +- Scan parquet default.catalog_sales (44) + +- BroadcastExchange (51) + +- * Project (50) + +- * Filter (49) + +- * ColumnarToRow (48) + +- Scan parquet default.date_dim (47) + + +(1) Scan parquet default.store_sales +Output [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_quantity#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_quantity#6] + +(3) Filter [codegen id : 3] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_quantity#6] +Condition : ((((isnotnull(ss_customer_sk#3) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_ticket_number#5)) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#4)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#7, d_year#8, d_moy#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,9), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) 
ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#7, d_year#8, d_moy#9] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#7, d_year#8, d_moy#9] +Condition : ((((isnotnull(d_moy#9) AND isnotnull(d_year#8)) AND (d_moy#9 = 9)) AND (d_year#8 = 1999)) AND isnotnull(d_date_sk#7)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#7] +Input [3]: [d_date_sk#7, d_year#8, d_moy#9] + +(8) BroadcastExchange +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(10) Project [codegen id : 3] +Output [5]: [ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_quantity#6] +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_quantity#6, d_date_sk#7] + +(11) Scan parquet default.store +Output [3]: [s_store_sk#11, s_store_id#12, s_store_name#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [s_store_sk#11, s_store_id#12, s_store_name#13] + +(13) Filter [codegen id : 2] +Input [3]: [s_store_sk#11, s_store_id#12, s_store_name#13] +Condition : isnotnull(s_store_sk#11) + +(14) BroadcastExchange +Input [3]: [s_store_sk#11, s_store_id#12, s_store_name#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#11] +Join condition: None + +(16) Project [codegen id : 3] +Output [6]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s_store_id#12, s_store_name#13] +Input [8]: [ss_item_sk#2, ss_customer_sk#3, 
ss_store_sk#4, ss_ticket_number#5, ss_quantity#6, s_store_sk#11, s_store_id#12, s_store_name#13] + +(17) Exchange +Input [6]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s_store_id#12, s_store_name#13] +Arguments: hashpartitioning(ss_item_sk#2, 5), true, [id=#15] + +(18) Sort [codegen id : 4] +Input [6]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s_store_id#12, s_store_name#13] +Arguments: [ss_item_sk#2 ASC NULLS FIRST], false, 0 + +(19) Scan parquet default.item +Output [3]: [i_item_sk#16, i_item_id#17, i_item_desc#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 5] +Input [3]: [i_item_sk#16, i_item_id#17, i_item_desc#18] + +(21) Filter [codegen id : 5] +Input [3]: [i_item_sk#16, i_item_id#17, i_item_desc#18] +Condition : isnotnull(i_item_sk#16) + +(22) Exchange +Input [3]: [i_item_sk#16, i_item_id#17, i_item_desc#18] +Arguments: hashpartitioning(i_item_sk#16, 5), true, [id=#19] + +(23) Sort [codegen id : 6] +Input [3]: [i_item_sk#16, i_item_id#17, i_item_desc#18] +Arguments: [i_item_sk#16 ASC NULLS FIRST], false, 0 + +(24) SortMergeJoin [codegen id : 7] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#16] +Join condition: None + +(25) Project [codegen id : 7] +Output [8]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] +Input [9]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s_store_id#12, s_store_name#13, i_item_sk#16, i_item_id#17, i_item_desc#18] + +(26) Exchange +Input [8]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] +Arguments: hashpartitioning(cast(ss_ticket_number#5 as bigint), 
cast(ss_item_sk#2 as bigint), cast(ss_customer_sk#3 as bigint), 5), true, [id=#20] + +(27) Sort [codegen id : 8] +Input [8]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] +Arguments: [cast(ss_ticket_number#5 as bigint) ASC NULLS FIRST, cast(ss_item_sk#2 as bigint) ASC NULLS FIRST, cast(ss_customer_sk#3 as bigint) ASC NULLS FIRST], false, 0 + +(28) Scan parquet default.date_dim +Output [3]: [d_date_sk#21, d_year#22, d_moy#23] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), GreaterThanOrEqual(d_moy,9), LessThanOrEqual(d_moy,12), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 9] +Input [3]: [d_date_sk#21, d_year#22, d_moy#23] + +(30) Filter [codegen id : 9] +Input [3]: [d_date_sk#21, d_year#22, d_moy#23] +Condition : (((((isnotnull(d_year#22) AND isnotnull(d_moy#23)) AND (d_moy#23 >= 9)) AND (d_moy#23 <= 12)) AND (d_year#22 = 1999)) AND isnotnull(d_date_sk#21)) + +(31) Project [codegen id : 9] +Output [1]: [d_date_sk#21] +Input [3]: [d_date_sk#21, d_year#22, d_moy#23] + +(32) BroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#24] + +(33) Scan parquet default.store_returns +Output [5]: [sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_return_quantity#29] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_returned_date_sk)] +ReadSchema: struct + +(34) ColumnarToRow +Input [5]: [sr_returned_date_sk#25, 
sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_return_quantity#29] + +(35) Filter +Input [5]: [sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_return_quantity#29] +Condition : (((isnotnull(sr_ticket_number#28) AND isnotnull(sr_customer_sk#27)) AND isnotnull(sr_item_sk#26)) AND isnotnull(sr_returned_date_sk#25)) + +(36) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cast(d_date_sk#21 as bigint)] +Right keys [1]: [sr_returned_date_sk#25] +Join condition: None + +(37) Project [codegen id : 10] +Output [4]: [sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_return_quantity#29] +Input [6]: [d_date_sk#21, sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_return_quantity#29] + +(38) Exchange +Input [4]: [sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_return_quantity#29] +Arguments: hashpartitioning(sr_ticket_number#28, sr_item_sk#26, sr_customer_sk#27, 5), true, [id=#30] + +(39) Sort [codegen id : 11] +Input [4]: [sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_return_quantity#29] +Arguments: [sr_ticket_number#28 ASC NULLS FIRST, sr_item_sk#26 ASC NULLS FIRST, sr_customer_sk#27 ASC NULLS FIRST], false, 0 + +(40) SortMergeJoin [codegen id : 12] +Left keys [3]: [cast(ss_ticket_number#5 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_customer_sk#3 as bigint)] +Right keys [3]: [sr_ticket_number#28, sr_item_sk#26, sr_customer_sk#27] +Join condition: None + +(41) Project [codegen id : 12] +Output [8]: [ss_quantity#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#26, sr_customer_sk#27, sr_return_quantity#29] +Input [12]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_return_quantity#29] + +(42) Exchange +Input [8]: [ss_quantity#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, 
sr_item_sk#26, sr_customer_sk#27, sr_return_quantity#29] +Arguments: hashpartitioning(sr_customer_sk#27, sr_item_sk#26, 5), true, [id=#31] + +(43) Sort [codegen id : 13] +Input [8]: [ss_quantity#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#26, sr_customer_sk#27, sr_return_quantity#29] +Arguments: [sr_customer_sk#27 ASC NULLS FIRST, sr_item_sk#26 ASC NULLS FIRST], false, 0 + +(44) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#32, cs_bill_customer_sk#33, cs_item_sk#34, cs_quantity#35] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(45) ColumnarToRow [codegen id : 15] +Input [4]: [cs_sold_date_sk#32, cs_bill_customer_sk#33, cs_item_sk#34, cs_quantity#35] + +(46) Filter [codegen id : 15] +Input [4]: [cs_sold_date_sk#32, cs_bill_customer_sk#33, cs_item_sk#34, cs_quantity#35] +Condition : ((isnotnull(cs_bill_customer_sk#33) AND isnotnull(cs_item_sk#34)) AND isnotnull(cs_sold_date_sk#32)) + +(47) Scan parquet default.date_dim +Output [2]: [d_date_sk#36, d_year#37] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 14] +Input [2]: [d_date_sk#36, d_year#37] + +(49) Filter [codegen id : 14] +Input [2]: [d_date_sk#36, d_year#37] +Condition : (d_year#37 IN (1999,2000,2001) AND isnotnull(d_date_sk#36)) + +(50) Project [codegen id : 14] +Output [1]: [d_date_sk#36] +Input [2]: [d_date_sk#36, d_year#37] + +(51) BroadcastExchange +Input [1]: [d_date_sk#36] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as 
bigint)),false), [id=#38] + +(52) BroadcastHashJoin [codegen id : 15] +Left keys [1]: [cs_sold_date_sk#32] +Right keys [1]: [d_date_sk#36] +Join condition: None + +(53) Project [codegen id : 15] +Output [3]: [cs_bill_customer_sk#33, cs_item_sk#34, cs_quantity#35] +Input [5]: [cs_sold_date_sk#32, cs_bill_customer_sk#33, cs_item_sk#34, cs_quantity#35, d_date_sk#36] + +(54) Exchange +Input [3]: [cs_bill_customer_sk#33, cs_item_sk#34, cs_quantity#35] +Arguments: hashpartitioning(cast(cs_bill_customer_sk#33 as bigint), cast(cs_item_sk#34 as bigint), 5), true, [id=#39] + +(55) Sort [codegen id : 16] +Input [3]: [cs_bill_customer_sk#33, cs_item_sk#34, cs_quantity#35] +Arguments: [cast(cs_bill_customer_sk#33 as bigint) ASC NULLS FIRST, cast(cs_item_sk#34 as bigint) ASC NULLS FIRST], false, 0 + +(56) SortMergeJoin [codegen id : 17] +Left keys [2]: [sr_customer_sk#27, sr_item_sk#26] +Right keys [2]: [cast(cs_bill_customer_sk#33 as bigint), cast(cs_item_sk#34 as bigint)] +Join condition: None + +(57) Project [codegen id : 17] +Output [7]: [ss_quantity#6, sr_return_quantity#29, cs_quantity#35, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] +Input [11]: [ss_quantity#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#26, sr_customer_sk#27, sr_return_quantity#29, cs_bill_customer_sk#33, cs_item_sk#34, cs_quantity#35] + +(58) HashAggregate [codegen id : 17] +Input [7]: [ss_quantity#6, sr_return_quantity#29, cs_quantity#35, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] +Keys [4]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13] +Functions [3]: [partial_sum(cast(ss_quantity#6 as bigint)), partial_sum(cast(sr_return_quantity#29 as bigint)), partial_sum(cast(cs_quantity#35 as bigint))] +Aggregate Attributes [3]: [sum#40, sum#41, sum#42] +Results [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, sum#43, sum#44, sum#45] + +(59) Exchange +Input [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, 
s_store_name#13, sum#43, sum#44, sum#45] +Arguments: hashpartitioning(i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, 5), true, [id=#46] + +(60) HashAggregate [codegen id : 18] +Input [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, sum#43, sum#44, sum#45] +Keys [4]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13] +Functions [3]: [sum(cast(ss_quantity#6 as bigint)), sum(cast(sr_return_quantity#29 as bigint)), sum(cast(cs_quantity#35 as bigint))] +Aggregate Attributes [3]: [sum(cast(ss_quantity#6 as bigint))#47, sum(cast(sr_return_quantity#29 as bigint))#48, sum(cast(cs_quantity#35 as bigint))#49] +Results [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, sum(cast(ss_quantity#6 as bigint))#47 AS store_sales_quantity#50, sum(cast(sr_return_quantity#29 as bigint))#48 AS store_returns_quantity#51, sum(cast(cs_quantity#35 as bigint))#49 AS catalog_sales_quantity#52] + +(61) TakeOrderedAndProject +Input [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, store_sales_quantity#50, store_returns_quantity#51, catalog_sales_quantity#52] +Arguments: 100, [i_item_id#17 ASC NULLS FIRST, i_item_desc#18 ASC NULLS FIRST, s_store_id#12 ASC NULLS FIRST, s_store_name#13 ASC NULLS FIRST], [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, store_sales_quantity#50, store_returns_quantity#51, catalog_sales_quantity#52] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.sf100/simplified.txt new file mode 100644 index 0000000000000..79ec8f0b8315d --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.sf100/simplified.txt @@ -0,0 +1,104 @@ +TakeOrderedAndProject [catalog_sales_quantity,i_item_desc,i_item_id,s_store_id,s_store_name,store_returns_quantity,store_sales_quantity] + WholeStageCodegen (18) + HashAggregate 
[i_item_desc,i_item_id,s_store_id,s_store_name,sum,sum,sum] [catalog_sales_quantity,store_returns_quantity,store_sales_quantity,sum,sum,sum,sum(cast(cs_quantity as bigint)),sum(cast(sr_return_quantity as bigint)),sum(cast(ss_quantity as bigint))] + InputAdapter + Exchange [i_item_desc,i_item_id,s_store_id,s_store_name] #1 + WholeStageCodegen (17) + HashAggregate [cs_quantity,i_item_desc,i_item_id,s_store_id,s_store_name,sr_return_quantity,ss_quantity] [sum,sum,sum,sum,sum,sum] + Project [cs_quantity,i_item_desc,i_item_id,s_store_id,s_store_name,sr_return_quantity,ss_quantity] + SortMergeJoin [cs_bill_customer_sk,cs_item_sk,sr_customer_sk,sr_item_sk] + InputAdapter + WholeStageCodegen (13) + Sort [sr_customer_sk,sr_item_sk] + InputAdapter + Exchange [sr_customer_sk,sr_item_sk] #2 + WholeStageCodegen (12) + Project [i_item_desc,i_item_id,s_store_id,s_store_name,sr_customer_sk,sr_item_sk,sr_return_quantity,ss_quantity] + SortMergeJoin [sr_customer_sk,sr_item_sk,sr_ticket_number,ss_customer_sk,ss_item_sk,ss_ticket_number] + InputAdapter + WholeStageCodegen (8) + Sort [ss_customer_sk,ss_item_sk,ss_ticket_number] + InputAdapter + Exchange [ss_customer_sk,ss_item_sk,ss_ticket_number] #3 + WholeStageCodegen (7) + Project [i_item_desc,i_item_id,s_store_id,s_store_name,ss_customer_sk,ss_item_sk,ss_quantity,ss_ticket_number] + SortMergeJoin [i_item_sk,ss_item_sk] + InputAdapter + WholeStageCodegen (4) + Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #4 + WholeStageCodegen (3) + Project [s_store_id,s_store_name,ss_customer_sk,ss_item_sk,ss_quantity,ss_ticket_number] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_customer_sk,ss_item_sk,ss_quantity,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + 
InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_id,s_store_name,s_store_sk] + InputAdapter + WholeStageCodegen (6) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #7 + WholeStageCodegen (5) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_desc,i_item_id,i_item_sk] + InputAdapter + WholeStageCodegen (11) + Sort [sr_customer_sk,sr_item_sk,sr_ticket_number] + InputAdapter + Exchange [sr_customer_sk,sr_item_sk,sr_ticket_number] #8 + WholeStageCodegen (10) + Project [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_ticket_number] + BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (9) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + Filter [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,sr_ticket_number] + InputAdapter + WholeStageCodegen (16) + Sort [cs_bill_customer_sk,cs_item_sk] + InputAdapter + Exchange [cs_bill_customer_sk,cs_item_sk] #10 + WholeStageCodegen (15) + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (14) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet 
default.date_dim [d_date_sk,d_year] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/explain.txt new file mode 100644 index 0000000000000..9de2335c2d493 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/explain.txt @@ -0,0 +1,292 @@ +== Physical Plan == +TakeOrderedAndProject (52) ++- * HashAggregate (51) + +- Exchange (50) + +- * HashAggregate (49) + +- * Project (48) + +- * BroadcastHashJoin Inner BuildRight (47) + :- * Project (42) + : +- * BroadcastHashJoin Inner BuildRight (41) + : :- * Project (36) + : : +- * BroadcastHashJoin Inner BuildRight (35) + : : :- * Project (29) + : : : +- * BroadcastHashJoin Inner BuildRight (28) + : : : :- * Project (22) + : : : : +- * BroadcastHashJoin Inner BuildRight (21) + : : : : :- * Project (15) + : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : :- * Project (9) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : : : :- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet default.store_sales (1) + : : : : : : +- BroadcastExchange (7) + : : : : : : +- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : +- Scan parquet default.store_returns (4) + : : : : : +- BroadcastExchange (13) + : : : : : +- * Filter (12) + : : : : : +- * ColumnarToRow (11) + : : : : : +- Scan parquet default.catalog_sales (10) + : : : : +- BroadcastExchange (20) + : : : : +- * Project (19) + : : : : +- * Filter (18) + : : : : +- * ColumnarToRow (17) + : : : : +- Scan parquet default.date_dim (16) + : : : +- BroadcastExchange (27) + : : : +- * Project (26) + : : : +- * Filter (25) + : : : +- * ColumnarToRow (24) + : : : +- Scan parquet default.date_dim (23) + : : +- BroadcastExchange (34) + : : +- * Project (33) + : : +- * Filter (32) + : : +- * ColumnarToRow (31) + : : +- Scan parquet default.date_dim (30) + : +- 
BroadcastExchange (40) + : +- * Filter (39) + : +- * ColumnarToRow (38) + : +- Scan parquet default.store (37) + +- BroadcastExchange (46) + +- * Filter (45) + +- * ColumnarToRow (44) + +- Scan parquet default.item (43) + + +(1) Scan parquet default.store_sales +Output [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_quantity#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 8] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_quantity#6] + +(3) Filter [codegen id : 8] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_quantity#6] +Condition : ((((isnotnull(ss_item_sk#2) AND isnotnull(ss_customer_sk#3)) AND isnotnull(ss_ticket_number#5)) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#4)) + +(4) Scan parquet default.store_returns +Output [5]: [sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_return_quantity#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_returned_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [5]: [sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_return_quantity#11] + +(6) Filter [codegen id : 1] +Input [5]: [sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_return_quantity#11] +Condition : (((isnotnull(sr_ticket_number#10) 
AND isnotnull(sr_customer_sk#9)) AND isnotnull(sr_item_sk#8)) AND isnotnull(sr_returned_date_sk#7)) + +(7) BroadcastExchange +Input [5]: [sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_return_quantity#11] +Arguments: HashedRelationBroadcastMode(List(input[2, bigint, false], input[1, bigint, false], input[3, bigint, false]),false), [id=#12] + +(8) BroadcastHashJoin [codegen id : 8] +Left keys [3]: [cast(ss_customer_sk#3 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint)] +Right keys [3]: [sr_customer_sk#9, sr_item_sk#8, sr_ticket_number#10] +Join condition: None + +(9) Project [codegen id : 8] +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_return_quantity#11] +Input [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_quantity#6, sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_return_quantity#11] + +(10) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16] + +(12) Filter [codegen id : 2] +Input [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16] +Condition : ((isnotnull(cs_bill_customer_sk#14) AND isnotnull(cs_item_sk#15)) AND isnotnull(cs_sold_date_sk#13)) + +(13) BroadcastExchange +Input [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as 
bigint), cast(input[2, int, false] as bigint)),false), [id=#17] + +(14) BroadcastHashJoin [codegen id : 8] +Left keys [2]: [sr_customer_sk#9, sr_item_sk#8] +Right keys [2]: [cast(cs_bill_customer_sk#14 as bigint), cast(cs_item_sk#15 as bigint)] +Join condition: None + +(15) Project [codegen id : 8] +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_returned_date_sk#7, sr_return_quantity#11, cs_sold_date_sk#13, cs_quantity#16] +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_return_quantity#11, cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16] + +(16) Scan parquet default.date_dim +Output [3]: [d_date_sk#18, d_year#19, d_moy#20] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,9), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#18, d_year#19, d_moy#20] + +(18) Filter [codegen id : 3] +Input [3]: [d_date_sk#18, d_year#19, d_moy#20] +Condition : ((((isnotnull(d_moy#20) AND isnotnull(d_year#19)) AND (d_moy#20 = 9)) AND (d_year#19 = 1999)) AND isnotnull(d_date_sk#18)) + +(19) Project [codegen id : 3] +Output [1]: [d_date_sk#18] +Input [3]: [d_date_sk#18, d_year#19, d_moy#20] + +(20) BroadcastExchange +Input [1]: [d_date_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] + +(21) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#18] +Join condition: None + +(22) Project [codegen id : 8] +Output [7]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_returned_date_sk#7, sr_return_quantity#11, cs_sold_date_sk#13, cs_quantity#16] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, 
ss_store_sk#4, ss_quantity#6, sr_returned_date_sk#7, sr_return_quantity#11, cs_sold_date_sk#13, cs_quantity#16, d_date_sk#18] + +(23) Scan parquet default.date_dim +Output [3]: [d_date_sk#22, d_year#23, d_moy#24] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), GreaterThanOrEqual(d_moy,9), LessThanOrEqual(d_moy,12), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [3]: [d_date_sk#22, d_year#23, d_moy#24] + +(25) Filter [codegen id : 4] +Input [3]: [d_date_sk#22, d_year#23, d_moy#24] +Condition : (((((isnotnull(d_moy#24) AND isnotnull(d_year#23)) AND (d_moy#24 >= 9)) AND (d_moy#24 <= 12)) AND (d_year#23 = 1999)) AND isnotnull(d_date_sk#22)) + +(26) Project [codegen id : 4] +Output [1]: [d_date_sk#22] +Input [3]: [d_date_sk#22, d_year#23, d_moy#24] + +(27) BroadcastExchange +Input [1]: [d_date_sk#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] + +(28) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [sr_returned_date_sk#7] +Right keys [1]: [cast(d_date_sk#22 as bigint)] +Join condition: None + +(29) Project [codegen id : 8] +Output [6]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_return_quantity#11, cs_sold_date_sk#13, cs_quantity#16] +Input [8]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_returned_date_sk#7, sr_return_quantity#11, cs_sold_date_sk#13, cs_quantity#16, d_date_sk#22] + +(30) Scan parquet default.date_dim +Output [2]: [d_date_sk#26, d_year#27] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#26, 
d_year#27] + +(32) Filter [codegen id : 5] +Input [2]: [d_date_sk#26, d_year#27] +Condition : (d_year#27 IN (1999,2000,2001) AND isnotnull(d_date_sk#26)) + +(33) Project [codegen id : 5] +Output [1]: [d_date_sk#26] +Input [2]: [d_date_sk#26, d_year#27] + +(34) BroadcastExchange +Input [1]: [d_date_sk#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#28] + +(35) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [cs_sold_date_sk#13] +Right keys [1]: [d_date_sk#26] +Join condition: None + +(36) Project [codegen id : 8] +Output [5]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_return_quantity#11, cs_quantity#16] +Input [7]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_return_quantity#11, cs_sold_date_sk#13, cs_quantity#16, d_date_sk#26] + +(37) Scan parquet default.store +Output [3]: [s_store_sk#29, s_store_id#30, s_store_name#31] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 6] +Input [3]: [s_store_sk#29, s_store_id#30, s_store_name#31] + +(39) Filter [codegen id : 6] +Input [3]: [s_store_sk#29, s_store_id#30, s_store_name#31] +Condition : isnotnull(s_store_sk#29) + +(40) BroadcastExchange +Input [3]: [s_store_sk#29, s_store_id#30, s_store_name#31] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#32] + +(41) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#29] +Join condition: None + +(42) Project [codegen id : 8] +Output [6]: [ss_item_sk#2, ss_quantity#6, sr_return_quantity#11, cs_quantity#16, s_store_id#30, s_store_name#31] +Input [8]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_return_quantity#11, cs_quantity#16, s_store_sk#29, s_store_id#30, s_store_name#31] + +(43) Scan parquet default.item +Output 
[3]: [i_item_sk#33, i_item_id#34, i_item_desc#35] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(44) ColumnarToRow [codegen id : 7] +Input [3]: [i_item_sk#33, i_item_id#34, i_item_desc#35] + +(45) Filter [codegen id : 7] +Input [3]: [i_item_sk#33, i_item_id#34, i_item_desc#35] +Condition : isnotnull(i_item_sk#33) + +(46) BroadcastExchange +Input [3]: [i_item_sk#33, i_item_id#34, i_item_desc#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#36] + +(47) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#33] +Join condition: None + +(48) Project [codegen id : 8] +Output [7]: [ss_quantity#6, sr_return_quantity#11, cs_quantity#16, s_store_id#30, s_store_name#31, i_item_id#34, i_item_desc#35] +Input [9]: [ss_item_sk#2, ss_quantity#6, sr_return_quantity#11, cs_quantity#16, s_store_id#30, s_store_name#31, i_item_sk#33, i_item_id#34, i_item_desc#35] + +(49) HashAggregate [codegen id : 8] +Input [7]: [ss_quantity#6, sr_return_quantity#11, cs_quantity#16, s_store_id#30, s_store_name#31, i_item_id#34, i_item_desc#35] +Keys [4]: [i_item_id#34, i_item_desc#35, s_store_id#30, s_store_name#31] +Functions [3]: [partial_sum(cast(ss_quantity#6 as bigint)), partial_sum(cast(sr_return_quantity#11 as bigint)), partial_sum(cast(cs_quantity#16 as bigint))] +Aggregate Attributes [3]: [sum#37, sum#38, sum#39] +Results [7]: [i_item_id#34, i_item_desc#35, s_store_id#30, s_store_name#31, sum#40, sum#41, sum#42] + +(50) Exchange +Input [7]: [i_item_id#34, i_item_desc#35, s_store_id#30, s_store_name#31, sum#40, sum#41, sum#42] +Arguments: hashpartitioning(i_item_id#34, i_item_desc#35, s_store_id#30, s_store_name#31, 5), true, [id=#43] + +(51) HashAggregate [codegen id : 9] +Input [7]: [i_item_id#34, i_item_desc#35, 
s_store_id#30, s_store_name#31, sum#40, sum#41, sum#42] +Keys [4]: [i_item_id#34, i_item_desc#35, s_store_id#30, s_store_name#31] +Functions [3]: [sum(cast(ss_quantity#6 as bigint)), sum(cast(sr_return_quantity#11 as bigint)), sum(cast(cs_quantity#16 as bigint))] +Aggregate Attributes [3]: [sum(cast(ss_quantity#6 as bigint))#44, sum(cast(sr_return_quantity#11 as bigint))#45, sum(cast(cs_quantity#16 as bigint))#46] +Results [7]: [i_item_id#34, i_item_desc#35, s_store_id#30, s_store_name#31, sum(cast(ss_quantity#6 as bigint))#44 AS store_sales_quantity#47, sum(cast(sr_return_quantity#11 as bigint))#45 AS store_returns_quantity#48, sum(cast(cs_quantity#16 as bigint))#46 AS catalog_sales_quantity#49] + +(52) TakeOrderedAndProject +Input [7]: [i_item_id#34, i_item_desc#35, s_store_id#30, s_store_name#31, store_sales_quantity#47, store_returns_quantity#48, catalog_sales_quantity#49] +Arguments: 100, [i_item_id#34 ASC NULLS FIRST, i_item_desc#35 ASC NULLS FIRST, s_store_id#30 ASC NULLS FIRST, s_store_name#31 ASC NULLS FIRST], [i_item_id#34, i_item_desc#35, s_store_id#30, s_store_name#31, store_sales_quantity#47, store_returns_quantity#48, catalog_sales_quantity#49] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/simplified.txt new file mode 100644 index 0000000000000..26b7884072011 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/simplified.txt @@ -0,0 +1,77 @@ +TakeOrderedAndProject [catalog_sales_quantity,i_item_desc,i_item_id,s_store_id,s_store_name,store_returns_quantity,store_sales_quantity] + WholeStageCodegen (9) + HashAggregate [i_item_desc,i_item_id,s_store_id,s_store_name,sum,sum,sum] [catalog_sales_quantity,store_returns_quantity,store_sales_quantity,sum,sum,sum,sum(cast(cs_quantity as bigint)),sum(cast(sr_return_quantity as bigint)),sum(cast(ss_quantity as bigint))] + InputAdapter + 
Exchange [i_item_desc,i_item_id,s_store_id,s_store_name] #1 + WholeStageCodegen (8) + HashAggregate [cs_quantity,i_item_desc,i_item_id,s_store_id,s_store_name,sr_return_quantity,ss_quantity] [sum,sum,sum,sum,sum,sum] + Project [cs_quantity,i_item_desc,i_item_id,s_store_id,s_store_name,sr_return_quantity,ss_quantity] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [cs_quantity,s_store_id,s_store_name,sr_return_quantity,ss_item_sk,ss_quantity] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [cs_quantity,sr_return_quantity,ss_item_sk,ss_quantity,ss_store_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_quantity,cs_sold_date_sk,sr_return_quantity,ss_item_sk,ss_quantity,ss_store_sk] + BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + Project [cs_quantity,cs_sold_date_sk,sr_return_quantity,sr_returned_date_sk,ss_item_sk,ss_quantity,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [cs_quantity,cs_sold_date_sk,sr_return_quantity,sr_returned_date_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,sr_customer_sk,sr_item_sk] + Project [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [sr_customer_sk,sr_item_sk,sr_ticket_number,ss_customer_sk,ss_item_sk,ss_ticket_number] + Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,sr_ticket_number] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter 
[cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (6) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_id,s_store_name,s_store_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (7) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_desc,i_item_id,i_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3.sf100/explain.txt new file mode 100644 index 0000000000000..6ebca702beb34 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3.sf100/explain.txt @@ -0,0 +1,122 @@ +== Physical Plan == +TakeOrderedAndProject (21) ++- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Project (17) + +- * BroadcastHashJoin Inner BuildRight (16) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.store_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.item 
(4) + +- BroadcastExchange (15) + +- * Project (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet default.date_dim (11) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] + +(3) Filter [codegen id : 3] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_item_sk#2)) + +(4) Scan parquet default.item +Output [4]: [i_item_sk#4, i_brand_id#5, i_brand#6, i_manufact_id#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,128), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [i_item_sk#4, i_brand_id#5, i_brand#6, i_manufact_id#7] + +(6) Filter [codegen id : 1] +Input [4]: [i_item_sk#4, i_brand_id#5, i_brand#6, i_manufact_id#7] +Condition : ((isnotnull(i_manufact_id#7) AND (i_manufact_id#7 = 128)) AND isnotnull(i_item_sk#4)) + +(7) Project [codegen id : 1] +Output [3]: [i_item_sk#4, i_brand_id#5, i_brand#6] +Input [4]: [i_item_sk#4, i_brand_id#5, i_brand#6, i_manufact_id#7] + +(8) BroadcastExchange +Input [3]: [i_item_sk#4, i_brand_id#5, i_brand#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#4] +Join condition: None + +(10) Project [codegen id : 3] +Output [4]: 
[ss_sold_date_sk#1, ss_ext_sales_price#3, i_brand_id#5, i_brand#6] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3, i_item_sk#4, i_brand_id#5, i_brand#6] + +(11) Scan parquet default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_moy#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_moy), EqualTo(d_moy,11), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] + +(13) Filter [codegen id : 2] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Condition : ((isnotnull(d_moy#11) AND (d_moy#11 = 11)) AND isnotnull(d_date_sk#9)) + +(14) Project [codegen id : 2] +Output [2]: [d_date_sk#9, d_year#10] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] + +(15) BroadcastExchange +Input [2]: [d_date_sk#9, d_year#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(17) Project [codegen id : 3] +Output [4]: [d_year#10, ss_ext_sales_price#3, i_brand_id#5, i_brand#6] +Input [6]: [ss_sold_date_sk#1, ss_ext_sales_price#3, i_brand_id#5, i_brand#6, d_date_sk#9, d_year#10] + +(18) HashAggregate [codegen id : 3] +Input [4]: [d_year#10, ss_ext_sales_price#3, i_brand_id#5, i_brand#6] +Keys [3]: [d_year#10, i_brand#6, i_brand_id#5] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#13] +Results [4]: [d_year#10, i_brand#6, i_brand_id#5, sum#14] + +(19) Exchange +Input [4]: [d_year#10, i_brand#6, i_brand_id#5, sum#14] +Arguments: hashpartitioning(d_year#10, i_brand#6, i_brand_id#5, 5), true, [id=#15] + +(20) HashAggregate [codegen id : 4] +Input [4]: [d_year#10, i_brand#6, i_brand_id#5, sum#14] +Keys [3]: [d_year#10, 
i_brand#6, i_brand_id#5] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#16] +Results [4]: [d_year#10, i_brand_id#5 AS brand_id#17, i_brand#6 AS brand#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#16,17,2) AS sum_agg#19] + +(21) TakeOrderedAndProject +Input [4]: [d_year#10, brand_id#17, brand#18, sum_agg#19] +Arguments: 100, [d_year#10 ASC NULLS FIRST, sum_agg#19 DESC NULLS LAST, brand_id#17 ASC NULLS FIRST], [d_year#10, brand_id#17, brand#18, sum_agg#19] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3.sf100/simplified.txt new file mode 100644 index 0000000000000..f12df6c472196 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3.sf100/simplified.txt @@ -0,0 +1,31 @@ +TakeOrderedAndProject [brand,brand_id,d_year,sum_agg] + WholeStageCodegen (4) + HashAggregate [d_year,i_brand,i_brand_id,sum] [brand,brand_id,sum,sum(UnscaledValue(ss_ext_sales_price)),sum_agg] + InputAdapter + Exchange [d_year,i_brand,i_brand_id] #1 + WholeStageCodegen (3) + HashAggregate [d_year,i_brand,i_brand_id,ss_ext_sales_price] [sum,sum] + Project [d_year,i_brand,i_brand_id,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand,i_brand_id,ss_ext_sales_price,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [i_brand,i_brand_id,i_item_sk] + Filter [i_item_sk,i_manufact_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_brand_id,i_item_sk,i_manufact_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [d_date_sk,d_year] + Filter 
[d_date_sk,d_moy] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/explain.txt new file mode 100644 index 0000000000000..3dccfeaf633a8 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/explain.txt @@ -0,0 +1,122 @@ +== Physical Plan == +TakeOrderedAndProject (21) ++- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Project (17) + +- * BroadcastHashJoin Inner BuildRight (16) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Project (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.date_dim (1) + : +- BroadcastExchange (8) + : +- * Filter (7) + : +- * ColumnarToRow (6) + : +- Scan parquet default.store_sales (5) + +- BroadcastExchange (15) + +- * Project (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet default.item (11) + + +(1) Scan parquet default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_moy), EqualTo(d_moy,11), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(3) Filter [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((isnotnull(d_moy#3) AND (d_moy#3 = 11)) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 3] +Output [2]: [d_date_sk#1, d_year#2] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(5) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] + +(7) Filter [codegen id : 1] +Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] +Condition : (isnotnull(ss_sold_date_sk#4) AND isnotnull(ss_item_sk#5)) + +(8) BroadcastExchange +Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#4] +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [d_year#2, ss_item_sk#5, ss_ext_sales_price#6] +Input [5]: [d_date_sk#1, d_year#2, ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] + +(11) Scan parquet default.item +Output [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manufact_id#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,128), IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manufact_id#11] + +(13) Filter [codegen id : 2] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manufact_id#11] +Condition : ((isnotnull(i_manufact_id#11) AND (i_manufact_id#11 = 128)) AND isnotnull(i_item_sk#8)) + +(14) Project [codegen id : 2] +Output [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manufact_id#11] + +(15) BroadcastExchange +Input [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as 
bigint)),false), [id=#12] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#5] +Right keys [1]: [i_item_sk#8] +Join condition: None + +(17) Project [codegen id : 3] +Output [4]: [d_year#2, ss_ext_sales_price#6, i_brand_id#9, i_brand#10] +Input [6]: [d_year#2, ss_item_sk#5, ss_ext_sales_price#6, i_item_sk#8, i_brand_id#9, i_brand#10] + +(18) HashAggregate [codegen id : 3] +Input [4]: [d_year#2, ss_ext_sales_price#6, i_brand_id#9, i_brand#10] +Keys [3]: [d_year#2, i_brand#10, i_brand_id#9] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#6))] +Aggregate Attributes [1]: [sum#13] +Results [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] + +(19) Exchange +Input [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] +Arguments: hashpartitioning(d_year#2, i_brand#10, i_brand_id#9, 5), true, [id=#15] + +(20) HashAggregate [codegen id : 4] +Input [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] +Keys [3]: [d_year#2, i_brand#10, i_brand_id#9] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#6))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#6))#16] +Results [4]: [d_year#2, i_brand_id#9 AS brand_id#17, i_brand#10 AS brand#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#16,17,2) AS sum_agg#19] + +(21) TakeOrderedAndProject +Input [4]: [d_year#2, brand_id#17, brand#18, sum_agg#19] +Arguments: 100, [d_year#2 ASC NULLS FIRST, sum_agg#19 DESC NULLS LAST, brand_id#17 ASC NULLS FIRST], [d_year#2, brand_id#17, brand#18, sum_agg#19] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/simplified.txt new file mode 100644 index 0000000000000..4008c7b77cd8b --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/simplified.txt @@ -0,0 +1,31 @@ +TakeOrderedAndProject [brand,brand_id,d_year,sum_agg] + WholeStageCodegen (4) + HashAggregate [d_year,i_brand,i_brand_id,sum] 
[brand,brand_id,sum,sum(UnscaledValue(ss_ext_sales_price)),sum_agg] + InputAdapter + Exchange [d_year,i_brand,i_brand_id] #1 + WholeStageCodegen (3) + HashAggregate [d_year,i_brand,i_brand_id,ss_ext_sales_price] [sum,sum] + Project [d_year,i_brand,i_brand_id,ss_ext_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [d_year,ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [d_date_sk,d_year] + Filter [d_date_sk,d_moy] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [i_brand,i_brand_id,i_item_sk] + Filter [i_item_sk,i_manufact_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_brand_id,i_item_sk,i_manufact_id] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30.sf100/explain.txt new file mode 100644 index 0000000000000..636d2569b0575 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30.sf100/explain.txt @@ -0,0 +1,333 @@ +== Physical Plan == +TakeOrderedAndProject (59) ++- * Project (58) + +- * BroadcastHashJoin Inner BuildRight (57) + :- * Project (45) + : +- * BroadcastHashJoin Inner BuildRight (44) + : :- * Filter (23) + : : +- * HashAggregate (22) + : : +- Exchange (21) + : : +- * HashAggregate (20) + : : +- * Project (19) + : : +- * SortMergeJoin Inner (18) + : : :- * Sort (12) + : : : +- Exchange (11) + : : : +- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.web_returns (1) + : : : +- 
BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- * Sort (17) + : : +- Exchange (16) + : : +- * Filter (15) + : : +- * ColumnarToRow (14) + : : +- Scan parquet default.customer_address (13) + : +- BroadcastExchange (43) + : +- * Filter (42) + : +- * HashAggregate (41) + : +- Exchange (40) + : +- * HashAggregate (39) + : +- * HashAggregate (38) + : +- Exchange (37) + : +- * HashAggregate (36) + : +- * Project (35) + : +- * SortMergeJoin Inner (34) + : :- * Sort (31) + : : +- Exchange (30) + : : +- * Project (29) + : : +- * BroadcastHashJoin Inner BuildRight (28) + : : :- * Filter (26) + : : : +- * ColumnarToRow (25) + : : : +- Scan parquet default.web_returns (24) + : : +- ReusedExchange (27) + : +- * Sort (33) + : +- ReusedExchange (32) + +- BroadcastExchange (56) + +- * Project (55) + +- * BroadcastHashJoin Inner BuildRight (54) + :- * Filter (48) + : +- * ColumnarToRow (47) + : +- Scan parquet default.customer (46) + +- BroadcastExchange (53) + +- * Project (52) + +- * Filter (51) + +- * ColumnarToRow (50) + +- Scan parquet default.customer_address (49) + + +(1) Scan parquet default.web_returns +Output [4]: [wr_returned_date_sk#1, wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_returns] +PushedFilters: [IsNotNull(wr_returned_date_sk), IsNotNull(wr_returning_addr_sk), IsNotNull(wr_returning_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [4]: [wr_returned_date_sk#1, wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4] + +(3) Filter [codegen id : 2] +Input [4]: [wr_returned_date_sk#1, wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4] +Condition : ((isnotnull(wr_returned_date_sk#1) AND 
isnotnull(wr_returning_addr_sk#3)) AND isnotnull(wr_returning_customer_sk#2)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#5, d_year#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#5, d_year#6] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#5, d_year#6] +Condition : ((isnotnull(d_year#6) AND (d_year#6 = 2002)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [2]: [d_date_sk#5, d_year#6] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [wr_returned_date_sk#1] +Right keys [1]: [cast(d_date_sk#5 as bigint)] +Join condition: None + +(10) Project [codegen id : 2] +Output [3]: [wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4] +Input [5]: [wr_returned_date_sk#1, wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4, d_date_sk#5] + +(11) Exchange +Input [3]: [wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4] +Arguments: hashpartitioning(wr_returning_addr_sk#3, 5), true, [id=#8] + +(12) Sort [codegen id : 3] +Input [3]: [wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4] +Arguments: [wr_returning_addr_sk#3 ASC NULLS FIRST], false, 0 + +(13) Scan parquet default.customer_address +Output [2]: [ca_address_sk#9, ca_state#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state)] +ReadSchema: struct + +(14) 
ColumnarToRow [codegen id : 4] +Input [2]: [ca_address_sk#9, ca_state#10] + +(15) Filter [codegen id : 4] +Input [2]: [ca_address_sk#9, ca_state#10] +Condition : (isnotnull(ca_address_sk#9) AND isnotnull(ca_state#10)) + +(16) Exchange +Input [2]: [ca_address_sk#9, ca_state#10] +Arguments: hashpartitioning(cast(ca_address_sk#9 as bigint), 5), true, [id=#11] + +(17) Sort [codegen id : 5] +Input [2]: [ca_address_sk#9, ca_state#10] +Arguments: [cast(ca_address_sk#9 as bigint) ASC NULLS FIRST], false, 0 + +(18) SortMergeJoin [codegen id : 6] +Left keys [1]: [wr_returning_addr_sk#3] +Right keys [1]: [cast(ca_address_sk#9 as bigint)] +Join condition: None + +(19) Project [codegen id : 6] +Output [3]: [wr_returning_customer_sk#2, wr_return_amt#4, ca_state#10] +Input [5]: [wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4, ca_address_sk#9, ca_state#10] + +(20) HashAggregate [codegen id : 6] +Input [3]: [wr_returning_customer_sk#2, wr_return_amt#4, ca_state#10] +Keys [2]: [wr_returning_customer_sk#2, ca_state#10] +Functions [1]: [partial_sum(UnscaledValue(wr_return_amt#4))] +Aggregate Attributes [1]: [sum#12] +Results [3]: [wr_returning_customer_sk#2, ca_state#10, sum#13] + +(21) Exchange +Input [3]: [wr_returning_customer_sk#2, ca_state#10, sum#13] +Arguments: hashpartitioning(wr_returning_customer_sk#2, ca_state#10, 5), true, [id=#14] + +(22) HashAggregate [codegen id : 17] +Input [3]: [wr_returning_customer_sk#2, ca_state#10, sum#13] +Keys [2]: [wr_returning_customer_sk#2, ca_state#10] +Functions [1]: [sum(UnscaledValue(wr_return_amt#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(wr_return_amt#4))#15] +Results [3]: [wr_returning_customer_sk#2 AS ctr_customer_sk#16, ca_state#10 AS ctr_state#17, MakeDecimal(sum(UnscaledValue(wr_return_amt#4))#15,17,2) AS ctr_total_return#18] + +(23) Filter [codegen id : 17] +Input [3]: [ctr_customer_sk#16, ctr_state#17, ctr_total_return#18] +Condition : isnotnull(ctr_total_return#18) + +(24) Scan parquet 
default.web_returns +Output [4]: [wr_returned_date_sk#1, wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_returns] +PushedFilters: [IsNotNull(wr_returned_date_sk), IsNotNull(wr_returning_addr_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 8] +Input [4]: [wr_returned_date_sk#1, wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4] + +(26) Filter [codegen id : 8] +Input [4]: [wr_returned_date_sk#1, wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4] +Condition : (isnotnull(wr_returned_date_sk#1) AND isnotnull(wr_returning_addr_sk#3)) + +(27) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(28) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [wr_returned_date_sk#1] +Right keys [1]: [cast(d_date_sk#5 as bigint)] +Join condition: None + +(29) Project [codegen id : 8] +Output [3]: [wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4] +Input [5]: [wr_returned_date_sk#1, wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4, d_date_sk#5] + +(30) Exchange +Input [3]: [wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4] +Arguments: hashpartitioning(wr_returning_addr_sk#3, 5), true, [id=#19] + +(31) Sort [codegen id : 9] +Input [3]: [wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4] +Arguments: [wr_returning_addr_sk#3 ASC NULLS FIRST], false, 0 + +(32) ReusedExchange [Reuses operator id: 16] +Output [2]: [ca_address_sk#9, ca_state#10] + +(33) Sort [codegen id : 11] +Input [2]: [ca_address_sk#9, ca_state#10] +Arguments: [cast(ca_address_sk#9 as bigint) ASC NULLS FIRST], false, 0 + +(34) SortMergeJoin [codegen id : 12] +Left keys [1]: [wr_returning_addr_sk#3] +Right keys [1]: [cast(ca_address_sk#9 as bigint)] +Join condition: None + +(35) Project [codegen id 
: 12] +Output [3]: [wr_returning_customer_sk#2, wr_return_amt#4, ca_state#10] +Input [5]: [wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4, ca_address_sk#9, ca_state#10] + +(36) HashAggregate [codegen id : 12] +Input [3]: [wr_returning_customer_sk#2, wr_return_amt#4, ca_state#10] +Keys [2]: [wr_returning_customer_sk#2, ca_state#10] +Functions [1]: [partial_sum(UnscaledValue(wr_return_amt#4))] +Aggregate Attributes [1]: [sum#20] +Results [3]: [wr_returning_customer_sk#2, ca_state#10, sum#21] + +(37) Exchange +Input [3]: [wr_returning_customer_sk#2, ca_state#10, sum#21] +Arguments: hashpartitioning(wr_returning_customer_sk#2, ca_state#10, 5), true, [id=#22] + +(38) HashAggregate [codegen id : 13] +Input [3]: [wr_returning_customer_sk#2, ca_state#10, sum#21] +Keys [2]: [wr_returning_customer_sk#2, ca_state#10] +Functions [1]: [sum(UnscaledValue(wr_return_amt#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(wr_return_amt#4))#23] +Results [2]: [ca_state#10 AS ctr_state#17, MakeDecimal(sum(UnscaledValue(wr_return_amt#4))#23,17,2) AS ctr_total_return#18] + +(39) HashAggregate [codegen id : 13] +Input [2]: [ctr_state#17, ctr_total_return#18] +Keys [1]: [ctr_state#17] +Functions [1]: [partial_avg(ctr_total_return#18)] +Aggregate Attributes [2]: [sum#24, count#25] +Results [3]: [ctr_state#17, sum#26, count#27] + +(40) Exchange +Input [3]: [ctr_state#17, sum#26, count#27] +Arguments: hashpartitioning(ctr_state#17, 5), true, [id=#28] + +(41) HashAggregate [codegen id : 14] +Input [3]: [ctr_state#17, sum#26, count#27] +Keys [1]: [ctr_state#17] +Functions [1]: [avg(ctr_total_return#18)] +Aggregate Attributes [1]: [avg(ctr_total_return#18)#29] +Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#18)#29) * 1.200000), DecimalType(24,7), true) AS (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#30, ctr_state#17 AS ctr_state#17#31] + +(42) Filter [codegen id : 14] +Input [2]: [(CAST(avg(ctr_total_return) AS 
DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#30, ctr_state#17#31] +Condition : isnotnull((CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#30) + +(43) BroadcastExchange +Input [2]: [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#30, ctr_state#17#31] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [id=#32] + +(44) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ctr_state#17] +Right keys [1]: [ctr_state#17#31] +Join condition: (cast(ctr_total_return#18 as decimal(24,7)) > (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#30) + +(45) Project [codegen id : 17] +Output [2]: [ctr_customer_sk#16, ctr_total_return#18] +Input [5]: [ctr_customer_sk#16, ctr_state#17, ctr_total_return#18, (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#30, ctr_state#17#31] + +(46) Scan parquet default.customer +Output [14]: [c_customer_sk#33, c_customer_id#34, c_current_addr_sk#35, c_salutation#36, c_first_name#37, c_last_name#38, c_preferred_cust_flag#39, c_birth_day#40, c_birth_month#41, c_birth_year#42, c_birth_country#43, c_login#44, c_email_address#45, c_last_review_date#46] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(47) ColumnarToRow [codegen id : 16] +Input [14]: [c_customer_sk#33, c_customer_id#34, c_current_addr_sk#35, c_salutation#36, c_first_name#37, c_last_name#38, c_preferred_cust_flag#39, c_birth_day#40, c_birth_month#41, c_birth_year#42, c_birth_country#43, c_login#44, c_email_address#45, c_last_review_date#46] + +(48) Filter [codegen id : 16] +Input [14]: [c_customer_sk#33, c_customer_id#34, c_current_addr_sk#35, c_salutation#36, c_first_name#37, c_last_name#38, c_preferred_cust_flag#39, c_birth_day#40, 
c_birth_month#41, c_birth_year#42, c_birth_country#43, c_login#44, c_email_address#45, c_last_review_date#46] +Condition : (isnotnull(c_customer_sk#33) AND isnotnull(c_current_addr_sk#35)) + +(49) Scan parquet default.customer_address +Output [2]: [ca_address_sk#9, ca_state#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,GA), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(50) ColumnarToRow [codegen id : 15] +Input [2]: [ca_address_sk#9, ca_state#10] + +(51) Filter [codegen id : 15] +Input [2]: [ca_address_sk#9, ca_state#10] +Condition : ((isnotnull(ca_state#10) AND (ca_state#10 = GA)) AND isnotnull(ca_address_sk#9)) + +(52) Project [codegen id : 15] +Output [1]: [ca_address_sk#9] +Input [2]: [ca_address_sk#9, ca_state#10] + +(53) BroadcastExchange +Input [1]: [ca_address_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#47] + +(54) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [c_current_addr_sk#35] +Right keys [1]: [ca_address_sk#9] +Join condition: None + +(55) Project [codegen id : 16] +Output [13]: [c_customer_sk#33, c_customer_id#34, c_salutation#36, c_first_name#37, c_last_name#38, c_preferred_cust_flag#39, c_birth_day#40, c_birth_month#41, c_birth_year#42, c_birth_country#43, c_login#44, c_email_address#45, c_last_review_date#46] +Input [15]: [c_customer_sk#33, c_customer_id#34, c_current_addr_sk#35, c_salutation#36, c_first_name#37, c_last_name#38, c_preferred_cust_flag#39, c_birth_day#40, c_birth_month#41, c_birth_year#42, c_birth_country#43, c_login#44, c_email_address#45, c_last_review_date#46, ca_address_sk#9] + +(56) BroadcastExchange +Input [13]: [c_customer_sk#33, c_customer_id#34, c_salutation#36, c_first_name#37, c_last_name#38, c_preferred_cust_flag#39, c_birth_day#40, c_birth_month#41, 
c_birth_year#42, c_birth_country#43, c_login#44, c_email_address#45, c_last_review_date#46] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#48] + +(57) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ctr_customer_sk#16] +Right keys [1]: [cast(c_customer_sk#33 as bigint)] +Join condition: None + +(58) Project [codegen id : 17] +Output [13]: [c_customer_id#34, c_salutation#36, c_first_name#37, c_last_name#38, c_preferred_cust_flag#39, c_birth_day#40, c_birth_month#41, c_birth_year#42, c_birth_country#43, c_login#44, c_email_address#45, c_last_review_date#46, ctr_total_return#18] +Input [15]: [ctr_customer_sk#16, ctr_total_return#18, c_customer_sk#33, c_customer_id#34, c_salutation#36, c_first_name#37, c_last_name#38, c_preferred_cust_flag#39, c_birth_day#40, c_birth_month#41, c_birth_year#42, c_birth_country#43, c_login#44, c_email_address#45, c_last_review_date#46] + +(59) TakeOrderedAndProject +Input [13]: [c_customer_id#34, c_salutation#36, c_first_name#37, c_last_name#38, c_preferred_cust_flag#39, c_birth_day#40, c_birth_month#41, c_birth_year#42, c_birth_country#43, c_login#44, c_email_address#45, c_last_review_date#46, ctr_total_return#18] +Arguments: 100, [c_customer_id#34 ASC NULLS FIRST, c_salutation#36 ASC NULLS FIRST, c_first_name#37 ASC NULLS FIRST, c_last_name#38 ASC NULLS FIRST, c_preferred_cust_flag#39 ASC NULLS FIRST, c_birth_day#40 ASC NULLS FIRST, c_birth_month#41 ASC NULLS FIRST, c_birth_year#42 ASC NULLS FIRST, c_birth_country#43 ASC NULLS FIRST, c_login#44 ASC NULLS FIRST, c_email_address#45 ASC NULLS FIRST, c_last_review_date#46 ASC NULLS FIRST, ctr_total_return#18 ASC NULLS FIRST], [c_customer_id#34, c_salutation#36, c_first_name#37, c_last_name#38, c_preferred_cust_flag#39, c_birth_day#40, c_birth_month#41, c_birth_year#42, c_birth_country#43, c_login#44, c_email_address#45, c_last_review_date#46, ctr_total_return#18] + diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30.sf100/simplified.txt new file mode 100644 index 0000000000000..d60728f540ce4 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30.sf100/simplified.txt @@ -0,0 +1,96 @@ +TakeOrderedAndProject [c_birth_country,c_birth_day,c_birth_month,c_birth_year,c_customer_id,c_email_address,c_first_name,c_last_name,c_last_review_date,c_login,c_preferred_cust_flag,c_salutation,ctr_total_return] + WholeStageCodegen (17) + Project [c_birth_country,c_birth_day,c_birth_month,c_birth_year,c_customer_id,c_email_address,c_first_name,c_last_name,c_last_review_date,c_login,c_preferred_cust_flag,c_salutation,ctr_total_return] + BroadcastHashJoin [c_customer_sk,ctr_customer_sk] + Project [ctr_customer_sk,ctr_total_return] + BroadcastHashJoin [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),ctr_state,ctr_state,ctr_total_return] + Filter [ctr_total_return] + HashAggregate [ca_state,sum,wr_returning_customer_sk] [ctr_customer_sk,ctr_state,ctr_total_return,sum,sum(UnscaledValue(wr_return_amt))] + InputAdapter + Exchange [ca_state,wr_returning_customer_sk] #1 + WholeStageCodegen (6) + HashAggregate [ca_state,wr_return_amt,wr_returning_customer_sk] [sum,sum] + Project [ca_state,wr_return_amt,wr_returning_customer_sk] + SortMergeJoin [ca_address_sk,wr_returning_addr_sk] + InputAdapter + WholeStageCodegen (3) + Sort [wr_returning_addr_sk] + InputAdapter + Exchange [wr_returning_addr_sk] #2 + WholeStageCodegen (2) + Project [wr_return_amt,wr_returning_addr_sk,wr_returning_customer_sk] + BroadcastHashJoin [d_date_sk,wr_returned_date_sk] + Filter [wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_return_amt,wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] + 
InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + WholeStageCodegen (5) + Sort [ca_address_sk] + InputAdapter + Exchange [ca_address_sk] #4 + WholeStageCodegen (4) + Filter [ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (14) + Filter [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] + HashAggregate [count,ctr_state,sum] [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),avg(ctr_total_return),count,ctr_state,sum] + InputAdapter + Exchange [ctr_state] #6 + WholeStageCodegen (13) + HashAggregate [ctr_state,ctr_total_return] [count,count,sum,sum] + HashAggregate [ca_state,sum,wr_returning_customer_sk] [ctr_state,ctr_total_return,sum,sum(UnscaledValue(wr_return_amt))] + InputAdapter + Exchange [ca_state,wr_returning_customer_sk] #7 + WholeStageCodegen (12) + HashAggregate [ca_state,wr_return_amt,wr_returning_customer_sk] [sum,sum] + Project [ca_state,wr_return_amt,wr_returning_customer_sk] + SortMergeJoin [ca_address_sk,wr_returning_addr_sk] + InputAdapter + WholeStageCodegen (9) + Sort [wr_returning_addr_sk] + InputAdapter + Exchange [wr_returning_addr_sk] #8 + WholeStageCodegen (8) + Project [wr_return_amt,wr_returning_addr_sk,wr_returning_customer_sk] + BroadcastHashJoin [d_date_sk,wr_returned_date_sk] + Filter [wr_returned_date_sk,wr_returning_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_return_amt,wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + WholeStageCodegen (11) + Sort [ca_address_sk] + InputAdapter + ReusedExchange [ca_address_sk,ca_state] #4 + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (16) + 
Project [c_birth_country,c_birth_day,c_birth_month,c_birth_year,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_last_review_date,c_login,c_preferred_cust_flag,c_salutation] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Filter [c_current_addr_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_birth_country,c_birth_day,c_birth_month,c_birth_year,c_current_addr_sk,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_last_review_date,c_login,c_preferred_cust_flag,c_salutation] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (15) + Project [ca_address_sk] + Filter [ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/explain.txt new file mode 100644 index 0000000000000..8b899ff2cb2da --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/explain.txt @@ -0,0 +1,303 @@ +== Physical Plan == +TakeOrderedAndProject (53) ++- * Project (52) + +- * BroadcastHashJoin Inner BuildRight (51) + :- * Project (45) + : +- * BroadcastHashJoin Inner BuildRight (44) + : :- * Project (39) + : : +- * BroadcastHashJoin Inner BuildRight (38) + : : :- * Filter (20) + : : : +- * HashAggregate (19) + : : : +- Exchange (18) + : : : +- * HashAggregate (17) + : : : +- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.web_returns (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.date_dim (4) + : : : +- BroadcastExchange (14) + : : : +- 
* Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet default.customer_address (11) + : : +- BroadcastExchange (37) + : : +- * Filter (36) + : : +- * HashAggregate (35) + : : +- Exchange (34) + : : +- * HashAggregate (33) + : : +- * HashAggregate (32) + : : +- Exchange (31) + : : +- * HashAggregate (30) + : : +- * Project (29) + : : +- * BroadcastHashJoin Inner BuildRight (28) + : : :- * Project (26) + : : : +- * BroadcastHashJoin Inner BuildRight (25) + : : : :- * Filter (23) + : : : : +- * ColumnarToRow (22) + : : : : +- Scan parquet default.web_returns (21) + : : : +- ReusedExchange (24) + : : +- ReusedExchange (27) + : +- BroadcastExchange (43) + : +- * Filter (42) + : +- * ColumnarToRow (41) + : +- Scan parquet default.customer (40) + +- BroadcastExchange (50) + +- * Project (49) + +- * Filter (48) + +- * ColumnarToRow (47) + +- Scan parquet default.customer_address (46) + + +(1) Scan parquet default.web_returns +Output [4]: [wr_returned_date_sk#1, wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_returns] +PushedFilters: [IsNotNull(wr_returned_date_sk), IsNotNull(wr_returning_addr_sk), IsNotNull(wr_returning_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [4]: [wr_returned_date_sk#1, wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4] + +(3) Filter [codegen id : 3] +Input [4]: [wr_returned_date_sk#1, wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4] +Condition : ((isnotnull(wr_returned_date_sk#1) AND isnotnull(wr_returning_addr_sk#3)) AND isnotnull(wr_returning_customer_sk#2)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#5, d_year#6] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#5, d_year#6] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#5, d_year#6] +Condition : ((isnotnull(d_year#6) AND (d_year#6 = 2002)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [2]: [d_date_sk#5, d_year#6] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [wr_returned_date_sk#1] +Right keys [1]: [cast(d_date_sk#5 as bigint)] +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4] +Input [5]: [wr_returned_date_sk#1, wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4, d_date_sk#5] + +(11) Scan parquet default.customer_address +Output [2]: [ca_address_sk#8, ca_state#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [ca_address_sk#8, ca_state#9] + +(13) Filter [codegen id : 2] +Input [2]: [ca_address_sk#8, ca_state#9] +Condition : (isnotnull(ca_address_sk#8) AND isnotnull(ca_state#9)) + +(14) BroadcastExchange +Input [2]: [ca_address_sk#8, ca_state#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [wr_returning_addr_sk#3] +Right keys [1]: [cast(ca_address_sk#8 as bigint)] +Join condition: None + +(16) Project [codegen 
id : 3] +Output [3]: [wr_returning_customer_sk#2, wr_return_amt#4, ca_state#9] +Input [5]: [wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4, ca_address_sk#8, ca_state#9] + +(17) HashAggregate [codegen id : 3] +Input [3]: [wr_returning_customer_sk#2, wr_return_amt#4, ca_state#9] +Keys [2]: [wr_returning_customer_sk#2, ca_state#9] +Functions [1]: [partial_sum(UnscaledValue(wr_return_amt#4))] +Aggregate Attributes [1]: [sum#11] +Results [3]: [wr_returning_customer_sk#2, ca_state#9, sum#12] + +(18) Exchange +Input [3]: [wr_returning_customer_sk#2, ca_state#9, sum#12] +Arguments: hashpartitioning(wr_returning_customer_sk#2, ca_state#9, 5), true, [id=#13] + +(19) HashAggregate [codegen id : 11] +Input [3]: [wr_returning_customer_sk#2, ca_state#9, sum#12] +Keys [2]: [wr_returning_customer_sk#2, ca_state#9] +Functions [1]: [sum(UnscaledValue(wr_return_amt#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(wr_return_amt#4))#14] +Results [3]: [wr_returning_customer_sk#2 AS ctr_customer_sk#15, ca_state#9 AS ctr_state#16, MakeDecimal(sum(UnscaledValue(wr_return_amt#4))#14,17,2) AS ctr_total_return#17] + +(20) Filter [codegen id : 11] +Input [3]: [ctr_customer_sk#15, ctr_state#16, ctr_total_return#17] +Condition : isnotnull(ctr_total_return#17) + +(21) Scan parquet default.web_returns +Output [4]: [wr_returned_date_sk#1, wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_returns] +PushedFilters: [IsNotNull(wr_returned_date_sk), IsNotNull(wr_returning_addr_sk)] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 6] +Input [4]: [wr_returned_date_sk#1, wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4] + +(23) Filter [codegen id : 6] +Input [4]: [wr_returned_date_sk#1, wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4] +Condition : 
(isnotnull(wr_returned_date_sk#1) AND isnotnull(wr_returning_addr_sk#3)) + +(24) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(25) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [wr_returned_date_sk#1] +Right keys [1]: [cast(d_date_sk#5 as bigint)] +Join condition: None + +(26) Project [codegen id : 6] +Output [3]: [wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4] +Input [5]: [wr_returned_date_sk#1, wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4, d_date_sk#5] + +(27) ReusedExchange [Reuses operator id: 14] +Output [2]: [ca_address_sk#8, ca_state#9] + +(28) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [wr_returning_addr_sk#3] +Right keys [1]: [cast(ca_address_sk#8 as bigint)] +Join condition: None + +(29) Project [codegen id : 6] +Output [3]: [wr_returning_customer_sk#2, wr_return_amt#4, ca_state#9] +Input [5]: [wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4, ca_address_sk#8, ca_state#9] + +(30) HashAggregate [codegen id : 6] +Input [3]: [wr_returning_customer_sk#2, wr_return_amt#4, ca_state#9] +Keys [2]: [wr_returning_customer_sk#2, ca_state#9] +Functions [1]: [partial_sum(UnscaledValue(wr_return_amt#4))] +Aggregate Attributes [1]: [sum#18] +Results [3]: [wr_returning_customer_sk#2, ca_state#9, sum#19] + +(31) Exchange +Input [3]: [wr_returning_customer_sk#2, ca_state#9, sum#19] +Arguments: hashpartitioning(wr_returning_customer_sk#2, ca_state#9, 5), true, [id=#20] + +(32) HashAggregate [codegen id : 7] +Input [3]: [wr_returning_customer_sk#2, ca_state#9, sum#19] +Keys [2]: [wr_returning_customer_sk#2, ca_state#9] +Functions [1]: [sum(UnscaledValue(wr_return_amt#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(wr_return_amt#4))#21] +Results [2]: [ca_state#9 AS ctr_state#16, MakeDecimal(sum(UnscaledValue(wr_return_amt#4))#21,17,2) AS ctr_total_return#17] + +(33) HashAggregate [codegen id : 7] +Input [2]: [ctr_state#16, ctr_total_return#17] +Keys [1]: [ctr_state#16] 
+Functions [1]: [partial_avg(ctr_total_return#17)] +Aggregate Attributes [2]: [sum#22, count#23] +Results [3]: [ctr_state#16, sum#24, count#25] + +(34) Exchange +Input [3]: [ctr_state#16, sum#24, count#25] +Arguments: hashpartitioning(ctr_state#16, 5), true, [id=#26] + +(35) HashAggregate [codegen id : 8] +Input [3]: [ctr_state#16, sum#24, count#25] +Keys [1]: [ctr_state#16] +Functions [1]: [avg(ctr_total_return#17)] +Aggregate Attributes [1]: [avg(ctr_total_return#17)#27] +Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#17)#27) * 1.200000), DecimalType(24,7), true) AS (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#28, ctr_state#16 AS ctr_state#16#29] + +(36) Filter [codegen id : 8] +Input [2]: [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#28, ctr_state#16#29] +Condition : isnotnull((CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#28) + +(37) BroadcastExchange +Input [2]: [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#28, ctr_state#16#29] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [id=#30] + +(38) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ctr_state#16] +Right keys [1]: [ctr_state#16#29] +Join condition: (cast(ctr_total_return#17 as decimal(24,7)) > (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#28) + +(39) Project [codegen id : 11] +Output [2]: [ctr_customer_sk#15, ctr_total_return#17] +Input [5]: [ctr_customer_sk#15, ctr_state#16, ctr_total_return#17, (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#28, ctr_state#16#29] + +(40) Scan parquet default.customer +Output [14]: [c_customer_sk#31, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_day#38, c_birth_month#39, c_birth_year#40, c_birth_country#41, c_login#42, c_email_address#43, c_last_review_date#44] 
+Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 9] +Input [14]: [c_customer_sk#31, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_day#38, c_birth_month#39, c_birth_year#40, c_birth_country#41, c_login#42, c_email_address#43, c_last_review_date#44] + +(42) Filter [codegen id : 9] +Input [14]: [c_customer_sk#31, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_day#38, c_birth_month#39, c_birth_year#40, c_birth_country#41, c_login#42, c_email_address#43, c_last_review_date#44] +Condition : (isnotnull(c_customer_sk#31) AND isnotnull(c_current_addr_sk#33)) + +(43) BroadcastExchange +Input [14]: [c_customer_sk#31, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_day#38, c_birth_month#39, c_birth_year#40, c_birth_country#41, c_login#42, c_email_address#43, c_last_review_date#44] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#45] + +(44) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ctr_customer_sk#15] +Right keys [1]: [cast(c_customer_sk#31 as bigint)] +Join condition: None + +(45) Project [codegen id : 11] +Output [14]: [ctr_total_return#17, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_day#38, c_birth_month#39, c_birth_year#40, c_birth_country#41, c_login#42, c_email_address#43, c_last_review_date#44] +Input [16]: [ctr_customer_sk#15, ctr_total_return#17, c_customer_sk#31, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36, 
c_preferred_cust_flag#37, c_birth_day#38, c_birth_month#39, c_birth_year#40, c_birth_country#41, c_login#42, c_email_address#43, c_last_review_date#44] + +(46) Scan parquet default.customer_address +Output [2]: [ca_address_sk#8, ca_state#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,GA), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(47) ColumnarToRow [codegen id : 10] +Input [2]: [ca_address_sk#8, ca_state#9] + +(48) Filter [codegen id : 10] +Input [2]: [ca_address_sk#8, ca_state#9] +Condition : ((isnotnull(ca_state#9) AND (ca_state#9 = GA)) AND isnotnull(ca_address_sk#8)) + +(49) Project [codegen id : 10] +Output [1]: [ca_address_sk#8] +Input [2]: [ca_address_sk#8, ca_state#9] + +(50) BroadcastExchange +Input [1]: [ca_address_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#46] + +(51) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [c_current_addr_sk#33] +Right keys [1]: [ca_address_sk#8] +Join condition: None + +(52) Project [codegen id : 11] +Output [13]: [c_customer_id#32, c_salutation#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_day#38, c_birth_month#39, c_birth_year#40, c_birth_country#41, c_login#42, c_email_address#43, c_last_review_date#44, ctr_total_return#17] +Input [15]: [ctr_total_return#17, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_day#38, c_birth_month#39, c_birth_year#40, c_birth_country#41, c_login#42, c_email_address#43, c_last_review_date#44, ca_address_sk#8] + +(53) TakeOrderedAndProject +Input [13]: [c_customer_id#32, c_salutation#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_day#38, c_birth_month#39, c_birth_year#40, c_birth_country#41, c_login#42, c_email_address#43, 
c_last_review_date#44, ctr_total_return#17] +Arguments: 100, [c_customer_id#32 ASC NULLS FIRST, c_salutation#34 ASC NULLS FIRST, c_first_name#35 ASC NULLS FIRST, c_last_name#36 ASC NULLS FIRST, c_preferred_cust_flag#37 ASC NULLS FIRST, c_birth_day#38 ASC NULLS FIRST, c_birth_month#39 ASC NULLS FIRST, c_birth_year#40 ASC NULLS FIRST, c_birth_country#41 ASC NULLS FIRST, c_login#42 ASC NULLS FIRST, c_email_address#43 ASC NULLS FIRST, c_last_review_date#44 ASC NULLS FIRST, ctr_total_return#17 ASC NULLS FIRST], [c_customer_id#32, c_salutation#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_day#38, c_birth_month#39, c_birth_year#40, c_birth_country#41, c_login#42, c_email_address#43, c_last_review_date#44, ctr_total_return#17] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/simplified.txt new file mode 100644 index 0000000000000..afc390489e5aa --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/simplified.txt @@ -0,0 +1,78 @@ +TakeOrderedAndProject [c_birth_country,c_birth_day,c_birth_month,c_birth_year,c_customer_id,c_email_address,c_first_name,c_last_name,c_last_review_date,c_login,c_preferred_cust_flag,c_salutation,ctr_total_return] + WholeStageCodegen (11) + Project [c_birth_country,c_birth_day,c_birth_month,c_birth_year,c_customer_id,c_email_address,c_first_name,c_last_name,c_last_review_date,c_login,c_preferred_cust_flag,c_salutation,ctr_total_return] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_birth_country,c_birth_day,c_birth_month,c_birth_year,c_current_addr_sk,c_customer_id,c_email_address,c_first_name,c_last_name,c_last_review_date,c_login,c_preferred_cust_flag,c_salutation,ctr_total_return] + BroadcastHashJoin [c_customer_sk,ctr_customer_sk] + Project [ctr_customer_sk,ctr_total_return] + BroadcastHashJoin [(CAST(avg(ctr_total_return) AS 
DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),ctr_state,ctr_state,ctr_total_return] + Filter [ctr_total_return] + HashAggregate [ca_state,sum,wr_returning_customer_sk] [ctr_customer_sk,ctr_state,ctr_total_return,sum,sum(UnscaledValue(wr_return_amt))] + InputAdapter + Exchange [ca_state,wr_returning_customer_sk] #1 + WholeStageCodegen (3) + HashAggregate [ca_state,wr_return_amt,wr_returning_customer_sk] [sum,sum] + Project [ca_state,wr_return_amt,wr_returning_customer_sk] + BroadcastHashJoin [ca_address_sk,wr_returning_addr_sk] + Project [wr_return_amt,wr_returning_addr_sk,wr_returning_customer_sk] + BroadcastHashJoin [d_date_sk,wr_returned_date_sk] + Filter [wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_return_amt,wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (8) + Filter [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] + HashAggregate [count,ctr_state,sum] [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),avg(ctr_total_return),count,ctr_state,sum] + InputAdapter + Exchange [ctr_state] #5 + WholeStageCodegen (7) + HashAggregate [ctr_state,ctr_total_return] [count,count,sum,sum] + HashAggregate [ca_state,sum,wr_returning_customer_sk] [ctr_state,ctr_total_return,sum,sum(UnscaledValue(wr_return_amt))] + InputAdapter + Exchange [ca_state,wr_returning_customer_sk] #6 + WholeStageCodegen (6) + HashAggregate [ca_state,wr_return_amt,wr_returning_customer_sk] [sum,sum] + 
Project [ca_state,wr_return_amt,wr_returning_customer_sk] + BroadcastHashJoin [ca_address_sk,wr_returning_addr_sk] + Project [wr_return_amt,wr_returning_addr_sk,wr_returning_customer_sk] + BroadcastHashJoin [d_date_sk,wr_returned_date_sk] + Filter [wr_returned_date_sk,wr_returning_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_return_amt,wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] + InputAdapter + ReusedExchange [d_date_sk] #2 + InputAdapter + ReusedExchange [ca_address_sk,ca_state] #3 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (9) + Filter [c_current_addr_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_birth_country,c_birth_day,c_birth_month,c_birth_year,c_current_addr_sk,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_last_review_date,c_login,c_preferred_cust_flag,c_salutation] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (10) + Project [ca_address_sk] + Filter [ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.sf100/explain.txt new file mode 100644 index 0000000000000..919bf08024030 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.sf100/explain.txt @@ -0,0 +1,663 @@ +== Physical Plan == +* Sort (119) ++- Exchange (118) + +- * Project (117) + +- * BroadcastHashJoin Inner BuildRight (116) + :- * Project (63) + : +- * BroadcastHashJoin Inner BuildRight (62) + : :- * Project (42) + : : +- * BroadcastHashJoin Inner BuildRight (41) + : : :- * HashAggregate (21) + : : : +- Exchange (20) + : : : +- * HashAggregate (19) + : : : +- * Project (18) + : : : +- * SortMergeJoin Inner (17) + : : : :- * Sort (11) + : : : : +- Exchange (10) + : : : : +- 
* Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.date_dim (4) + : : : +- * Sort (16) + : : : +- Exchange (15) + : : : +- * Filter (14) + : : : +- * ColumnarToRow (13) + : : : +- Scan parquet default.customer_address (12) + : : +- BroadcastExchange (40) + : : +- * HashAggregate (39) + : : +- Exchange (38) + : : +- * HashAggregate (37) + : : +- * Project (36) + : : +- * SortMergeJoin Inner (35) + : : :- * Sort (32) + : : : +- Exchange (31) + : : : +- * Project (30) + : : : +- * BroadcastHashJoin Inner BuildRight (29) + : : : :- * Filter (24) + : : : : +- * ColumnarToRow (23) + : : : : +- Scan parquet default.store_sales (22) + : : : +- BroadcastExchange (28) + : : : +- * Filter (27) + : : : +- * ColumnarToRow (26) + : : : +- Scan parquet default.date_dim (25) + : : +- * Sort (34) + : : +- ReusedExchange (33) + : +- BroadcastExchange (61) + : +- * HashAggregate (60) + : +- Exchange (59) + : +- * HashAggregate (58) + : +- * Project (57) + : +- * SortMergeJoin Inner (56) + : :- * Sort (53) + : : +- Exchange (52) + : : +- * Project (51) + : : +- * BroadcastHashJoin Inner BuildRight (50) + : : :- * Filter (45) + : : : +- * ColumnarToRow (44) + : : : +- Scan parquet default.store_sales (43) + : : +- BroadcastExchange (49) + : : +- * Filter (48) + : : +- * ColumnarToRow (47) + : : +- Scan parquet default.date_dim (46) + : +- * Sort (55) + : +- ReusedExchange (54) + +- BroadcastExchange (115) + +- * Project (114) + +- * BroadcastHashJoin Inner BuildRight (113) + :- * Project (96) + : +- * BroadcastHashJoin Inner BuildRight (95) + : :- * HashAggregate (78) + : : +- Exchange (77) + : : +- * HashAggregate (76) + : : +- * Project (75) + : : +- * SortMergeJoin Inner (74) + : : :- * Sort (71) + : : : +- Exchange (70) + : : : 
+- * Project (69) + : : : +- * BroadcastHashJoin Inner BuildRight (68) + : : : :- * Filter (66) + : : : : +- * ColumnarToRow (65) + : : : : +- Scan parquet default.web_sales (64) + : : : +- ReusedExchange (67) + : : +- * Sort (73) + : : +- ReusedExchange (72) + : +- BroadcastExchange (94) + : +- * HashAggregate (93) + : +- Exchange (92) + : +- * HashAggregate (91) + : +- * Project (90) + : +- * SortMergeJoin Inner (89) + : :- * Sort (86) + : : +- Exchange (85) + : : +- * Project (84) + : : +- * BroadcastHashJoin Inner BuildRight (83) + : : :- * Filter (81) + : : : +- * ColumnarToRow (80) + : : : +- Scan parquet default.web_sales (79) + : : +- ReusedExchange (82) + : +- * Sort (88) + : +- ReusedExchange (87) + +- BroadcastExchange (112) + +- * HashAggregate (111) + +- Exchange (110) + +- * HashAggregate (109) + +- * Project (108) + +- * SortMergeJoin Inner (107) + :- * Sort (104) + : +- Exchange (103) + : +- * Project (102) + : +- * BroadcastHashJoin Inner BuildRight (101) + : :- * Filter (99) + : : +- * ColumnarToRow (98) + : : +- Scan parquet default.web_sales (97) + : +- ReusedExchange (100) + +- * Sort (106) + +- ReusedExchange (105) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] + +(3) Filter [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_addr_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,3), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Condition : ((((isnotnull(d_qoy#6) AND isnotnull(d_year#5)) AND (d_qoy#6 = 3)) AND (d_year#5 = 2000)) AND isnotnull(d_date_sk#4)) + +(7) BroadcastExchange +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] + +(8) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(9) Project [codegen id : 2] +Output [4]: [ss_addr_sk#2, ss_ext_sales_price#3, d_year#5, d_qoy#6] +Input [6]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, d_date_sk#4, d_year#5, d_qoy#6] + +(10) Exchange +Input [4]: [ss_addr_sk#2, ss_ext_sales_price#3, d_year#5, d_qoy#6] +Arguments: hashpartitioning(ss_addr_sk#2, 5), true, [id=#8] + +(11) Sort [codegen id : 3] +Input [4]: [ss_addr_sk#2, ss_ext_sales_price#3, d_year#5, d_qoy#6] +Arguments: [ss_addr_sk#2 ASC NULLS FIRST], false, 0 + +(12) Scan parquet default.customer_address +Output [2]: [ca_address_sk#9, ca_county#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_county)] +ReadSchema: struct + +(13) ColumnarToRow [codegen id : 4] +Input [2]: [ca_address_sk#9, ca_county#10] + +(14) Filter [codegen id : 4] +Input [2]: [ca_address_sk#9, ca_county#10] +Condition : (isnotnull(ca_address_sk#9) AND isnotnull(ca_county#10)) + +(15) Exchange +Input [2]: [ca_address_sk#9, 
ca_county#10] +Arguments: hashpartitioning(ca_address_sk#9, 5), true, [id=#11] + +(16) Sort [codegen id : 5] +Input [2]: [ca_address_sk#9, ca_county#10] +Arguments: [ca_address_sk#9 ASC NULLS FIRST], false, 0 + +(17) SortMergeJoin [codegen id : 6] +Left keys [1]: [ss_addr_sk#2] +Right keys [1]: [ca_address_sk#9] +Join condition: None + +(18) Project [codegen id : 6] +Output [4]: [ss_ext_sales_price#3, d_year#5, d_qoy#6, ca_county#10] +Input [6]: [ss_addr_sk#2, ss_ext_sales_price#3, d_year#5, d_qoy#6, ca_address_sk#9, ca_county#10] + +(19) HashAggregate [codegen id : 6] +Input [4]: [ss_ext_sales_price#3, d_year#5, d_qoy#6, ca_county#10] +Keys [3]: [ca_county#10, d_qoy#6, d_year#5] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#12] +Results [4]: [ca_county#10, d_qoy#6, d_year#5, sum#13] + +(20) Exchange +Input [4]: [ca_county#10, d_qoy#6, d_year#5, sum#13] +Arguments: hashpartitioning(ca_county#10, d_qoy#6, d_year#5, 5), true, [id=#14] + +(21) HashAggregate [codegen id : 42] +Input [4]: [ca_county#10, d_qoy#6, d_year#5, sum#13] +Keys [3]: [ca_county#10, d_qoy#6, d_year#5] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#15] +Results [2]: [ca_county#10, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#15,17,2) AS store_sales#16] + +(22) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 8] +Input [3]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] + +(24) Filter [codegen id : 8] +Input [3]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] +Condition : 
(isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_addr_sk#2)) + +(25) Scan parquet default.date_dim +Output [3]: [d_date_sk#17, d_year#18, d_qoy#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(26) ColumnarToRow [codegen id : 7] +Input [3]: [d_date_sk#17, d_year#18, d_qoy#19] + +(27) Filter [codegen id : 7] +Input [3]: [d_date_sk#17, d_year#18, d_qoy#19] +Condition : ((((isnotnull(d_qoy#19) AND isnotnull(d_year#18)) AND (d_qoy#19 = 2)) AND (d_year#18 = 2000)) AND isnotnull(d_date_sk#17)) + +(28) BroadcastExchange +Input [3]: [d_date_sk#17, d_year#18, d_qoy#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#20] + +(29) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#17] +Join condition: None + +(30) Project [codegen id : 8] +Output [4]: [ss_addr_sk#2, ss_ext_sales_price#3, d_year#18, d_qoy#19] +Input [6]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, d_date_sk#17, d_year#18, d_qoy#19] + +(31) Exchange +Input [4]: [ss_addr_sk#2, ss_ext_sales_price#3, d_year#18, d_qoy#19] +Arguments: hashpartitioning(ss_addr_sk#2, 5), true, [id=#21] + +(32) Sort [codegen id : 9] +Input [4]: [ss_addr_sk#2, ss_ext_sales_price#3, d_year#18, d_qoy#19] +Arguments: [ss_addr_sk#2 ASC NULLS FIRST], false, 0 + +(33) ReusedExchange [Reuses operator id: 15] +Output [2]: [ca_address_sk#22, ca_county#23] + +(34) Sort [codegen id : 11] +Input [2]: [ca_address_sk#22, ca_county#23] +Arguments: [ca_address_sk#22 ASC NULLS FIRST], false, 0 + +(35) SortMergeJoin [codegen id : 12] +Left keys [1]: [ss_addr_sk#2] +Right keys [1]: [ca_address_sk#22] +Join condition: None + +(36) Project [codegen id : 12] +Output [4]: 
[ss_ext_sales_price#3, d_year#18, d_qoy#19, ca_county#23] +Input [6]: [ss_addr_sk#2, ss_ext_sales_price#3, d_year#18, d_qoy#19, ca_address_sk#22, ca_county#23] + +(37) HashAggregate [codegen id : 12] +Input [4]: [ss_ext_sales_price#3, d_year#18, d_qoy#19, ca_county#23] +Keys [3]: [ca_county#23, d_qoy#19, d_year#18] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#24] +Results [4]: [ca_county#23, d_qoy#19, d_year#18, sum#25] + +(38) Exchange +Input [4]: [ca_county#23, d_qoy#19, d_year#18, sum#25] +Arguments: hashpartitioning(ca_county#23, d_qoy#19, d_year#18, 5), true, [id=#26] + +(39) HashAggregate [codegen id : 13] +Input [4]: [ca_county#23, d_qoy#19, d_year#18, sum#25] +Keys [3]: [ca_county#23, d_qoy#19, d_year#18] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#27] +Results [2]: [ca_county#23, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#27,17,2) AS store_sales#28] + +(40) BroadcastExchange +Input [2]: [ca_county#23, store_sales#28] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#29] + +(41) BroadcastHashJoin [codegen id : 42] +Left keys [1]: [ca_county#10] +Right keys [1]: [ca_county#23] +Join condition: None + +(42) Project [codegen id : 42] +Output [3]: [store_sales#16, ca_county#23, store_sales#28] +Input [4]: [ca_county#10, store_sales#16, ca_county#23, store_sales#28] + +(43) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk)] +ReadSchema: struct + +(44) ColumnarToRow [codegen id : 15] +Input [3]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] + +(45) Filter [codegen id : 15] +Input [3]: 
[ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_addr_sk#2)) + +(46) Scan parquet default.date_dim +Output [3]: [d_date_sk#30, d_year#31, d_qoy#32] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,1), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(47) ColumnarToRow [codegen id : 14] +Input [3]: [d_date_sk#30, d_year#31, d_qoy#32] + +(48) Filter [codegen id : 14] +Input [3]: [d_date_sk#30, d_year#31, d_qoy#32] +Condition : ((((isnotnull(d_qoy#32) AND isnotnull(d_year#31)) AND (d_qoy#32 = 1)) AND (d_year#31 = 2000)) AND isnotnull(d_date_sk#30)) + +(49) BroadcastExchange +Input [3]: [d_date_sk#30, d_year#31, d_qoy#32] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#33] + +(50) BroadcastHashJoin [codegen id : 15] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#30] +Join condition: None + +(51) Project [codegen id : 15] +Output [4]: [ss_addr_sk#2, ss_ext_sales_price#3, d_year#31, d_qoy#32] +Input [6]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, d_date_sk#30, d_year#31, d_qoy#32] + +(52) Exchange +Input [4]: [ss_addr_sk#2, ss_ext_sales_price#3, d_year#31, d_qoy#32] +Arguments: hashpartitioning(ss_addr_sk#2, 5), true, [id=#34] + +(53) Sort [codegen id : 16] +Input [4]: [ss_addr_sk#2, ss_ext_sales_price#3, d_year#31, d_qoy#32] +Arguments: [ss_addr_sk#2 ASC NULLS FIRST], false, 0 + +(54) ReusedExchange [Reuses operator id: 15] +Output [2]: [ca_address_sk#35, ca_county#36] + +(55) Sort [codegen id : 18] +Input [2]: [ca_address_sk#35, ca_county#36] +Arguments: [ca_address_sk#35 ASC NULLS FIRST], false, 0 + +(56) SortMergeJoin [codegen id : 19] +Left keys [1]: [ss_addr_sk#2] +Right keys [1]: [ca_address_sk#35] +Join 
condition: None + +(57) Project [codegen id : 19] +Output [4]: [ss_ext_sales_price#3, d_year#31, d_qoy#32, ca_county#36] +Input [6]: [ss_addr_sk#2, ss_ext_sales_price#3, d_year#31, d_qoy#32, ca_address_sk#35, ca_county#36] + +(58) HashAggregate [codegen id : 19] +Input [4]: [ss_ext_sales_price#3, d_year#31, d_qoy#32, ca_county#36] +Keys [3]: [ca_county#36, d_qoy#32, d_year#31] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#37] +Results [4]: [ca_county#36, d_qoy#32, d_year#31, sum#38] + +(59) Exchange +Input [4]: [ca_county#36, d_qoy#32, d_year#31, sum#38] +Arguments: hashpartitioning(ca_county#36, d_qoy#32, d_year#31, 5), true, [id=#39] + +(60) HashAggregate [codegen id : 20] +Input [4]: [ca_county#36, d_qoy#32, d_year#31, sum#38] +Keys [3]: [ca_county#36, d_qoy#32, d_year#31] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#40] +Results [3]: [ca_county#36, d_year#31, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#40,17,2) AS store_sales#41] + +(61) BroadcastExchange +Input [3]: [ca_county#36, d_year#31, store_sales#41] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#42] + +(62) BroadcastHashJoin [codegen id : 42] +Left keys [1]: [ca_county#23] +Right keys [1]: [ca_county#36] +Join condition: None + +(63) Project [codegen id : 42] +Output [5]: [store_sales#16, store_sales#28, ca_county#36, d_year#31, store_sales#41] +Input [6]: [store_sales#16, ca_county#23, store_sales#28, ca_county#36, d_year#31, store_sales#41] + +(64) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#43, ws_bill_addr_sk#44, ws_ext_sales_price#45] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk)] +ReadSchema: struct + 
+(65) ColumnarToRow [codegen id : 22] +Input [3]: [ws_sold_date_sk#43, ws_bill_addr_sk#44, ws_ext_sales_price#45] + +(66) Filter [codegen id : 22] +Input [3]: [ws_sold_date_sk#43, ws_bill_addr_sk#44, ws_ext_sales_price#45] +Condition : (isnotnull(ws_sold_date_sk#43) AND isnotnull(ws_bill_addr_sk#44)) + +(67) ReusedExchange [Reuses operator id: 28] +Output [3]: [d_date_sk#46, d_year#47, d_qoy#48] + +(68) BroadcastHashJoin [codegen id : 22] +Left keys [1]: [ws_sold_date_sk#43] +Right keys [1]: [d_date_sk#46] +Join condition: None + +(69) Project [codegen id : 22] +Output [4]: [ws_bill_addr_sk#44, ws_ext_sales_price#45, d_year#47, d_qoy#48] +Input [6]: [ws_sold_date_sk#43, ws_bill_addr_sk#44, ws_ext_sales_price#45, d_date_sk#46, d_year#47, d_qoy#48] + +(70) Exchange +Input [4]: [ws_bill_addr_sk#44, ws_ext_sales_price#45, d_year#47, d_qoy#48] +Arguments: hashpartitioning(ws_bill_addr_sk#44, 5), true, [id=#49] + +(71) Sort [codegen id : 23] +Input [4]: [ws_bill_addr_sk#44, ws_ext_sales_price#45, d_year#47, d_qoy#48] +Arguments: [ws_bill_addr_sk#44 ASC NULLS FIRST], false, 0 + +(72) ReusedExchange [Reuses operator id: 15] +Output [2]: [ca_address_sk#50, ca_county#51] + +(73) Sort [codegen id : 25] +Input [2]: [ca_address_sk#50, ca_county#51] +Arguments: [ca_address_sk#50 ASC NULLS FIRST], false, 0 + +(74) SortMergeJoin [codegen id : 26] +Left keys [1]: [ws_bill_addr_sk#44] +Right keys [1]: [ca_address_sk#50] +Join condition: None + +(75) Project [codegen id : 26] +Output [4]: [ws_ext_sales_price#45, d_year#47, d_qoy#48, ca_county#51] +Input [6]: [ws_bill_addr_sk#44, ws_ext_sales_price#45, d_year#47, d_qoy#48, ca_address_sk#50, ca_county#51] + +(76) HashAggregate [codegen id : 26] +Input [4]: [ws_ext_sales_price#45, d_year#47, d_qoy#48, ca_county#51] +Keys [3]: [ca_county#51, d_qoy#48, d_year#47] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#45))] +Aggregate Attributes [1]: [sum#52] +Results [4]: [ca_county#51, d_qoy#48, d_year#47, sum#53] + +(77) Exchange 
+Input [4]: [ca_county#51, d_qoy#48, d_year#47, sum#53] +Arguments: hashpartitioning(ca_county#51, d_qoy#48, d_year#47, 5), true, [id=#54] + +(78) HashAggregate [codegen id : 41] +Input [4]: [ca_county#51, d_qoy#48, d_year#47, sum#53] +Keys [3]: [ca_county#51, d_qoy#48, d_year#47] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#45))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#45))#55] +Results [2]: [ca_county#51, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#45))#55,17,2) AS web_sales#56] + +(79) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#43, ws_bill_addr_sk#44, ws_ext_sales_price#45] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk)] +ReadSchema: struct + +(80) ColumnarToRow [codegen id : 28] +Input [3]: [ws_sold_date_sk#43, ws_bill_addr_sk#44, ws_ext_sales_price#45] + +(81) Filter [codegen id : 28] +Input [3]: [ws_sold_date_sk#43, ws_bill_addr_sk#44, ws_ext_sales_price#45] +Condition : (isnotnull(ws_sold_date_sk#43) AND isnotnull(ws_bill_addr_sk#44)) + +(82) ReusedExchange [Reuses operator id: 49] +Output [3]: [d_date_sk#57, d_year#58, d_qoy#59] + +(83) BroadcastHashJoin [codegen id : 28] +Left keys [1]: [ws_sold_date_sk#43] +Right keys [1]: [d_date_sk#57] +Join condition: None + +(84) Project [codegen id : 28] +Output [4]: [ws_bill_addr_sk#44, ws_ext_sales_price#45, d_year#58, d_qoy#59] +Input [6]: [ws_sold_date_sk#43, ws_bill_addr_sk#44, ws_ext_sales_price#45, d_date_sk#57, d_year#58, d_qoy#59] + +(85) Exchange +Input [4]: [ws_bill_addr_sk#44, ws_ext_sales_price#45, d_year#58, d_qoy#59] +Arguments: hashpartitioning(ws_bill_addr_sk#44, 5), true, [id=#60] + +(86) Sort [codegen id : 29] +Input [4]: [ws_bill_addr_sk#44, ws_ext_sales_price#45, d_year#58, d_qoy#59] +Arguments: [ws_bill_addr_sk#44 ASC NULLS FIRST], false, 0 
+ +(87) ReusedExchange [Reuses operator id: 15] +Output [2]: [ca_address_sk#61, ca_county#62] + +(88) Sort [codegen id : 31] +Input [2]: [ca_address_sk#61, ca_county#62] +Arguments: [ca_address_sk#61 ASC NULLS FIRST], false, 0 + +(89) SortMergeJoin [codegen id : 32] +Left keys [1]: [ws_bill_addr_sk#44] +Right keys [1]: [ca_address_sk#61] +Join condition: None + +(90) Project [codegen id : 32] +Output [4]: [ws_ext_sales_price#45, d_year#58, d_qoy#59, ca_county#62] +Input [6]: [ws_bill_addr_sk#44, ws_ext_sales_price#45, d_year#58, d_qoy#59, ca_address_sk#61, ca_county#62] + +(91) HashAggregate [codegen id : 32] +Input [4]: [ws_ext_sales_price#45, d_year#58, d_qoy#59, ca_county#62] +Keys [3]: [ca_county#62, d_qoy#59, d_year#58] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#45))] +Aggregate Attributes [1]: [sum#63] +Results [4]: [ca_county#62, d_qoy#59, d_year#58, sum#64] + +(92) Exchange +Input [4]: [ca_county#62, d_qoy#59, d_year#58, sum#64] +Arguments: hashpartitioning(ca_county#62, d_qoy#59, d_year#58, 5), true, [id=#65] + +(93) HashAggregate [codegen id : 33] +Input [4]: [ca_county#62, d_qoy#59, d_year#58, sum#64] +Keys [3]: [ca_county#62, d_qoy#59, d_year#58] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#45))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#45))#66] +Results [2]: [ca_county#62, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#45))#66,17,2) AS web_sales#67] + +(94) BroadcastExchange +Input [2]: [ca_county#62, web_sales#67] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#68] + +(95) BroadcastHashJoin [codegen id : 41] +Left keys [1]: [ca_county#51] +Right keys [1]: [ca_county#62] +Join condition: None + +(96) Project [codegen id : 41] +Output [3]: [web_sales#56, ca_county#62, web_sales#67] +Input [4]: [ca_county#51, web_sales#56, ca_county#62, web_sales#67] + +(97) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#43, ws_bill_addr_sk#44, ws_ext_sales_price#45] 
+Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk)] +ReadSchema: struct + +(98) ColumnarToRow [codegen id : 35] +Input [3]: [ws_sold_date_sk#43, ws_bill_addr_sk#44, ws_ext_sales_price#45] + +(99) Filter [codegen id : 35] +Input [3]: [ws_sold_date_sk#43, ws_bill_addr_sk#44, ws_ext_sales_price#45] +Condition : (isnotnull(ws_sold_date_sk#43) AND isnotnull(ws_bill_addr_sk#44)) + +(100) ReusedExchange [Reuses operator id: 7] +Output [3]: [d_date_sk#69, d_year#70, d_qoy#71] + +(101) BroadcastHashJoin [codegen id : 35] +Left keys [1]: [ws_sold_date_sk#43] +Right keys [1]: [d_date_sk#69] +Join condition: None + +(102) Project [codegen id : 35] +Output [4]: [ws_bill_addr_sk#44, ws_ext_sales_price#45, d_year#70, d_qoy#71] +Input [6]: [ws_sold_date_sk#43, ws_bill_addr_sk#44, ws_ext_sales_price#45, d_date_sk#69, d_year#70, d_qoy#71] + +(103) Exchange +Input [4]: [ws_bill_addr_sk#44, ws_ext_sales_price#45, d_year#70, d_qoy#71] +Arguments: hashpartitioning(ws_bill_addr_sk#44, 5), true, [id=#72] + +(104) Sort [codegen id : 36] +Input [4]: [ws_bill_addr_sk#44, ws_ext_sales_price#45, d_year#70, d_qoy#71] +Arguments: [ws_bill_addr_sk#44 ASC NULLS FIRST], false, 0 + +(105) ReusedExchange [Reuses operator id: 15] +Output [2]: [ca_address_sk#73, ca_county#74] + +(106) Sort [codegen id : 38] +Input [2]: [ca_address_sk#73, ca_county#74] +Arguments: [ca_address_sk#73 ASC NULLS FIRST], false, 0 + +(107) SortMergeJoin [codegen id : 39] +Left keys [1]: [ws_bill_addr_sk#44] +Right keys [1]: [ca_address_sk#73] +Join condition: None + +(108) Project [codegen id : 39] +Output [4]: [ws_ext_sales_price#45, d_year#70, d_qoy#71, ca_county#74] +Input [6]: [ws_bill_addr_sk#44, ws_ext_sales_price#45, d_year#70, d_qoy#71, ca_address_sk#73, ca_county#74] + +(109) HashAggregate [codegen id : 39] +Input [4]: 
[ws_ext_sales_price#45, d_year#70, d_qoy#71, ca_county#74] +Keys [3]: [ca_county#74, d_qoy#71, d_year#70] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#45))] +Aggregate Attributes [1]: [sum#75] +Results [4]: [ca_county#74, d_qoy#71, d_year#70, sum#76] + +(110) Exchange +Input [4]: [ca_county#74, d_qoy#71, d_year#70, sum#76] +Arguments: hashpartitioning(ca_county#74, d_qoy#71, d_year#70, 5), true, [id=#77] + +(111) HashAggregate [codegen id : 40] +Input [4]: [ca_county#74, d_qoy#71, d_year#70, sum#76] +Keys [3]: [ca_county#74, d_qoy#71, d_year#70] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#45))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#45))#78] +Results [2]: [ca_county#74, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#45))#78,17,2) AS web_sales#79] + +(112) BroadcastExchange +Input [2]: [ca_county#74, web_sales#79] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#80] + +(113) BroadcastHashJoin [codegen id : 41] +Left keys [1]: [ca_county#62] +Right keys [1]: [ca_county#74] +Join condition: None + +(114) Project [codegen id : 41] +Output [4]: [web_sales#56, ca_county#62, web_sales#67, web_sales#79] +Input [5]: [web_sales#56, ca_county#62, web_sales#67, ca_county#74, web_sales#79] + +(115) BroadcastExchange +Input [4]: [web_sales#56, ca_county#62, web_sales#67, web_sales#79] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [id=#81] + +(116) BroadcastHashJoin [codegen id : 42] +Left keys [1]: [ca_county#36] +Right keys [1]: [ca_county#62] +Join condition: ((CASE WHEN (web_sales#67 > 0.00) THEN CheckOverflow((promote_precision(web_sales#56) / promote_precision(web_sales#67)), DecimalType(37,20), true) ELSE null END > CASE WHEN (store_sales#41 > 0.00) THEN CheckOverflow((promote_precision(store_sales#28) / promote_precision(store_sales#41)), DecimalType(37,20), true) ELSE null END) AND (CASE WHEN (web_sales#56 > 0.00) THEN 
CheckOverflow((promote_precision(web_sales#79) / promote_precision(web_sales#56)), DecimalType(37,20), true) ELSE null END > CASE WHEN (store_sales#28 > 0.00) THEN CheckOverflow((promote_precision(store_sales#16) / promote_precision(store_sales#28)), DecimalType(37,20), true) ELSE null END)) + +(117) Project [codegen id : 42] +Output [6]: [ca_county#36, d_year#31, CheckOverflow((promote_precision(web_sales#56) / promote_precision(web_sales#67)), DecimalType(37,20), true) AS web_q1_q2_increase#82, CheckOverflow((promote_precision(store_sales#28) / promote_precision(store_sales#41)), DecimalType(37,20), true) AS store_q1_q2_increase#83, CheckOverflow((promote_precision(web_sales#79) / promote_precision(web_sales#56)), DecimalType(37,20), true) AS web_q2_q3_increase#84, CheckOverflow((promote_precision(store_sales#16) / promote_precision(store_sales#28)), DecimalType(37,20), true) AS store_q2_q3_increase#85] +Input [9]: [store_sales#16, store_sales#28, ca_county#36, d_year#31, store_sales#41, web_sales#56, ca_county#62, web_sales#67, web_sales#79] + +(118) Exchange +Input [6]: [ca_county#36, d_year#31, web_q1_q2_increase#82, store_q1_q2_increase#83, web_q2_q3_increase#84, store_q2_q3_increase#85] +Arguments: rangepartitioning(ca_county#36 ASC NULLS FIRST, 5), true, [id=#86] + +(119) Sort [codegen id : 43] +Input [6]: [ca_county#36, d_year#31, web_q1_q2_increase#82, store_q1_q2_increase#83, web_q2_q3_increase#84, store_q2_q3_increase#85] +Arguments: [ca_county#36 ASC NULLS FIRST], true, 0 + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.sf100/simplified.txt new file mode 100644 index 0000000000000..6892a843d42d7 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.sf100/simplified.txt @@ -0,0 +1,206 @@ +WholeStageCodegen (43) + Sort [ca_county] + InputAdapter + Exchange [ca_county] #1 + 
WholeStageCodegen (42) + Project [ca_county,d_year,store_sales,store_sales,store_sales,web_sales,web_sales,web_sales] + BroadcastHashJoin [ca_county,ca_county,store_sales,store_sales,store_sales,web_sales,web_sales,web_sales] + Project [ca_county,d_year,store_sales,store_sales,store_sales] + BroadcastHashJoin [ca_county,ca_county] + Project [ca_county,store_sales,store_sales] + BroadcastHashJoin [ca_county,ca_county] + HashAggregate [ca_county,d_qoy,d_year,sum] [store_sales,sum,sum(UnscaledValue(ss_ext_sales_price))] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #2 + WholeStageCodegen (6) + HashAggregate [ca_county,d_qoy,d_year,ss_ext_sales_price] [sum,sum] + Project [ca_county,d_qoy,d_year,ss_ext_sales_price] + SortMergeJoin [ca_address_sk,ss_addr_sk] + InputAdapter + WholeStageCodegen (3) + Sort [ss_addr_sk] + InputAdapter + Exchange [ss_addr_sk] #3 + WholeStageCodegen (2) + Project [d_qoy,d_year,ss_addr_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_addr_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Filter [d_date_sk,d_qoy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] + InputAdapter + WholeStageCodegen (5) + Sort [ca_address_sk] + InputAdapter + Exchange [ca_address_sk] #5 + WholeStageCodegen (4) + Filter [ca_address_sk,ca_county] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_county] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (13) + HashAggregate [ca_county,d_qoy,d_year,sum] [store_sales,sum,sum(UnscaledValue(ss_ext_sales_price))] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #7 + WholeStageCodegen (12) + HashAggregate [ca_county,d_qoy,d_year,ss_ext_sales_price] [sum,sum] + Project [ca_county,d_qoy,d_year,ss_ext_sales_price] + SortMergeJoin 
[ca_address_sk,ss_addr_sk] + InputAdapter + WholeStageCodegen (9) + Sort [ss_addr_sk] + InputAdapter + Exchange [ss_addr_sk] #8 + WholeStageCodegen (8) + Project [d_qoy,d_year,ss_addr_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_addr_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (7) + Filter [d_date_sk,d_qoy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] + InputAdapter + WholeStageCodegen (11) + Sort [ca_address_sk] + InputAdapter + ReusedExchange [ca_address_sk,ca_county] #5 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (20) + HashAggregate [ca_county,d_qoy,d_year,sum] [store_sales,sum,sum(UnscaledValue(ss_ext_sales_price))] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #11 + WholeStageCodegen (19) + HashAggregate [ca_county,d_qoy,d_year,ss_ext_sales_price] [sum,sum] + Project [ca_county,d_qoy,d_year,ss_ext_sales_price] + SortMergeJoin [ca_address_sk,ss_addr_sk] + InputAdapter + WholeStageCodegen (16) + Sort [ss_addr_sk] + InputAdapter + Exchange [ss_addr_sk] #12 + WholeStageCodegen (15) + Project [d_qoy,d_year,ss_addr_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_addr_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (14) + Filter [d_date_sk,d_qoy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] + InputAdapter + WholeStageCodegen (18) + Sort [ca_address_sk] + InputAdapter + ReusedExchange [ca_address_sk,ca_county] #5 + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (41) + Project [ca_county,web_sales,web_sales,web_sales] + BroadcastHashJoin [ca_county,ca_county] + Project 
[ca_county,web_sales,web_sales] + BroadcastHashJoin [ca_county,ca_county] + HashAggregate [ca_county,d_qoy,d_year,sum] [sum,sum(UnscaledValue(ws_ext_sales_price)),web_sales] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #15 + WholeStageCodegen (26) + HashAggregate [ca_county,d_qoy,d_year,ws_ext_sales_price] [sum,sum] + Project [ca_county,d_qoy,d_year,ws_ext_sales_price] + SortMergeJoin [ca_address_sk,ws_bill_addr_sk] + InputAdapter + WholeStageCodegen (23) + Sort [ws_bill_addr_sk] + InputAdapter + Exchange [ws_bill_addr_sk] #16 + WholeStageCodegen (22) + Project [d_qoy,d_year,ws_bill_addr_sk,ws_ext_sales_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_bill_addr_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_qoy,d_year] #9 + InputAdapter + WholeStageCodegen (25) + Sort [ca_address_sk] + InputAdapter + ReusedExchange [ca_address_sk,ca_county] #5 + InputAdapter + BroadcastExchange #17 + WholeStageCodegen (33) + HashAggregate [ca_county,d_qoy,d_year,sum] [sum,sum(UnscaledValue(ws_ext_sales_price)),web_sales] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #18 + WholeStageCodegen (32) + HashAggregate [ca_county,d_qoy,d_year,ws_ext_sales_price] [sum,sum] + Project [ca_county,d_qoy,d_year,ws_ext_sales_price] + SortMergeJoin [ca_address_sk,ws_bill_addr_sk] + InputAdapter + WholeStageCodegen (29) + Sort [ws_bill_addr_sk] + InputAdapter + Exchange [ws_bill_addr_sk] #19 + WholeStageCodegen (28) + Project [d_qoy,d_year,ws_bill_addr_sk,ws_ext_sales_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_bill_addr_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_qoy,d_year] #13 + InputAdapter + WholeStageCodegen (31) + Sort [ca_address_sk] + InputAdapter + ReusedExchange 
[ca_address_sk,ca_county] #5 + InputAdapter + BroadcastExchange #20 + WholeStageCodegen (40) + HashAggregate [ca_county,d_qoy,d_year,sum] [sum,sum(UnscaledValue(ws_ext_sales_price)),web_sales] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #21 + WholeStageCodegen (39) + HashAggregate [ca_county,d_qoy,d_year,ws_ext_sales_price] [sum,sum] + Project [ca_county,d_qoy,d_year,ws_ext_sales_price] + SortMergeJoin [ca_address_sk,ws_bill_addr_sk] + InputAdapter + WholeStageCodegen (36) + Sort [ws_bill_addr_sk] + InputAdapter + Exchange [ws_bill_addr_sk] #22 + WholeStageCodegen (35) + Project [d_qoy,d_year,ws_bill_addr_sk,ws_ext_sales_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_bill_addr_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_qoy,d_year] #4 + InputAdapter + WholeStageCodegen (38) + Sort [ca_address_sk] + InputAdapter + ReusedExchange [ca_address_sk,ca_county] #5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/explain.txt new file mode 100644 index 0000000000000..1b3d2d7cc2f0b --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/explain.txt @@ -0,0 +1,563 @@ +== Physical Plan == +* Sort (99) ++- Exchange (98) + +- * Project (97) + +- * BroadcastHashJoin Inner BuildRight (96) + :- * Project (82) + : +- * BroadcastHashJoin Inner BuildRight (81) + : :- * BroadcastHashJoin Inner BuildRight (67) + : : :- * Project (53) + : : : +- * BroadcastHashJoin Inner BuildRight (52) + : : : :- * BroadcastHashJoin Inner BuildRight (35) + : : : : :- * HashAggregate (18) + : : : : : +- Exchange (17) + : : : : : +- * HashAggregate (16) + : : : : : +- * Project (15) + : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : :- * Project (9) + : : : : : : +- * 
BroadcastHashJoin Inner BuildRight (8) + : : : : : : :- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet default.store_sales (1) + : : : : : : +- BroadcastExchange (7) + : : : : : : +- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : +- Scan parquet default.date_dim (4) + : : : : : +- BroadcastExchange (13) + : : : : : +- * Filter (12) + : : : : : +- * ColumnarToRow (11) + : : : : : +- Scan parquet default.customer_address (10) + : : : : +- BroadcastExchange (34) + : : : : +- * HashAggregate (33) + : : : : +- Exchange (32) + : : : : +- * HashAggregate (31) + : : : : +- * Project (30) + : : : : +- * BroadcastHashJoin Inner BuildRight (29) + : : : : :- * Project (27) + : : : : : +- * BroadcastHashJoin Inner BuildRight (26) + : : : : : :- * Filter (21) + : : : : : : +- * ColumnarToRow (20) + : : : : : : +- Scan parquet default.store_sales (19) + : : : : : +- BroadcastExchange (25) + : : : : : +- * Filter (24) + : : : : : +- * ColumnarToRow (23) + : : : : : +- Scan parquet default.date_dim (22) + : : : : +- ReusedExchange (28) + : : : +- BroadcastExchange (51) + : : : +- * HashAggregate (50) + : : : +- Exchange (49) + : : : +- * HashAggregate (48) + : : : +- * Project (47) + : : : +- * BroadcastHashJoin Inner BuildRight (46) + : : : :- * Project (44) + : : : : +- * BroadcastHashJoin Inner BuildRight (43) + : : : : :- * Filter (38) + : : : : : +- * ColumnarToRow (37) + : : : : : +- Scan parquet default.store_sales (36) + : : : : +- BroadcastExchange (42) + : : : : +- * Filter (41) + : : : : +- * ColumnarToRow (40) + : : : : +- Scan parquet default.date_dim (39) + : : : +- ReusedExchange (45) + : : +- BroadcastExchange (66) + : : +- * HashAggregate (65) + : : +- Exchange (64) + : : +- * HashAggregate (63) + : : +- * Project (62) + : : +- * BroadcastHashJoin Inner BuildRight (61) + : : :- * Project (59) + : : : +- * BroadcastHashJoin Inner BuildRight (58) + : : : :- * Filter (56) + : : : : +- * ColumnarToRow (55) + : : : : 
+- Scan parquet default.web_sales (54) + : : : +- ReusedExchange (57) + : : +- ReusedExchange (60) + : +- BroadcastExchange (80) + : +- * HashAggregate (79) + : +- Exchange (78) + : +- * HashAggregate (77) + : +- * Project (76) + : +- * BroadcastHashJoin Inner BuildRight (75) + : :- * Project (73) + : : +- * BroadcastHashJoin Inner BuildRight (72) + : : :- * Filter (70) + : : : +- * ColumnarToRow (69) + : : : +- Scan parquet default.web_sales (68) + : : +- ReusedExchange (71) + : +- ReusedExchange (74) + +- BroadcastExchange (95) + +- * HashAggregate (94) + +- Exchange (93) + +- * HashAggregate (92) + +- * Project (91) + +- * BroadcastHashJoin Inner BuildRight (90) + :- * Project (88) + : +- * BroadcastHashJoin Inner BuildRight (87) + : :- * Filter (85) + : : +- * ColumnarToRow (84) + : : +- Scan parquet default.web_sales (83) + : +- ReusedExchange (86) + +- ReusedExchange (89) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] + +(3) Filter [codegen id : 3] +Input [3]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_addr_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,1), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#4, d_year#5, 
d_qoy#6] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Condition : ((((isnotnull(d_qoy#6) AND isnotnull(d_year#5)) AND (d_qoy#6 = 1)) AND (d_year#5 = 2000)) AND isnotnull(d_date_sk#4)) + +(7) BroadcastExchange +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(9) Project [codegen id : 3] +Output [4]: [ss_addr_sk#2, ss_ext_sales_price#3, d_year#5, d_qoy#6] +Input [6]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, d_date_sk#4, d_year#5, d_qoy#6] + +(10) Scan parquet default.customer_address +Output [2]: [ca_address_sk#8, ca_county#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_county)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [ca_address_sk#8, ca_county#9] + +(12) Filter [codegen id : 2] +Input [2]: [ca_address_sk#8, ca_county#9] +Condition : (isnotnull(ca_address_sk#8) AND isnotnull(ca_county#9)) + +(13) BroadcastExchange +Input [2]: [ca_address_sk#8, ca_county#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_addr_sk#2] +Right keys [1]: [ca_address_sk#8] +Join condition: None + +(15) Project [codegen id : 3] +Output [4]: [ss_ext_sales_price#3, d_year#5, d_qoy#6, ca_county#9] +Input [6]: [ss_addr_sk#2, ss_ext_sales_price#3, d_year#5, d_qoy#6, ca_address_sk#8, ca_county#9] + +(16) HashAggregate [codegen id : 3] +Input [4]: [ss_ext_sales_price#3, d_year#5, d_qoy#6, ca_county#9] +Keys [3]: [ca_county#9, d_qoy#6, d_year#5] +Functions [1]: 
[partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#11] +Results [4]: [ca_county#9, d_qoy#6, d_year#5, sum#12] + +(17) Exchange +Input [4]: [ca_county#9, d_qoy#6, d_year#5, sum#12] +Arguments: hashpartitioning(ca_county#9, d_qoy#6, d_year#5, 5), true, [id=#13] + +(18) HashAggregate [codegen id : 24] +Input [4]: [ca_county#9, d_qoy#6, d_year#5, sum#12] +Keys [3]: [ca_county#9, d_qoy#6, d_year#5] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#14] +Results [3]: [ca_county#9, d_year#5, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#14,17,2) AS store_sales#15] + +(19) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 6] +Input [3]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] + +(21) Filter [codegen id : 6] +Input [3]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_addr_sk#2)) + +(22) Scan parquet default.date_dim +Output [3]: [d_date_sk#16, d_year#17, d_qoy#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 4] +Input [3]: [d_date_sk#16, d_year#17, d_qoy#18] + +(24) Filter [codegen id : 4] +Input [3]: [d_date_sk#16, d_year#17, d_qoy#18] +Condition : ((((isnotnull(d_qoy#18) AND isnotnull(d_year#17)) AND (d_qoy#18 = 2)) AND (d_year#17 = 2000)) AND 
isnotnull(d_date_sk#16)) + +(25) BroadcastExchange +Input [3]: [d_date_sk#16, d_year#17, d_qoy#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] + +(26) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#16] +Join condition: None + +(27) Project [codegen id : 6] +Output [4]: [ss_addr_sk#2, ss_ext_sales_price#3, d_year#17, d_qoy#18] +Input [6]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, d_date_sk#16, d_year#17, d_qoy#18] + +(28) ReusedExchange [Reuses operator id: 13] +Output [2]: [ca_address_sk#20, ca_county#21] + +(29) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_addr_sk#2] +Right keys [1]: [ca_address_sk#20] +Join condition: None + +(30) Project [codegen id : 6] +Output [4]: [ss_ext_sales_price#3, d_year#17, d_qoy#18, ca_county#21] +Input [6]: [ss_addr_sk#2, ss_ext_sales_price#3, d_year#17, d_qoy#18, ca_address_sk#20, ca_county#21] + +(31) HashAggregate [codegen id : 6] +Input [4]: [ss_ext_sales_price#3, d_year#17, d_qoy#18, ca_county#21] +Keys [3]: [ca_county#21, d_qoy#18, d_year#17] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#22] +Results [4]: [ca_county#21, d_qoy#18, d_year#17, sum#23] + +(32) Exchange +Input [4]: [ca_county#21, d_qoy#18, d_year#17, sum#23] +Arguments: hashpartitioning(ca_county#21, d_qoy#18, d_year#17, 5), true, [id=#24] + +(33) HashAggregate [codegen id : 7] +Input [4]: [ca_county#21, d_qoy#18, d_year#17, sum#23] +Keys [3]: [ca_county#21, d_qoy#18, d_year#17] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#25] +Results [2]: [ca_county#21, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#25,17,2) AS store_sales#26] + +(34) BroadcastExchange +Input [2]: [ca_county#21, store_sales#26] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#27] + +(35) 
BroadcastHashJoin [codegen id : 24] +Left keys [1]: [ca_county#9] +Right keys [1]: [ca_county#21] +Join condition: None + +(36) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk)] +ReadSchema: struct + +(37) ColumnarToRow [codegen id : 10] +Input [3]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] + +(38) Filter [codegen id : 10] +Input [3]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_addr_sk#2)) + +(39) Scan parquet default.date_dim +Output [3]: [d_date_sk#28, d_year#29, d_qoy#30] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,3), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(40) ColumnarToRow [codegen id : 8] +Input [3]: [d_date_sk#28, d_year#29, d_qoy#30] + +(41) Filter [codegen id : 8] +Input [3]: [d_date_sk#28, d_year#29, d_qoy#30] +Condition : ((((isnotnull(d_qoy#30) AND isnotnull(d_year#29)) AND (d_qoy#30 = 3)) AND (d_year#29 = 2000)) AND isnotnull(d_date_sk#28)) + +(42) BroadcastExchange +Input [3]: [d_date_sk#28, d_year#29, d_qoy#30] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#31] + +(43) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#28] +Join condition: None + +(44) Project [codegen id : 10] +Output [4]: [ss_addr_sk#2, ss_ext_sales_price#3, d_year#29, d_qoy#30] +Input [6]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, d_date_sk#28, d_year#29, d_qoy#30] + +(45) 
ReusedExchange [Reuses operator id: 13] +Output [2]: [ca_address_sk#32, ca_county#33] + +(46) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ss_addr_sk#2] +Right keys [1]: [ca_address_sk#32] +Join condition: None + +(47) Project [codegen id : 10] +Output [4]: [ss_ext_sales_price#3, d_year#29, d_qoy#30, ca_county#33] +Input [6]: [ss_addr_sk#2, ss_ext_sales_price#3, d_year#29, d_qoy#30, ca_address_sk#32, ca_county#33] + +(48) HashAggregate [codegen id : 10] +Input [4]: [ss_ext_sales_price#3, d_year#29, d_qoy#30, ca_county#33] +Keys [3]: [ca_county#33, d_qoy#30, d_year#29] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#34] +Results [4]: [ca_county#33, d_qoy#30, d_year#29, sum#35] + +(49) Exchange +Input [4]: [ca_county#33, d_qoy#30, d_year#29, sum#35] +Arguments: hashpartitioning(ca_county#33, d_qoy#30, d_year#29, 5), true, [id=#36] + +(50) HashAggregate [codegen id : 11] +Input [4]: [ca_county#33, d_qoy#30, d_year#29, sum#35] +Keys [3]: [ca_county#33, d_qoy#30, d_year#29] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#37] +Results [2]: [ca_county#33, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#37,17,2) AS store_sales#38] + +(51) BroadcastExchange +Input [2]: [ca_county#33, store_sales#38] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#39] + +(52) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [ca_county#21] +Right keys [1]: [ca_county#33] +Join condition: None + +(53) Project [codegen id : 24] +Output [5]: [ca_county#9, d_year#5, store_sales#15, store_sales#26, store_sales#38] +Input [7]: [ca_county#9, d_year#5, store_sales#15, ca_county#21, store_sales#26, ca_county#33, store_sales#38] + +(54) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#40, ws_bill_addr_sk#41, ws_ext_sales_price#42] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk)] +ReadSchema: struct + +(55) ColumnarToRow [codegen id : 14] +Input [3]: [ws_sold_date_sk#40, ws_bill_addr_sk#41, ws_ext_sales_price#42] + +(56) Filter [codegen id : 14] +Input [3]: [ws_sold_date_sk#40, ws_bill_addr_sk#41, ws_ext_sales_price#42] +Condition : (isnotnull(ws_sold_date_sk#40) AND isnotnull(ws_bill_addr_sk#41)) + +(57) ReusedExchange [Reuses operator id: 7] +Output [3]: [d_date_sk#43, d_year#44, d_qoy#45] + +(58) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ws_sold_date_sk#40] +Right keys [1]: [d_date_sk#43] +Join condition: None + +(59) Project [codegen id : 14] +Output [4]: [ws_bill_addr_sk#41, ws_ext_sales_price#42, d_year#44, d_qoy#45] +Input [6]: [ws_sold_date_sk#40, ws_bill_addr_sk#41, ws_ext_sales_price#42, d_date_sk#43, d_year#44, d_qoy#45] + +(60) ReusedExchange [Reuses operator id: 13] +Output [2]: [ca_address_sk#46, ca_county#47] + +(61) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ws_bill_addr_sk#41] +Right keys [1]: [ca_address_sk#46] +Join condition: None + +(62) Project [codegen id : 14] +Output [4]: [ws_ext_sales_price#42, d_year#44, d_qoy#45, ca_county#47] +Input [6]: [ws_bill_addr_sk#41, ws_ext_sales_price#42, d_year#44, d_qoy#45, ca_address_sk#46, ca_county#47] + +(63) HashAggregate [codegen id : 14] +Input [4]: [ws_ext_sales_price#42, d_year#44, d_qoy#45, ca_county#47] +Keys [3]: [ca_county#47, d_qoy#45, d_year#44] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#42))] +Aggregate Attributes [1]: [sum#48] +Results [4]: [ca_county#47, d_qoy#45, d_year#44, sum#49] + +(64) Exchange +Input [4]: [ca_county#47, d_qoy#45, d_year#44, sum#49] +Arguments: hashpartitioning(ca_county#47, d_qoy#45, d_year#44, 5), true, [id=#50] + +(65) HashAggregate [codegen id : 15] +Input [4]: [ca_county#47, d_qoy#45, d_year#44, sum#49] 
+Keys [3]: [ca_county#47, d_qoy#45, d_year#44] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#42))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#42))#51] +Results [2]: [ca_county#47, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#42))#51,17,2) AS web_sales#52] + +(66) BroadcastExchange +Input [2]: [ca_county#47, web_sales#52] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#53] + +(67) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [ca_county#9] +Right keys [1]: [ca_county#47] +Join condition: None + +(68) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#40, ws_bill_addr_sk#41, ws_ext_sales_price#42] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk)] +ReadSchema: struct + +(69) ColumnarToRow [codegen id : 18] +Input [3]: [ws_sold_date_sk#40, ws_bill_addr_sk#41, ws_ext_sales_price#42] + +(70) Filter [codegen id : 18] +Input [3]: [ws_sold_date_sk#40, ws_bill_addr_sk#41, ws_ext_sales_price#42] +Condition : (isnotnull(ws_sold_date_sk#40) AND isnotnull(ws_bill_addr_sk#41)) + +(71) ReusedExchange [Reuses operator id: 25] +Output [3]: [d_date_sk#54, d_year#55, d_qoy#56] + +(72) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [ws_sold_date_sk#40] +Right keys [1]: [d_date_sk#54] +Join condition: None + +(73) Project [codegen id : 18] +Output [4]: [ws_bill_addr_sk#41, ws_ext_sales_price#42, d_year#55, d_qoy#56] +Input [6]: [ws_sold_date_sk#40, ws_bill_addr_sk#41, ws_ext_sales_price#42, d_date_sk#54, d_year#55, d_qoy#56] + +(74) ReusedExchange [Reuses operator id: 13] +Output [2]: [ca_address_sk#57, ca_county#58] + +(75) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [ws_bill_addr_sk#41] +Right keys [1]: [ca_address_sk#57] +Join condition: None + +(76) Project [codegen id : 18] +Output [4]: 
[ws_ext_sales_price#42, d_year#55, d_qoy#56, ca_county#58] +Input [6]: [ws_bill_addr_sk#41, ws_ext_sales_price#42, d_year#55, d_qoy#56, ca_address_sk#57, ca_county#58] + +(77) HashAggregate [codegen id : 18] +Input [4]: [ws_ext_sales_price#42, d_year#55, d_qoy#56, ca_county#58] +Keys [3]: [ca_county#58, d_qoy#56, d_year#55] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#42))] +Aggregate Attributes [1]: [sum#59] +Results [4]: [ca_county#58, d_qoy#56, d_year#55, sum#60] + +(78) Exchange +Input [4]: [ca_county#58, d_qoy#56, d_year#55, sum#60] +Arguments: hashpartitioning(ca_county#58, d_qoy#56, d_year#55, 5), true, [id=#61] + +(79) HashAggregate [codegen id : 19] +Input [4]: [ca_county#58, d_qoy#56, d_year#55, sum#60] +Keys [3]: [ca_county#58, d_qoy#56, d_year#55] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#42))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#42))#62] +Results [2]: [ca_county#58, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#42))#62,17,2) AS web_sales#63] + +(80) BroadcastExchange +Input [2]: [ca_county#58, web_sales#63] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#64] + +(81) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [ca_county#47] +Right keys [1]: [ca_county#58] +Join condition: (CASE WHEN (web_sales#52 > 0.00) THEN CheckOverflow((promote_precision(web_sales#63) / promote_precision(web_sales#52)), DecimalType(37,20), true) ELSE null END > CASE WHEN (store_sales#15 > 0.00) THEN CheckOverflow((promote_precision(store_sales#26) / promote_precision(store_sales#15)), DecimalType(37,20), true) ELSE null END) + +(82) Project [codegen id : 24] +Output [8]: [ca_county#9, d_year#5, store_sales#15, store_sales#26, store_sales#38, ca_county#47, web_sales#52, web_sales#63] +Input [9]: [ca_county#9, d_year#5, store_sales#15, store_sales#26, store_sales#38, ca_county#47, web_sales#52, ca_county#58, web_sales#63] + +(83) Scan parquet default.web_sales +Output [3]: 
[ws_sold_date_sk#40, ws_bill_addr_sk#41, ws_ext_sales_price#42] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk)] +ReadSchema: struct + +(84) ColumnarToRow [codegen id : 22] +Input [3]: [ws_sold_date_sk#40, ws_bill_addr_sk#41, ws_ext_sales_price#42] + +(85) Filter [codegen id : 22] +Input [3]: [ws_sold_date_sk#40, ws_bill_addr_sk#41, ws_ext_sales_price#42] +Condition : (isnotnull(ws_sold_date_sk#40) AND isnotnull(ws_bill_addr_sk#41)) + +(86) ReusedExchange [Reuses operator id: 42] +Output [3]: [d_date_sk#65, d_year#66, d_qoy#67] + +(87) BroadcastHashJoin [codegen id : 22] +Left keys [1]: [ws_sold_date_sk#40] +Right keys [1]: [d_date_sk#65] +Join condition: None + +(88) Project [codegen id : 22] +Output [4]: [ws_bill_addr_sk#41, ws_ext_sales_price#42, d_year#66, d_qoy#67] +Input [6]: [ws_sold_date_sk#40, ws_bill_addr_sk#41, ws_ext_sales_price#42, d_date_sk#65, d_year#66, d_qoy#67] + +(89) ReusedExchange [Reuses operator id: 13] +Output [2]: [ca_address_sk#68, ca_county#69] + +(90) BroadcastHashJoin [codegen id : 22] +Left keys [1]: [ws_bill_addr_sk#41] +Right keys [1]: [ca_address_sk#68] +Join condition: None + +(91) Project [codegen id : 22] +Output [4]: [ws_ext_sales_price#42, d_year#66, d_qoy#67, ca_county#69] +Input [6]: [ws_bill_addr_sk#41, ws_ext_sales_price#42, d_year#66, d_qoy#67, ca_address_sk#68, ca_county#69] + +(92) HashAggregate [codegen id : 22] +Input [4]: [ws_ext_sales_price#42, d_year#66, d_qoy#67, ca_county#69] +Keys [3]: [ca_county#69, d_qoy#67, d_year#66] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#42))] +Aggregate Attributes [1]: [sum#70] +Results [4]: [ca_county#69, d_qoy#67, d_year#66, sum#71] + +(93) Exchange +Input [4]: [ca_county#69, d_qoy#67, d_year#66, sum#71] +Arguments: hashpartitioning(ca_county#69, d_qoy#67, d_year#66, 5), true, 
[id=#72] + +(94) HashAggregate [codegen id : 23] +Input [4]: [ca_county#69, d_qoy#67, d_year#66, sum#71] +Keys [3]: [ca_county#69, d_qoy#67, d_year#66] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#42))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#42))#73] +Results [2]: [ca_county#69, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#42))#73,17,2) AS web_sales#74] + +(95) BroadcastExchange +Input [2]: [ca_county#69, web_sales#74] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#75] + +(96) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [ca_county#47] +Right keys [1]: [ca_county#69] +Join condition: (CASE WHEN (web_sales#63 > 0.00) THEN CheckOverflow((promote_precision(web_sales#74) / promote_precision(web_sales#63)), DecimalType(37,20), true) ELSE null END > CASE WHEN (store_sales#26 > 0.00) THEN CheckOverflow((promote_precision(store_sales#38) / promote_precision(store_sales#26)), DecimalType(37,20), true) ELSE null END) + +(97) Project [codegen id : 24] +Output [6]: [ca_county#9, d_year#5, CheckOverflow((promote_precision(web_sales#63) / promote_precision(web_sales#52)), DecimalType(37,20), true) AS web_q1_q2_increase#76, CheckOverflow((promote_precision(store_sales#26) / promote_precision(store_sales#15)), DecimalType(37,20), true) AS store_q1_q2_increase#77, CheckOverflow((promote_precision(web_sales#74) / promote_precision(web_sales#63)), DecimalType(37,20), true) AS web_q2_q3_increase#78, CheckOverflow((promote_precision(store_sales#38) / promote_precision(store_sales#26)), DecimalType(37,20), true) AS store_q2_q3_increase#79] +Input [10]: [ca_county#9, d_year#5, store_sales#15, store_sales#26, store_sales#38, ca_county#47, web_sales#52, web_sales#63, ca_county#69, web_sales#74] + +(98) Exchange +Input [6]: [ca_county#9, d_year#5, web_q1_q2_increase#76, store_q1_q2_increase#77, web_q2_q3_increase#78, store_q2_q3_increase#79] +Arguments: rangepartitioning(ca_county#9 ASC NULLS FIRST, 5), true, 
[id=#80] + +(99) Sort [codegen id : 25] +Input [6]: [ca_county#9, d_year#5, web_q1_q2_increase#76, store_q1_q2_increase#77, web_q2_q3_increase#78, store_q2_q3_increase#79] +Arguments: [ca_county#9 ASC NULLS FIRST], true, 0 + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/simplified.txt new file mode 100644 index 0000000000000..c937700f0be16 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/simplified.txt @@ -0,0 +1,150 @@ +WholeStageCodegen (25) + Sort [ca_county] + InputAdapter + Exchange [ca_county] #1 + WholeStageCodegen (24) + Project [ca_county,d_year,store_sales,store_sales,store_sales,web_sales,web_sales,web_sales] + BroadcastHashJoin [ca_county,ca_county,store_sales,store_sales,web_sales,web_sales] + Project [ca_county,ca_county,d_year,store_sales,store_sales,store_sales,web_sales,web_sales] + BroadcastHashJoin [ca_county,ca_county,store_sales,store_sales,web_sales,web_sales] + BroadcastHashJoin [ca_county,ca_county] + Project [ca_county,d_year,store_sales,store_sales,store_sales] + BroadcastHashJoin [ca_county,ca_county] + BroadcastHashJoin [ca_county,ca_county] + HashAggregate [ca_county,d_qoy,d_year,sum] [store_sales,sum,sum(UnscaledValue(ss_ext_sales_price))] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #2 + WholeStageCodegen (3) + HashAggregate [ca_county,d_qoy,d_year,ss_ext_sales_price] [sum,sum] + Project [ca_county,d_qoy,d_year,ss_ext_sales_price] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] + Project [d_qoy,d_year,ss_addr_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_addr_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [d_date_sk,d_qoy,d_year] + ColumnarToRow + InputAdapter + Scan 
parquet default.date_dim [d_date_sk,d_qoy,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [ca_address_sk,ca_county] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_county] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (7) + HashAggregate [ca_county,d_qoy,d_year,sum] [store_sales,sum,sum(UnscaledValue(ss_ext_sales_price))] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #6 + WholeStageCodegen (6) + HashAggregate [ca_county,d_qoy,d_year,ss_ext_sales_price] [sum,sum] + Project [ca_county,d_qoy,d_year,ss_ext_sales_price] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] + Project [d_qoy,d_year,ss_addr_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_addr_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (4) + Filter [d_date_sk,d_qoy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] + InputAdapter + ReusedExchange [ca_address_sk,ca_county] #4 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (11) + HashAggregate [ca_county,d_qoy,d_year,sum] [store_sales,sum,sum(UnscaledValue(ss_ext_sales_price))] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #9 + WholeStageCodegen (10) + HashAggregate [ca_county,d_qoy,d_year,ss_ext_sales_price] [sum,sum] + Project [ca_county,d_qoy,d_year,ss_ext_sales_price] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] + Project [d_qoy,d_year,ss_addr_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_addr_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (8) + Filter [d_date_sk,d_qoy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim 
[d_date_sk,d_qoy,d_year] + InputAdapter + ReusedExchange [ca_address_sk,ca_county] #4 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (15) + HashAggregate [ca_county,d_qoy,d_year,sum] [sum,sum(UnscaledValue(ws_ext_sales_price)),web_sales] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #12 + WholeStageCodegen (14) + HashAggregate [ca_county,d_qoy,d_year,ws_ext_sales_price] [sum,sum] + Project [ca_county,d_qoy,d_year,ws_ext_sales_price] + BroadcastHashJoin [ca_address_sk,ws_bill_addr_sk] + Project [d_qoy,d_year,ws_bill_addr_sk,ws_ext_sales_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_bill_addr_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_qoy,d_year] #3 + InputAdapter + ReusedExchange [ca_address_sk,ca_county] #4 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (19) + HashAggregate [ca_county,d_qoy,d_year,sum] [sum,sum(UnscaledValue(ws_ext_sales_price)),web_sales] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #14 + WholeStageCodegen (18) + HashAggregate [ca_county,d_qoy,d_year,ws_ext_sales_price] [sum,sum] + Project [ca_county,d_qoy,d_year,ws_ext_sales_price] + BroadcastHashJoin [ca_address_sk,ws_bill_addr_sk] + Project [d_qoy,d_year,ws_bill_addr_sk,ws_ext_sales_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_bill_addr_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_qoy,d_year] #7 + InputAdapter + ReusedExchange [ca_address_sk,ca_county] #4 + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (23) + HashAggregate [ca_county,d_qoy,d_year,sum] [sum,sum(UnscaledValue(ws_ext_sales_price)),web_sales] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #16 + WholeStageCodegen (22) + HashAggregate 
[ca_county,d_qoy,d_year,ws_ext_sales_price] [sum,sum] + Project [ca_county,d_qoy,d_year,ws_ext_sales_price] + BroadcastHashJoin [ca_address_sk,ws_bill_addr_sk] + Project [d_qoy,d_year,ws_bill_addr_sk,ws_ext_sales_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_bill_addr_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_qoy,d_year] #10 + InputAdapter + ReusedExchange [ca_address_sk,ca_county] #4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.sf100/explain.txt new file mode 100644 index 0000000000000..d08f3d6ede6f0 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.sf100/explain.txt @@ -0,0 +1,175 @@ +== Physical Plan == +CollectLimit (31) ++- * Project (30) + +- * BroadcastHashJoin Inner BuildRight (29) + :- * Project (27) + : +- * BroadcastHashJoin Inner BuildLeft (26) + : :- BroadcastExchange (22) + : : +- * Project (21) + : : +- * BroadcastHashJoin Inner BuildLeft (20) + : : :- BroadcastExchange (5) + : : : +- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.item (1) + : : +- * Filter (19) + : : +- * HashAggregate (18) + : : +- Exchange (17) + : : +- * HashAggregate (16) + : : +- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Filter (8) + : : : +- * ColumnarToRow (7) + : : : +- Scan parquet default.catalog_sales (6) + : : +- BroadcastExchange (13) + : : +- * Project (12) + : : +- * Filter (11) + : : +- * ColumnarToRow (10) + : : +- Scan parquet default.date_dim (9) + : +- * Filter (25) + : +- * ColumnarToRow (24) + : +- Scan parquet default.catalog_sales (23) + +- ReusedExchange (28) + + +(1) Scan parquet default.item +Output [2]: [i_item_sk#1, i_manufact_id#2] 
+Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,977), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [2]: [i_item_sk#1, i_manufact_id#2] + +(3) Filter [codegen id : 1] +Input [2]: [i_item_sk#1, i_manufact_id#2] +Condition : ((isnotnull(i_manufact_id#2) AND (i_manufact_id#2 = 977)) AND isnotnull(i_item_sk#1)) + +(4) Project [codegen id : 1] +Output [1]: [i_item_sk#1] +Input [2]: [i_item_sk#1, i_manufact_id#2] + +(5) BroadcastExchange +Input [1]: [i_item_sk#1] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#3] + +(6) Scan parquet default.catalog_sales +Output [3]: [cs_sold_date_sk#4, cs_item_sk#5, cs_ext_discount_amt#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 3] +Input [3]: [cs_sold_date_sk#4, cs_item_sk#5, cs_ext_discount_amt#6] + +(8) Filter [codegen id : 3] +Input [3]: [cs_sold_date_sk#4, cs_item_sk#5, cs_ext_discount_amt#6] +Condition : (isnotnull(cs_sold_date_sk#4) AND isnotnull(cs_item_sk#5)) + +(9) Scan parquet default.date_dim +Output [2]: [d_date_sk#7, d_date#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-01-27), LessThanOrEqual(d_date,2000-04-26), IsNotNull(d_date_sk)] +ReadSchema: struct + +(10) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#7, d_date#8] + +(11) Filter [codegen id : 2] +Input [2]: [d_date_sk#7, d_date#8] 
+Condition : (((isnotnull(d_date#8) AND (d_date#8 >= 10983)) AND (d_date#8 <= 11073)) AND isnotnull(d_date_sk#7)) + +(12) Project [codegen id : 2] +Output [1]: [d_date_sk#7] +Input [2]: [d_date_sk#7, d_date#8] + +(13) BroadcastExchange +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_sold_date_sk#4] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(15) Project [codegen id : 3] +Output [2]: [cs_item_sk#5, cs_ext_discount_amt#6] +Input [4]: [cs_sold_date_sk#4, cs_item_sk#5, cs_ext_discount_amt#6, d_date_sk#7] + +(16) HashAggregate [codegen id : 3] +Input [2]: [cs_item_sk#5, cs_ext_discount_amt#6] +Keys [1]: [cs_item_sk#5] +Functions [1]: [partial_avg(UnscaledValue(cs_ext_discount_amt#6))] +Aggregate Attributes [2]: [sum#10, count#11] +Results [3]: [cs_item_sk#5, sum#12, count#13] + +(17) Exchange +Input [3]: [cs_item_sk#5, sum#12, count#13] +Arguments: hashpartitioning(cs_item_sk#5, 5), true, [id=#14] + +(18) HashAggregate +Input [3]: [cs_item_sk#5, sum#12, count#13] +Keys [1]: [cs_item_sk#5] +Functions [1]: [avg(UnscaledValue(cs_ext_discount_amt#6))] +Aggregate Attributes [1]: [avg(UnscaledValue(cs_ext_discount_amt#6))#15] +Results [2]: [CheckOverflow((1.300000 * promote_precision(cast((avg(UnscaledValue(cs_ext_discount_amt#6))#15 / 100.0) as decimal(11,6)))), DecimalType(14,7), true) AS (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#16, cs_item_sk#5 AS cs_item_sk#5#17] + +(19) Filter +Input [2]: [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#16, cs_item_sk#5#17] +Condition : isnotnull((CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#16) + +(20) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [cs_item_sk#5#17] +Join condition: None + +(21) Project [codegen id : 4] +Output [2]: [i_item_sk#1, 
(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#16] +Input [3]: [i_item_sk#1, (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#16, cs_item_sk#5#17] + +(22) BroadcastExchange +Input [2]: [i_item_sk#1, (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] + +(23) Scan parquet default.catalog_sales +Output [3]: [cs_sold_date_sk#4, cs_item_sk#5, cs_ext_discount_amt#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_ext_discount_amt), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(24) ColumnarToRow +Input [3]: [cs_sold_date_sk#4, cs_item_sk#5, cs_ext_discount_amt#6] + +(25) Filter +Input [3]: [cs_sold_date_sk#4, cs_item_sk#5, cs_ext_discount_amt#6] +Condition : ((isnotnull(cs_item_sk#5) AND isnotnull(cs_ext_discount_amt#6)) AND isnotnull(cs_sold_date_sk#4)) + +(26) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [cs_item_sk#5] +Join condition: (cast(cs_ext_discount_amt#6 as decimal(14,7)) > (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#16) + +(27) Project [codegen id : 6] +Output [1]: [cs_sold_date_sk#4] +Input [5]: [i_item_sk#1, (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#16, cs_sold_date_sk#4, cs_item_sk#5, cs_ext_discount_amt#6] + +(28) ReusedExchange [Reuses operator id: 13] +Output [1]: [d_date_sk#7] + +(29) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_sold_date_sk#4] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(30) Project [codegen id : 6] +Output [1]: [1 AS excess discount amount #19] +Input [2]: [cs_sold_date_sk#4, d_date_sk#7] + +(31) CollectLimit +Input [1]: [excess 
discount amount #19] +Arguments: 100 + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.sf100/simplified.txt new file mode 100644 index 0000000000000..9bdab6f9682c8 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.sf100/simplified.txt @@ -0,0 +1,45 @@ +CollectLimit + WholeStageCodegen (6) + Project + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk] + BroadcastHashJoin [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6))),cs_ext_discount_amt,cs_item_sk,i_item_sk] + InputAdapter + BroadcastExchange #1 + WholeStageCodegen (4) + Project [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6))),i_item_sk] + BroadcastHashJoin [cs_item_sk,i_item_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [i_item_sk] + Filter [i_item_sk,i_manufact_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_manufact_id] + Filter [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))] + HashAggregate [count,cs_item_sk,sum] [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6))),avg(UnscaledValue(cs_ext_discount_amt)),count,cs_item_sk,sum] + InputAdapter + Exchange [cs_item_sk] #3 + WholeStageCodegen (3) + HashAggregate [cs_ext_discount_amt,cs_item_sk] [count,count,sum,sum] + Project [cs_ext_discount_amt,cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ext_discount_amt,cs_item_sk,cs_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + Filter 
[cs_ext_discount_amt,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ext_discount_amt,cs_item_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/explain.txt new file mode 100644 index 0000000000000..a4589f00b84a3 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/explain.txt @@ -0,0 +1,175 @@ +== Physical Plan == +CollectLimit (31) ++- * Project (30) + +- * BroadcastHashJoin Inner BuildRight (29) + :- * Project (27) + : +- * BroadcastHashJoin Inner BuildRight (26) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.catalog_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.item (4) + : +- BroadcastExchange (25) + : +- * Filter (24) + : +- * HashAggregate (23) + : +- Exchange (22) + : +- * HashAggregate (21) + : +- * Project (20) + : +- * BroadcastHashJoin Inner BuildRight (19) + : :- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.catalog_sales (11) + : +- BroadcastExchange (18) + : +- * Project (17) + : +- * Filter (16) + : +- * ColumnarToRow (15) + : +- Scan parquet default.date_dim (14) + +- ReusedExchange (28) + + +(1) Scan parquet default.catalog_sales +Output [3]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_discount_amt#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_ext_discount_amt), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 6] +Input [3]: 
[cs_sold_date_sk#1, cs_item_sk#2, cs_ext_discount_amt#3] + +(3) Filter [codegen id : 6] +Input [3]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_discount_amt#3] +Condition : ((isnotnull(cs_item_sk#2) AND isnotnull(cs_ext_discount_amt#3)) AND isnotnull(cs_sold_date_sk#1)) + +(4) Scan parquet default.item +Output [2]: [i_item_sk#4, i_manufact_id#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,977), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [i_item_sk#4, i_manufact_id#5] + +(6) Filter [codegen id : 1] +Input [2]: [i_item_sk#4, i_manufact_id#5] +Condition : ((isnotnull(i_manufact_id#5) AND (i_manufact_id#5 = 977)) AND isnotnull(i_item_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [i_item_sk#4] +Input [2]: [i_item_sk#4, i_manufact_id#5] + +(8) BroadcastExchange +Input [1]: [i_item_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(9) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_item_sk#2] +Right keys [1]: [i_item_sk#4] +Join condition: None + +(10) Project [codegen id : 6] +Output [3]: [cs_sold_date_sk#1, cs_ext_discount_amt#3, i_item_sk#4] +Input [4]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_discount_amt#3, i_item_sk#4] + +(11) Scan parquet default.catalog_sales +Output [3]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_discount_amt#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 3] +Input [3]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_discount_amt#3] + +(13) Filter [codegen id : 3] +Input [3]: [cs_sold_date_sk#1, cs_item_sk#2, 
cs_ext_discount_amt#3] +Condition : (isnotnull(cs_sold_date_sk#1) AND isnotnull(cs_item_sk#2)) + +(14) Scan parquet default.date_dim +Output [2]: [d_date_sk#7, d_date#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-01-27), LessThanOrEqual(d_date,2000-04-26), IsNotNull(d_date_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#7, d_date#8] + +(16) Filter [codegen id : 2] +Input [2]: [d_date_sk#7, d_date#8] +Condition : (((isnotnull(d_date#8) AND (d_date#8 >= 10983)) AND (d_date#8 <= 11073)) AND isnotnull(d_date_sk#7)) + +(17) Project [codegen id : 2] +Output [1]: [d_date_sk#7] +Input [2]: [d_date_sk#7, d_date#8] + +(18) BroadcastExchange +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + +(19) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(20) Project [codegen id : 3] +Output [2]: [cs_item_sk#2, cs_ext_discount_amt#3] +Input [4]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_discount_amt#3, d_date_sk#7] + +(21) HashAggregate [codegen id : 3] +Input [2]: [cs_item_sk#2, cs_ext_discount_amt#3] +Keys [1]: [cs_item_sk#2] +Functions [1]: [partial_avg(UnscaledValue(cs_ext_discount_amt#3))] +Aggregate Attributes [2]: [sum#10, count#11] +Results [3]: [cs_item_sk#2, sum#12, count#13] + +(22) Exchange +Input [3]: [cs_item_sk#2, sum#12, count#13] +Arguments: hashpartitioning(cs_item_sk#2, 5), true, [id=#14] + +(23) HashAggregate [codegen id : 4] +Input [3]: [cs_item_sk#2, sum#12, count#13] +Keys [1]: [cs_item_sk#2] +Functions [1]: [avg(UnscaledValue(cs_ext_discount_amt#3))] +Aggregate Attributes [1]: [avg(UnscaledValue(cs_ext_discount_amt#3))#15] +Results [2]: [CheckOverflow((1.300000 * 
promote_precision(cast((avg(UnscaledValue(cs_ext_discount_amt#3))#15 / 100.0) as decimal(11,6)))), DecimalType(14,7), true) AS (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#16, cs_item_sk#2 AS cs_item_sk#2#17] + +(24) Filter [codegen id : 4] +Input [2]: [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#16, cs_item_sk#2#17] +Condition : isnotnull((CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#16) + +(25) BroadcastExchange +Input [2]: [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#16, cs_item_sk#2#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#18] + +(26) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [i_item_sk#4] +Right keys [1]: [cs_item_sk#2#17] +Join condition: (cast(cs_ext_discount_amt#3 as decimal(14,7)) > (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#16) + +(27) Project [codegen id : 6] +Output [1]: [cs_sold_date_sk#1] +Input [5]: [cs_sold_date_sk#1, cs_ext_discount_amt#3, i_item_sk#4, (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#16, cs_item_sk#2#17] + +(28) ReusedExchange [Reuses operator id: 18] +Output [1]: [d_date_sk#7] + +(29) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(30) Project [codegen id : 6] +Output [1]: [1 AS excess discount amount #19] +Input [2]: [cs_sold_date_sk#1, d_date_sk#7] + +(31) CollectLimit +Input [1]: [excess discount amount #19] +Arguments: 100 + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/simplified.txt new file mode 100644 index 0000000000000..997e9db1f9241 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/simplified.txt @@ -0,0 
+1,45 @@ +CollectLimit + WholeStageCodegen (6) + Project + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk] + BroadcastHashJoin [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6))),cs_ext_discount_amt,cs_item_sk,i_item_sk] + Project [cs_ext_discount_amt,cs_sold_date_sk,i_item_sk] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_ext_discount_amt,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ext_discount_amt,cs_item_sk,cs_sold_date_sk] + InputAdapter + BroadcastExchange #1 + WholeStageCodegen (1) + Project [i_item_sk] + Filter [i_item_sk,i_manufact_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_manufact_id] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (4) + Filter [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))] + HashAggregate [count,cs_item_sk,sum] [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6))),avg(UnscaledValue(cs_ext_discount_amt)),count,cs_item_sk,sum] + InputAdapter + Exchange [cs_item_sk] #3 + WholeStageCodegen (3) + HashAggregate [cs_ext_discount_amt,cs_item_sk] [count,count,sum,sum] + Project [cs_ext_discount_amt,cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ext_discount_amt,cs_item_sk,cs_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.sf100/explain.txt new file mode 100644 index 0000000000000..044f552befdd8 --- /dev/null +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.sf100/explain.txt @@ -0,0 +1,378 @@ +== Physical Plan == +TakeOrderedAndProject (67) ++- * HashAggregate (66) + +- Exchange (65) + +- * HashAggregate (64) + +- Union (63) + :- * HashAggregate (32) + : +- Exchange (31) + : +- * HashAggregate (30) + : +- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (20) + : : +- * BroadcastHashJoin LeftSemi BuildRight (19) + : : :- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet default.item (11) + : : +- BroadcastExchange (18) + : : +- * Project (17) + : : +- * Filter (16) + : : +- * ColumnarToRow (15) + : : +- Scan parquet default.item (14) + : +- BroadcastExchange (27) + : +- * Project (26) + : +- * Filter (25) + : +- * ColumnarToRow (24) + : +- Scan parquet default.customer_address (23) + :- * HashAggregate (47) + : +- Exchange (46) + : +- * HashAggregate (45) + : +- * Project (44) + : +- * BroadcastHashJoin Inner BuildRight (43) + : :- * Project (41) + : : +- * BroadcastHashJoin Inner BuildRight (40) + : : :- * Project (38) + : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : :- * Filter (35) + : : : : +- * ColumnarToRow (34) + : : : : +- Scan parquet default.catalog_sales (33) + : : : +- ReusedExchange (36) + : : +- ReusedExchange (39) + : +- ReusedExchange (42) + +- * HashAggregate (62) + +- Exchange (61) + +- * HashAggregate (60) + +- * Project (59) + +- * BroadcastHashJoin Inner BuildRight (58) + :- * Project (56) + : +- * BroadcastHashJoin Inner BuildRight (55) + : :- * 
Project (53) + : : +- * BroadcastHashJoin Inner BuildRight (52) + : : :- * Filter (50) + : : : +- * ColumnarToRow (49) + : : : +- Scan parquet default.web_sales (48) + : : +- ReusedExchange (51) + : +- ReusedExchange (54) + +- ReusedExchange (57) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] + +(3) Filter [codegen id : 5] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] +Condition : ((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_addr_sk#3)) AND isnotnull(ss_item_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#5, d_year#6, d_moy#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,5), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Condition : ((((isnotnull(d_year#6) AND isnotnull(d_moy#7)) AND (d_year#6 = 1998)) AND (d_moy#7 = 5)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: 
[ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(10) Project [codegen id : 5] +Output [3]: [ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4, d_date_sk#5] + +(11) Scan parquet default.item +Output [2]: [i_item_sk#9, i_manufact_id#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 3] +Input [2]: [i_item_sk#9, i_manufact_id#10] + +(13) Filter [codegen id : 3] +Input [2]: [i_item_sk#9, i_manufact_id#10] +Condition : isnotnull(i_item_sk#9) + +(14) Scan parquet default.item +Output [2]: [i_category#11, i_manufact_id#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Electronics)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 2] +Input [2]: [i_category#11, i_manufact_id#10] + +(16) Filter [codegen id : 2] +Input [2]: [i_category#11, i_manufact_id#10] +Condition : (isnotnull(i_category#11) AND (i_category#11 = Electronics)) + +(17) Project [codegen id : 2] +Output [1]: [i_manufact_id#10 AS i_manufact_id#10#12] +Input [2]: [i_category#11, i_manufact_id#10] + +(18) BroadcastExchange +Input [1]: [i_manufact_id#10#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(19) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [i_manufact_id#10] +Right keys [1]: [i_manufact_id#10#12] +Join condition: None + +(20) BroadcastExchange +Input [2]: [i_item_sk#9, i_manufact_id#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] + +(21) BroadcastHashJoin [codegen id : 5] 
+Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#9] +Join condition: None + +(22) Project [codegen id : 5] +Output [3]: [ss_addr_sk#3, ss_ext_sales_price#4, i_manufact_id#10] +Input [5]: [ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4, i_item_sk#9, i_manufact_id#10] + +(23) Scan parquet default.customer_address +Output [2]: [ca_address_sk#15, ca_gmt_offset#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [2]: [ca_address_sk#15, ca_gmt_offset#16] + +(25) Filter [codegen id : 4] +Input [2]: [ca_address_sk#15, ca_gmt_offset#16] +Condition : ((isnotnull(ca_gmt_offset#16) AND (ca_gmt_offset#16 = -5.00)) AND isnotnull(ca_address_sk#15)) + +(26) Project [codegen id : 4] +Output [1]: [ca_address_sk#15] +Input [2]: [ca_address_sk#15, ca_gmt_offset#16] + +(27) BroadcastExchange +Input [1]: [ca_address_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_addr_sk#3] +Right keys [1]: [ca_address_sk#15] +Join condition: None + +(29) Project [codegen id : 5] +Output [2]: [ss_ext_sales_price#4, i_manufact_id#10] +Input [4]: [ss_addr_sk#3, ss_ext_sales_price#4, i_manufact_id#10, ca_address_sk#15] + +(30) HashAggregate [codegen id : 5] +Input [2]: [ss_ext_sales_price#4, i_manufact_id#10] +Keys [1]: [i_manufact_id#10] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#4))] +Aggregate Attributes [1]: [sum#18] +Results [2]: [i_manufact_id#10, sum#19] + +(31) Exchange +Input [2]: [i_manufact_id#10, sum#19] +Arguments: hashpartitioning(i_manufact_id#10, 5), true, [id=#20] + +(32) HashAggregate [codegen id : 6] +Input [2]: [i_manufact_id#10, sum#19] 
+Keys [1]: [i_manufact_id#10] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#4))#21] +Results [2]: [i_manufact_id#10, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#21,17,2) AS total_sales#22] + +(33) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 11] +Input [4]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] + +(35) Filter [codegen id : 11] +Input [4]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] +Condition : ((isnotnull(cs_sold_date_sk#23) AND isnotnull(cs_bill_addr_sk#24)) AND isnotnull(cs_item_sk#25)) + +(36) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(37) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_sold_date_sk#23] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(38) Project [codegen id : 11] +Output [3]: [cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] +Input [5]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26, d_date_sk#5] + +(39) ReusedExchange [Reuses operator id: 20] +Output [2]: [i_item_sk#9, i_manufact_id#10] + +(40) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_item_sk#25] +Right keys [1]: [i_item_sk#9] +Join condition: None + +(41) Project [codegen id : 11] +Output [3]: [cs_bill_addr_sk#24, cs_ext_sales_price#26, i_manufact_id#10] +Input [5]: [cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26, i_item_sk#9, i_manufact_id#10] + +(42) ReusedExchange [Reuses operator id: 27] +Output [1]: 
[ca_address_sk#15] + +(43) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_bill_addr_sk#24] +Right keys [1]: [ca_address_sk#15] +Join condition: None + +(44) Project [codegen id : 11] +Output [2]: [cs_ext_sales_price#26, i_manufact_id#10] +Input [4]: [cs_bill_addr_sk#24, cs_ext_sales_price#26, i_manufact_id#10, ca_address_sk#15] + +(45) HashAggregate [codegen id : 11] +Input [2]: [cs_ext_sales_price#26, i_manufact_id#10] +Keys [1]: [i_manufact_id#10] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#26))] +Aggregate Attributes [1]: [sum#27] +Results [2]: [i_manufact_id#10, sum#28] + +(46) Exchange +Input [2]: [i_manufact_id#10, sum#28] +Arguments: hashpartitioning(i_manufact_id#10, 5), true, [id=#29] + +(47) HashAggregate [codegen id : 12] +Input [2]: [i_manufact_id#10, sum#28] +Keys [1]: [i_manufact_id#10] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#26))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#26))#30] +Results [2]: [i_manufact_id#10, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#26))#30,17,2) AS total_sales#31] + +(48) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(49) ColumnarToRow [codegen id : 17] +Input [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] + +(50) Filter [codegen id : 17] +Input [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] +Condition : ((isnotnull(ws_sold_date_sk#32) AND isnotnull(ws_bill_addr_sk#34)) AND isnotnull(ws_item_sk#33)) + +(51) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(52) BroadcastHashJoin [codegen id : 17] 
+Left keys [1]: [ws_sold_date_sk#32] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(53) Project [codegen id : 17] +Output [3]: [ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] +Input [5]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35, d_date_sk#5] + +(54) ReusedExchange [Reuses operator id: 20] +Output [2]: [i_item_sk#9, i_manufact_id#10] + +(55) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_item_sk#33] +Right keys [1]: [i_item_sk#9] +Join condition: None + +(56) Project [codegen id : 17] +Output [3]: [ws_bill_addr_sk#34, ws_ext_sales_price#35, i_manufact_id#10] +Input [5]: [ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35, i_item_sk#9, i_manufact_id#10] + +(57) ReusedExchange [Reuses operator id: 27] +Output [1]: [ca_address_sk#15] + +(58) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_bill_addr_sk#34] +Right keys [1]: [ca_address_sk#15] +Join condition: None + +(59) Project [codegen id : 17] +Output [2]: [ws_ext_sales_price#35, i_manufact_id#10] +Input [4]: [ws_bill_addr_sk#34, ws_ext_sales_price#35, i_manufact_id#10, ca_address_sk#15] + +(60) HashAggregate [codegen id : 17] +Input [2]: [ws_ext_sales_price#35, i_manufact_id#10] +Keys [1]: [i_manufact_id#10] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#35))] +Aggregate Attributes [1]: [sum#36] +Results [2]: [i_manufact_id#10, sum#37] + +(61) Exchange +Input [2]: [i_manufact_id#10, sum#37] +Arguments: hashpartitioning(i_manufact_id#10, 5), true, [id=#38] + +(62) HashAggregate [codegen id : 18] +Input [2]: [i_manufact_id#10, sum#37] +Keys [1]: [i_manufact_id#10] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#35))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#35))#39] +Results [2]: [i_manufact_id#10, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#35))#39,17,2) AS total_sales#40] + +(63) Union + +(64) HashAggregate [codegen id : 19] +Input [2]: [i_manufact_id#10, total_sales#22] +Keys [1]: 
[i_manufact_id#10] +Functions [1]: [partial_sum(total_sales#22)] +Aggregate Attributes [2]: [sum#41, isEmpty#42] +Results [3]: [i_manufact_id#10, sum#43, isEmpty#44] + +(65) Exchange +Input [3]: [i_manufact_id#10, sum#43, isEmpty#44] +Arguments: hashpartitioning(i_manufact_id#10, 5), true, [id=#45] + +(66) HashAggregate [codegen id : 20] +Input [3]: [i_manufact_id#10, sum#43, isEmpty#44] +Keys [1]: [i_manufact_id#10] +Functions [1]: [sum(total_sales#22)] +Aggregate Attributes [1]: [sum(total_sales#22)#46] +Results [2]: [i_manufact_id#10, sum(total_sales#22)#46 AS total_sales#47] + +(67) TakeOrderedAndProject +Input [2]: [i_manufact_id#10, total_sales#47] +Arguments: 100, [total_sales#47 ASC NULLS FIRST], [i_manufact_id#10, total_sales#47] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.sf100/simplified.txt new file mode 100644 index 0000000000000..e810c4bfbf62d --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.sf100/simplified.txt @@ -0,0 +1,101 @@ +TakeOrderedAndProject [i_manufact_id,total_sales] + WholeStageCodegen (20) + HashAggregate [i_manufact_id,isEmpty,sum] [isEmpty,sum,sum(total_sales),total_sales] + InputAdapter + Exchange [i_manufact_id] #1 + WholeStageCodegen (19) + HashAggregate [i_manufact_id,total_sales] [isEmpty,isEmpty,sum,sum] + InputAdapter + Union + WholeStageCodegen (6) + HashAggregate [i_manufact_id,sum] [sum,sum(UnscaledValue(ss_ext_sales_price)),total_sales] + InputAdapter + Exchange [i_manufact_id] #2 + WholeStageCodegen (5) + HashAggregate [i_manufact_id,ss_ext_sales_price] [sum,sum] + Project [i_manufact_id,ss_ext_sales_price] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] + Project [i_manufact_id,ss_addr_sk,ss_ext_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_addr_sk,ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin 
[d_date_sk,ss_sold_date_sk] + Filter [ss_addr_sk,ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + BroadcastHashJoin [i_manufact_id,i_manufact_id] + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_manufact_id] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Project [i_manufact_id] + Filter [i_category] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_manufact_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + Project [ca_address_sk] + Filter [ca_address_sk,ca_gmt_offset] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] + WholeStageCodegen (12) + HashAggregate [i_manufact_id,sum] [sum,sum(UnscaledValue(cs_ext_sales_price)),total_sales] + InputAdapter + Exchange [i_manufact_id] #7 + WholeStageCodegen (11) + HashAggregate [cs_ext_sales_price,i_manufact_id] [sum,sum] + Project [cs_ext_sales_price,i_manufact_id] + BroadcastHashJoin [ca_address_sk,cs_bill_addr_sk] + Project [cs_bill_addr_sk,cs_ext_sales_price,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_bill_addr_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [i_item_sk,i_manufact_id] #4 + InputAdapter + ReusedExchange [ca_address_sk] #6 + WholeStageCodegen (18) + HashAggregate 
[i_manufact_id,sum] [sum,sum(UnscaledValue(ws_ext_sales_price)),total_sales] + InputAdapter + Exchange [i_manufact_id] #8 + WholeStageCodegen (17) + HashAggregate [i_manufact_id,ws_ext_sales_price] [sum,sum] + Project [i_manufact_id,ws_ext_sales_price] + BroadcastHashJoin [ca_address_sk,ws_bill_addr_sk] + Project [i_manufact_id,ws_bill_addr_sk,ws_ext_sales_price] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_bill_addr_sk,ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [i_item_sk,i_manufact_id] #4 + InputAdapter + ReusedExchange [ca_address_sk] #6 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33/explain.txt new file mode 100644 index 0000000000000..78227141b3a2a --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33/explain.txt @@ -0,0 +1,378 @@ +== Physical Plan == +TakeOrderedAndProject (67) ++- * HashAggregate (66) + +- Exchange (65) + +- * HashAggregate (64) + +- Union (63) + :- * HashAggregate (32) + : +- Exchange (31) + : +- * HashAggregate (30) + : +- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : 
+- * ColumnarToRow (12) + : : +- Scan parquet default.customer_address (11) + : +- BroadcastExchange (27) + : +- * BroadcastHashJoin LeftSemi BuildRight (26) + : :- * Filter (20) + : : +- * ColumnarToRow (19) + : : +- Scan parquet default.item (18) + : +- BroadcastExchange (25) + : +- * Project (24) + : +- * Filter (23) + : +- * ColumnarToRow (22) + : +- Scan parquet default.item (21) + :- * HashAggregate (47) + : +- Exchange (46) + : +- * HashAggregate (45) + : +- * Project (44) + : +- * BroadcastHashJoin Inner BuildRight (43) + : :- * Project (41) + : : +- * BroadcastHashJoin Inner BuildRight (40) + : : :- * Project (38) + : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : :- * Filter (35) + : : : : +- * ColumnarToRow (34) + : : : : +- Scan parquet default.catalog_sales (33) + : : : +- ReusedExchange (36) + : : +- ReusedExchange (39) + : +- ReusedExchange (42) + +- * HashAggregate (62) + +- Exchange (61) + +- * HashAggregate (60) + +- * Project (59) + +- * BroadcastHashJoin Inner BuildRight (58) + :- * Project (56) + : +- * BroadcastHashJoin Inner BuildRight (55) + : :- * Project (53) + : : +- * BroadcastHashJoin Inner BuildRight (52) + : : :- * Filter (50) + : : : +- * ColumnarToRow (49) + : : : +- Scan parquet default.web_sales (48) + : : +- ReusedExchange (51) + : +- ReusedExchange (54) + +- ReusedExchange (57) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] + +(3) Filter [codegen id : 5] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] +Condition : 
((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_addr_sk#3)) AND isnotnull(ss_item_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#5, d_year#6, d_moy#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,5), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Condition : ((((isnotnull(d_year#6) AND isnotnull(d_moy#7)) AND (d_year#6 = 1998)) AND (d_moy#7 = 5)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(10) Project [codegen id : 5] +Output [3]: [ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4, d_date_sk#5] + +(11) Scan parquet default.customer_address +Output [2]: [ca_address_sk#9, ca_gmt_offset#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [ca_address_sk#9, ca_gmt_offset#10] + +(13) Filter [codegen id : 2] +Input [2]: [ca_address_sk#9, ca_gmt_offset#10] +Condition : ((isnotnull(ca_gmt_offset#10) AND (ca_gmt_offset#10 = -5.00)) AND 
isnotnull(ca_address_sk#9)) + +(14) Project [codegen id : 2] +Output [1]: [ca_address_sk#9] +Input [2]: [ca_address_sk#9, ca_gmt_offset#10] + +(15) BroadcastExchange +Input [1]: [ca_address_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_addr_sk#3] +Right keys [1]: [ca_address_sk#9] +Join condition: None + +(17) Project [codegen id : 5] +Output [2]: [ss_item_sk#2, ss_ext_sales_price#4] +Input [4]: [ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4, ca_address_sk#9] + +(18) Scan parquet default.item +Output [2]: [i_item_sk#12, i_manufact_id#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#12, i_manufact_id#13] + +(20) Filter [codegen id : 4] +Input [2]: [i_item_sk#12, i_manufact_id#13] +Condition : isnotnull(i_item_sk#12) + +(21) Scan parquet default.item +Output [2]: [i_category#14, i_manufact_id#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Electronics)] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 3] +Input [2]: [i_category#14, i_manufact_id#13] + +(23) Filter [codegen id : 3] +Input [2]: [i_category#14, i_manufact_id#13] +Condition : (isnotnull(i_category#14) AND (i_category#14 = Electronics)) + +(24) Project [codegen id : 3] +Output [1]: [i_manufact_id#13 AS i_manufact_id#13#15] +Input [2]: [i_category#14, i_manufact_id#13] + +(25) BroadcastExchange +Input [1]: [i_manufact_id#13#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(26) BroadcastHashJoin [codegen id : 4] 
+Left keys [1]: [i_manufact_id#13] +Right keys [1]: [i_manufact_id#13#15] +Join condition: None + +(27) BroadcastExchange +Input [2]: [i_item_sk#12, i_manufact_id#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#12] +Join condition: None + +(29) Project [codegen id : 5] +Output [2]: [ss_ext_sales_price#4, i_manufact_id#13] +Input [4]: [ss_item_sk#2, ss_ext_sales_price#4, i_item_sk#12, i_manufact_id#13] + +(30) HashAggregate [codegen id : 5] +Input [2]: [ss_ext_sales_price#4, i_manufact_id#13] +Keys [1]: [i_manufact_id#13] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#4))] +Aggregate Attributes [1]: [sum#18] +Results [2]: [i_manufact_id#13, sum#19] + +(31) Exchange +Input [2]: [i_manufact_id#13, sum#19] +Arguments: hashpartitioning(i_manufact_id#13, 5), true, [id=#20] + +(32) HashAggregate [codegen id : 6] +Input [2]: [i_manufact_id#13, sum#19] +Keys [1]: [i_manufact_id#13] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#4))#21] +Results [2]: [i_manufact_id#13, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#21,17,2) AS total_sales#22] + +(33) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 11] +Input [4]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] + +(35) Filter [codegen id : 11] +Input [4]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] +Condition : 
((isnotnull(cs_sold_date_sk#23) AND isnotnull(cs_bill_addr_sk#24)) AND isnotnull(cs_item_sk#25)) + +(36) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(37) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_sold_date_sk#23] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(38) Project [codegen id : 11] +Output [3]: [cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] +Input [5]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26, d_date_sk#5] + +(39) ReusedExchange [Reuses operator id: 15] +Output [1]: [ca_address_sk#9] + +(40) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_bill_addr_sk#24] +Right keys [1]: [ca_address_sk#9] +Join condition: None + +(41) Project [codegen id : 11] +Output [2]: [cs_item_sk#25, cs_ext_sales_price#26] +Input [4]: [cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26, ca_address_sk#9] + +(42) ReusedExchange [Reuses operator id: 27] +Output [2]: [i_item_sk#12, i_manufact_id#13] + +(43) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_item_sk#25] +Right keys [1]: [i_item_sk#12] +Join condition: None + +(44) Project [codegen id : 11] +Output [2]: [cs_ext_sales_price#26, i_manufact_id#13] +Input [4]: [cs_item_sk#25, cs_ext_sales_price#26, i_item_sk#12, i_manufact_id#13] + +(45) HashAggregate [codegen id : 11] +Input [2]: [cs_ext_sales_price#26, i_manufact_id#13] +Keys [1]: [i_manufact_id#13] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#26))] +Aggregate Attributes [1]: [sum#27] +Results [2]: [i_manufact_id#13, sum#28] + +(46) Exchange +Input [2]: [i_manufact_id#13, sum#28] +Arguments: hashpartitioning(i_manufact_id#13, 5), true, [id=#29] + +(47) HashAggregate [codegen id : 12] +Input [2]: [i_manufact_id#13, sum#28] +Keys [1]: [i_manufact_id#13] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#26))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#26))#30] +Results [2]: [i_manufact_id#13, 
MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#26))#30,17,2) AS total_sales#31] + +(48) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(49) ColumnarToRow [codegen id : 17] +Input [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] + +(50) Filter [codegen id : 17] +Input [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] +Condition : ((isnotnull(ws_sold_date_sk#32) AND isnotnull(ws_bill_addr_sk#34)) AND isnotnull(ws_item_sk#33)) + +(51) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(52) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_sold_date_sk#32] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(53) Project [codegen id : 17] +Output [3]: [ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] +Input [5]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35, d_date_sk#5] + +(54) ReusedExchange [Reuses operator id: 15] +Output [1]: [ca_address_sk#9] + +(55) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_bill_addr_sk#34] +Right keys [1]: [ca_address_sk#9] +Join condition: None + +(56) Project [codegen id : 17] +Output [2]: [ws_item_sk#33, ws_ext_sales_price#35] +Input [4]: [ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35, ca_address_sk#9] + +(57) ReusedExchange [Reuses operator id: 27] +Output [2]: [i_item_sk#12, i_manufact_id#13] + +(58) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_item_sk#33] +Right keys [1]: [i_item_sk#12] +Join condition: None + +(59) Project [codegen id : 17] +Output [2]: [ws_ext_sales_price#35, i_manufact_id#13] +Input [4]: 
[ws_item_sk#33, ws_ext_sales_price#35, i_item_sk#12, i_manufact_id#13] + +(60) HashAggregate [codegen id : 17] +Input [2]: [ws_ext_sales_price#35, i_manufact_id#13] +Keys [1]: [i_manufact_id#13] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#35))] +Aggregate Attributes [1]: [sum#36] +Results [2]: [i_manufact_id#13, sum#37] + +(61) Exchange +Input [2]: [i_manufact_id#13, sum#37] +Arguments: hashpartitioning(i_manufact_id#13, 5), true, [id=#38] + +(62) HashAggregate [codegen id : 18] +Input [2]: [i_manufact_id#13, sum#37] +Keys [1]: [i_manufact_id#13] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#35))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#35))#39] +Results [2]: [i_manufact_id#13, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#35))#39,17,2) AS total_sales#40] + +(63) Union + +(64) HashAggregate [codegen id : 19] +Input [2]: [i_manufact_id#13, total_sales#22] +Keys [1]: [i_manufact_id#13] +Functions [1]: [partial_sum(total_sales#22)] +Aggregate Attributes [2]: [sum#41, isEmpty#42] +Results [3]: [i_manufact_id#13, sum#43, isEmpty#44] + +(65) Exchange +Input [3]: [i_manufact_id#13, sum#43, isEmpty#44] +Arguments: hashpartitioning(i_manufact_id#13, 5), true, [id=#45] + +(66) HashAggregate [codegen id : 20] +Input [3]: [i_manufact_id#13, sum#43, isEmpty#44] +Keys [1]: [i_manufact_id#13] +Functions [1]: [sum(total_sales#22)] +Aggregate Attributes [1]: [sum(total_sales#22)#46] +Results [2]: [i_manufact_id#13, sum(total_sales#22)#46 AS total_sales#47] + +(67) TakeOrderedAndProject +Input [2]: [i_manufact_id#13, total_sales#47] +Arguments: 100, [total_sales#47 ASC NULLS FIRST], [i_manufact_id#13, total_sales#47] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33/simplified.txt new file mode 100644 index 0000000000000..5b0cca34b3c70 --- /dev/null +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33/simplified.txt @@ -0,0 +1,101 @@ +TakeOrderedAndProject [i_manufact_id,total_sales] + WholeStageCodegen (20) + HashAggregate [i_manufact_id,isEmpty,sum] [isEmpty,sum,sum(total_sales),total_sales] + InputAdapter + Exchange [i_manufact_id] #1 + WholeStageCodegen (19) + HashAggregate [i_manufact_id,total_sales] [isEmpty,isEmpty,sum,sum] + InputAdapter + Union + WholeStageCodegen (6) + HashAggregate [i_manufact_id,sum] [sum,sum(UnscaledValue(ss_ext_sales_price)),total_sales] + InputAdapter + Exchange [i_manufact_id] #2 + WholeStageCodegen (5) + HashAggregate [i_manufact_id,ss_ext_sales_price] [sum,sum] + Project [i_manufact_id,ss_ext_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] + Project [ss_addr_sk,ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_addr_sk,ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [ca_address_sk] + Filter [ca_address_sk,ca_gmt_offset] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + BroadcastHashJoin [i_manufact_id,i_manufact_id] + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_manufact_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Project [i_manufact_id] + Filter [i_category] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_manufact_id] + 
WholeStageCodegen (12) + HashAggregate [i_manufact_id,sum] [sum,sum(UnscaledValue(cs_ext_sales_price)),total_sales] + InputAdapter + Exchange [i_manufact_id] #7 + WholeStageCodegen (11) + HashAggregate [cs_ext_sales_price,i_manufact_id] [sum,sum] + Project [cs_ext_sales_price,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_ext_sales_price,cs_item_sk] + BroadcastHashJoin [ca_address_sk,cs_bill_addr_sk] + Project [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_bill_addr_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [ca_address_sk] #4 + InputAdapter + ReusedExchange [i_item_sk,i_manufact_id] #5 + WholeStageCodegen (18) + HashAggregate [i_manufact_id,sum] [sum,sum(UnscaledValue(ws_ext_sales_price)),total_sales] + InputAdapter + Exchange [i_manufact_id] #8 + WholeStageCodegen (17) + HashAggregate [i_manufact_id,ws_ext_sales_price] [sum,sum] + Project [i_manufact_id,ws_ext_sales_price] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_ext_sales_price,ws_item_sk] + BroadcastHashJoin [ca_address_sk,ws_bill_addr_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_bill_addr_sk,ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [ca_address_sk] #4 + InputAdapter + ReusedExchange [i_item_sk,i_manufact_id] #5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.sf100/explain.txt new file mode 100644 index 
0000000000000..b17257a890db2 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.sf100/explain.txt @@ -0,0 +1,218 @@ +== Physical Plan == +* Sort (39) ++- Exchange (38) + +- * Project (37) + +- * SortMergeJoin Inner (36) + :- * Sort (30) + : +- Exchange (29) + : +- * Filter (28) + : +- * HashAggregate (27) + : +- Exchange (26) + : +- * HashAggregate (25) + : +- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.store (11) + : +- BroadcastExchange (22) + : +- * Project (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.household_demographics (18) + +- * Sort (35) + +- Exchange (34) + +- * Filter (33) + +- * ColumnarToRow (32) + +- Scan parquet default.customer (31) + + +(1) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] + +(3) Filter [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_customer_sk#2, 
ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] +Condition : (((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#4)) AND isnotnull(ss_hdemo_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#6, d_year#7, d_dom#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [Or(And(GreaterThanOrEqual(d_dom,1),LessThanOrEqual(d_dom,3)),And(GreaterThanOrEqual(d_dom,25),LessThanOrEqual(d_dom,28))), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] +Condition : (((((d_dom#8 >= 1) AND (d_dom#8 <= 3)) OR ((d_dom#8 >= 25) AND (d_dom#8 <= 28))) AND d_year#7 IN (1999,2000,2001)) AND isnotnull(d_date_sk#6)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] + +(8) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] +Input [6]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, d_date_sk#6] + +(11) Scan parquet default.store +Output [2]: [s_store_sk#10, s_county#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_county), EqualTo(s_county,Williamson County), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input 
[2]: [s_store_sk#10, s_county#11] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#10, s_county#11] +Condition : ((isnotnull(s_county#11) AND (s_county#11 = Williamson County)) AND isnotnull(s_store_sk#10)) + +(14) Project [codegen id : 2] +Output [1]: [s_store_sk#10] +Input [2]: [s_store_sk#10, s_county#11] + +(15) BroadcastExchange +Input [1]: [s_store_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#10] +Join condition: None + +(17) Project [codegen id : 4] +Output [3]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_ticket_number#5] +Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, s_store_sk#10] + +(18) Scan parquet default.household_demographics +Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/household_demographics] +PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] + +(20) Filter [codegen id : 3] +Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] +Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.2)) AND isnotnull(hd_demo_sk#13)) + +(21) Project [codegen id : 3] +Output [1]: [hd_demo_sk#13] +Input [4]: [hd_demo_sk#13, hd_buy_potential#14, 
hd_dep_count#15, hd_vehicle_count#16] + +(22) BroadcastExchange +Input [1]: [hd_demo_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_hdemo_sk#3] +Right keys [1]: [hd_demo_sk#13] +Join condition: None + +(24) Project [codegen id : 4] +Output [2]: [ss_customer_sk#2, ss_ticket_number#5] +Input [4]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_ticket_number#5, hd_demo_sk#13] + +(25) HashAggregate [codegen id : 4] +Input [2]: [ss_customer_sk#2, ss_ticket_number#5] +Keys [2]: [ss_ticket_number#5, ss_customer_sk#2] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#18] +Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] + +(26) Exchange +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] + +(27) HashAggregate [codegen id : 5] +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] +Keys [2]: [ss_ticket_number#5, ss_customer_sk#2] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#21] +Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count(1)#21 AS cnt#22] + +(28) Filter [codegen id : 5] +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] +Condition : ((cnt#22 >= 15) AND (cnt#22 <= 20)) + +(29) Exchange +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] +Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#23] + +(30) Sort [codegen id : 6] +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] +Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 + +(31) Scan parquet default.customer +Output [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer] +PushedFilters: 
[IsNotNull(c_customer_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 7] +Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] + +(33) Filter [codegen id : 7] +Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] +Condition : isnotnull(c_customer_sk#24) + +(34) Exchange +Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] +Arguments: hashpartitioning(c_customer_sk#24, 5), true, [id=#29] + +(35) Sort [codegen id : 8] +Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] +Arguments: [c_customer_sk#24 ASC NULLS FIRST], false, 0 + +(36) SortMergeJoin [codegen id : 9] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#24] +Join condition: None + +(37) Project [codegen id : 9] +Output [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] +Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] + +(38) Exchange +Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] +Arguments: rangepartitioning(c_last_name#27 ASC NULLS FIRST, c_first_name#26 ASC NULLS FIRST, c_salutation#25 ASC NULLS FIRST, c_preferred_cust_flag#28 DESC NULLS LAST, 5), true, [id=#30] + +(39) Sort [codegen id : 10] +Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] +Arguments: [c_last_name#27 ASC NULLS FIRST, c_first_name#26 ASC NULLS FIRST, c_salutation#25 ASC NULLS FIRST, c_preferred_cust_flag#28 DESC NULLS LAST], true, 0 + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.sf100/simplified.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.sf100/simplified.txt new file mode 100644 index 0000000000000..94ec6ce18503e --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.sf100/simplified.txt @@ -0,0 +1,63 @@ +WholeStageCodegen (10) + Sort [c_first_name,c_last_name,c_preferred_cust_flag,c_salutation] + InputAdapter + Exchange [c_first_name,c_last_name,c_preferred_cust_flag,c_salutation] #1 + WholeStageCodegen (9) + Project [c_first_name,c_last_name,c_preferred_cust_flag,c_salutation,cnt,ss_ticket_number] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (6) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #2 + WholeStageCodegen (5) + Filter [cnt] + HashAggregate [count,ss_customer_sk,ss_ticket_number] [cnt,count,count(1)] + InputAdapter + Exchange [ss_customer_sk,ss_ticket_number] #3 + WholeStageCodegen (4) + HashAggregate [ss_customer_sk,ss_ticket_number] [count,count] + Project [ss_customer_sk,ss_ticket_number] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_ticket_number] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_dom,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_dom,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Project [s_store_sk] + Filter [s_county,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_county,s_store_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Project 
[hd_demo_sk] + Filter [hd_buy_potential,hd_demo_sk,hd_dep_count,hd_vehicle_count] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_buy_potential,hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + WholeStageCodegen (8) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #7 + WholeStageCodegen (7) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_preferred_cust_flag,c_salutation] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/explain.txt new file mode 100644 index 0000000000000..3183f43c67433 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/explain.txt @@ -0,0 +1,203 @@ +== Physical Plan == +* Sort (36) ++- Exchange (35) + +- * Project (34) + +- * BroadcastHashJoin Inner BuildRight (33) + :- * Filter (28) + : +- * HashAggregate (27) + : +- Exchange (26) + : +- * HashAggregate (25) + : +- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.store (11) + : +- BroadcastExchange (22) + : +- * Project (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.household_demographics (18) + +- BroadcastExchange (32) + +- * Filter (31) + +- * ColumnarToRow (30) + +- Scan parquet default.customer (29) + 
+ +(1) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] + +(3) Filter [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] +Condition : (((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#4)) AND isnotnull(ss_hdemo_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#6, d_year#7, d_dom#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [Or(And(GreaterThanOrEqual(d_dom,1),LessThanOrEqual(d_dom,3)),And(GreaterThanOrEqual(d_dom,25),LessThanOrEqual(d_dom,28))), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] +Condition : (((((d_dom#8 >= 1) AND (d_dom#8 <= 3)) OR ((d_dom#8 >= 25) AND (d_dom#8 <= 28))) AND d_year#7 IN (1999,2000,2001)) AND isnotnull(d_date_sk#6)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] + +(8) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#6] 
+Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] +Input [6]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, d_date_sk#6] + +(11) Scan parquet default.store +Output [2]: [s_store_sk#10, s_county#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_county), EqualTo(s_county,Williamson County), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#10, s_county#11] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#10, s_county#11] +Condition : ((isnotnull(s_county#11) AND (s_county#11 = Williamson County)) AND isnotnull(s_store_sk#10)) + +(14) Project [codegen id : 2] +Output [1]: [s_store_sk#10] +Input [2]: [s_store_sk#10, s_county#11] + +(15) BroadcastExchange +Input [1]: [s_store_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#10] +Join condition: None + +(17) Project [codegen id : 4] +Output [3]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_ticket_number#5] +Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, s_store_sk#10] + +(18) Scan parquet default.household_demographics +Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/household_demographics] +PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id 
: 3] +Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] + +(20) Filter [codegen id : 3] +Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] +Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.2)) AND isnotnull(hd_demo_sk#13)) + +(21) Project [codegen id : 3] +Output [1]: [hd_demo_sk#13] +Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] + +(22) BroadcastExchange +Input [1]: [hd_demo_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_hdemo_sk#3] +Right keys [1]: [hd_demo_sk#13] +Join condition: None + +(24) Project [codegen id : 4] +Output [2]: [ss_customer_sk#2, ss_ticket_number#5] +Input [4]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_ticket_number#5, hd_demo_sk#13] + +(25) HashAggregate [codegen id : 4] +Input [2]: [ss_customer_sk#2, ss_ticket_number#5] +Keys [2]: [ss_ticket_number#5, ss_customer_sk#2] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#18] +Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] + +(26) Exchange +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] + +(27) HashAggregate [codegen id : 6] +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] +Keys [2]: [ss_ticket_number#5, ss_customer_sk#2] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#21] +Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count(1)#21 AS cnt#22] + +(28) Filter [codegen id : 6] +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] +Condition : ((cnt#22 >= 15) AND (cnt#22 <= 
20)) + +(29) Scan parquet default.customer +Output [5]: [c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 5] +Input [5]: [c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] + +(31) Filter [codegen id : 5] +Input [5]: [c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] +Condition : isnotnull(c_customer_sk#23) + +(32) BroadcastExchange +Input [5]: [c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#28] + +(33) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#23] +Join condition: None + +(34) Project [codegen id : 6] +Output [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] +Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] + +(35) Exchange +Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] +Arguments: rangepartitioning(c_last_name#26 ASC NULLS FIRST, c_first_name#25 ASC NULLS FIRST, c_salutation#24 ASC NULLS FIRST, c_preferred_cust_flag#27 DESC NULLS LAST, 5), true, [id=#29] + +(36) Sort [codegen id : 7] +Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] +Arguments: [c_last_name#26 ASC NULLS FIRST, c_first_name#25 ASC NULLS FIRST, c_salutation#24 ASC NULLS FIRST, c_preferred_cust_flag#27 DESC NULLS 
LAST], true, 0 + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/simplified.txt new file mode 100644 index 0000000000000..12cd87e119622 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/simplified.txt @@ -0,0 +1,54 @@ +WholeStageCodegen (7) + Sort [c_first_name,c_last_name,c_preferred_cust_flag,c_salutation] + InputAdapter + Exchange [c_first_name,c_last_name,c_preferred_cust_flag,c_salutation] #1 + WholeStageCodegen (6) + Project [c_first_name,c_last_name,c_preferred_cust_flag,c_salutation,cnt,ss_ticket_number] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [cnt] + HashAggregate [count,ss_customer_sk,ss_ticket_number] [cnt,count,count(1)] + InputAdapter + Exchange [ss_customer_sk,ss_ticket_number] #2 + WholeStageCodegen (4) + HashAggregate [ss_customer_sk,ss_ticket_number] [count,count] + Project [ss_customer_sk,ss_ticket_number] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_ticket_number] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_dom,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_dom,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [s_store_sk] + Filter [s_county,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_county,s_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [hd_demo_sk] + Filter 
[hd_buy_potential,hd_demo_sk,hd_dep_count,hd_vehicle_count] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_buy_potential,hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_preferred_cust_flag,c_salutation] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35.sf100/explain.txt new file mode 100644 index 0000000000000..527e77d7a7afc --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35.sf100/explain.txt @@ -0,0 +1,329 @@ +== Physical Plan == +TakeOrderedAndProject (60) ++- * HashAggregate (59) + +- Exchange (58) + +- * HashAggregate (57) + +- * Project (56) + +- * SortMergeJoin Inner (55) + :- * Sort (49) + : +- Exchange (48) + : +- * Project (47) + : +- * SortMergeJoin Inner (46) + : :- * Sort (40) + : : +- Exchange (39) + : : +- * Project (38) + : : +- * Filter (37) + : : +- SortMergeJoin ExistenceJoin(exists#1) (36) + : : :- SortMergeJoin ExistenceJoin(exists#2) (27) + : : : :- SortMergeJoin LeftSemi (18) + : : : : :- * Sort (5) + : : : : : +- Exchange (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.customer (1) + : : : : +- * Sort (17) + : : : : +- Exchange (16) + : : : : +- * Project (15) + : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : :- * Filter (8) + : : : : : +- * ColumnarToRow (7) + : : : : : +- Scan parquet default.store_sales (6) + : : : : +- BroadcastExchange (13) + : : : : +- * Project (12) + : : : : +- * Filter (11) + : : : : +- * ColumnarToRow (10) + : : : : +- Scan parquet default.date_dim (9) + : : : +- * Sort (26) + : : : +- Exchange (25) + : : : +- * Project (24) + : : : +- * BroadcastHashJoin Inner 
BuildRight (23) + : : : :- * Filter (21) + : : : : +- * ColumnarToRow (20) + : : : : +- Scan parquet default.web_sales (19) + : : : +- ReusedExchange (22) + : : +- * Sort (35) + : : +- Exchange (34) + : : +- * Project (33) + : : +- * BroadcastHashJoin Inner BuildRight (32) + : : :- * Filter (30) + : : : +- * ColumnarToRow (29) + : : : +- Scan parquet default.catalog_sales (28) + : : +- ReusedExchange (31) + : +- * Sort (45) + : +- Exchange (44) + : +- * Filter (43) + : +- * ColumnarToRow (42) + : +- Scan parquet default.customer_address (41) + +- * Sort (54) + +- Exchange (53) + +- * Filter (52) + +- * ColumnarToRow (51) + +- Scan parquet default.customer_demographics (50) + + +(1) Scan parquet default.customer +Output [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] + +(3) Filter [codegen id : 1] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Condition : (isnotnull(c_current_addr_sk#5) AND isnotnull(c_current_cdemo_sk#4)) + +(4) Exchange +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Arguments: hashpartitioning(c_customer_sk#3, 5), true, [id=#6] + +(5) Sort [codegen id : 2] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Arguments: [c_customer_sk#3 ASC NULLS FIRST], false, 0 + +(6) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#7, ss_customer_sk#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk)] 
+ReadSchema: struct + +(7) ColumnarToRow [codegen id : 4] +Input [2]: [ss_sold_date_sk#7, ss_customer_sk#8] + +(8) Filter [codegen id : 4] +Input [2]: [ss_sold_date_sk#7, ss_customer_sk#8] +Condition : isnotnull(ss_sold_date_sk#7) + +(9) Scan parquet default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_qoy#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_qoy), EqualTo(d_year,2002), LessThan(d_qoy,4), IsNotNull(d_date_sk)] +ReadSchema: struct + +(10) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#9, d_year#10, d_qoy#11] + +(11) Filter [codegen id : 3] +Input [3]: [d_date_sk#9, d_year#10, d_qoy#11] +Condition : ((((isnotnull(d_year#10) AND isnotnull(d_qoy#11)) AND (d_year#10 = 2002)) AND (d_qoy#11 < 4)) AND isnotnull(d_date_sk#9)) + +(12) Project [codegen id : 3] +Output [1]: [d_date_sk#9] +Input [3]: [d_date_sk#9, d_year#10, d_qoy#11] + +(13) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(15) Project [codegen id : 4] +Output [1]: [ss_customer_sk#8] +Input [3]: [ss_sold_date_sk#7, ss_customer_sk#8, d_date_sk#9] + +(16) Exchange +Input [1]: [ss_customer_sk#8] +Arguments: hashpartitioning(ss_customer_sk#8, 5), true, [id=#13] + +(17) Sort [codegen id : 5] +Input [1]: [ss_customer_sk#8] +Arguments: [ss_customer_sk#8 ASC NULLS FIRST], false, 0 + +(18) SortMergeJoin +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ss_customer_sk#8] +Join condition: None + +(19) Scan parquet default.web_sales +Output [2]: [ws_sold_date_sk#14, ws_bill_customer_sk#15] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 7] +Input [2]: [ws_sold_date_sk#14, ws_bill_customer_sk#15] + +(21) Filter [codegen id : 7] +Input [2]: [ws_sold_date_sk#14, ws_bill_customer_sk#15] +Condition : isnotnull(ws_sold_date_sk#14) + +(22) ReusedExchange [Reuses operator id: 13] +Output [1]: [d_date_sk#9] + +(23) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ws_sold_date_sk#14] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(24) Project [codegen id : 7] +Output [1]: [ws_bill_customer_sk#15] +Input [3]: [ws_sold_date_sk#14, ws_bill_customer_sk#15, d_date_sk#9] + +(25) Exchange +Input [1]: [ws_bill_customer_sk#15] +Arguments: hashpartitioning(ws_bill_customer_sk#15, 5), true, [id=#16] + +(26) Sort [codegen id : 8] +Input [1]: [ws_bill_customer_sk#15] +Arguments: [ws_bill_customer_sk#15 ASC NULLS FIRST], false, 0 + +(27) SortMergeJoin +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ws_bill_customer_sk#15] +Join condition: None + +(28) Scan parquet default.catalog_sales +Output [2]: [cs_sold_date_sk#17, cs_ship_customer_sk#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 10] +Input [2]: [cs_sold_date_sk#17, cs_ship_customer_sk#18] + +(30) Filter [codegen id : 10] +Input [2]: [cs_sold_date_sk#17, cs_ship_customer_sk#18] +Condition : isnotnull(cs_sold_date_sk#17) + +(31) ReusedExchange [Reuses operator id: 13] +Output [1]: [d_date_sk#9] + +(32) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_sold_date_sk#17] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(33) Project [codegen id : 10] +Output [1]: 
[cs_ship_customer_sk#18] +Input [3]: [cs_sold_date_sk#17, cs_ship_customer_sk#18, d_date_sk#9] + +(34) Exchange +Input [1]: [cs_ship_customer_sk#18] +Arguments: hashpartitioning(cs_ship_customer_sk#18, 5), true, [id=#19] + +(35) Sort [codegen id : 11] +Input [1]: [cs_ship_customer_sk#18] +Arguments: [cs_ship_customer_sk#18 ASC NULLS FIRST], false, 0 + +(36) SortMergeJoin +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [cs_ship_customer_sk#18] +Join condition: None + +(37) Filter [codegen id : 12] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] +Condition : (exists#2 OR exists#1) + +(38) Project [codegen id : 12] +Output [2]: [c_current_cdemo_sk#4, c_current_addr_sk#5] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] + +(39) Exchange +Input [2]: [c_current_cdemo_sk#4, c_current_addr_sk#5] +Arguments: hashpartitioning(c_current_addr_sk#5, 5), true, [id=#20] + +(40) Sort [codegen id : 13] +Input [2]: [c_current_cdemo_sk#4, c_current_addr_sk#5] +Arguments: [c_current_addr_sk#5 ASC NULLS FIRST], false, 0 + +(41) Scan parquet default.customer_address +Output [2]: [ca_address_sk#21, ca_state#22] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(42) ColumnarToRow [codegen id : 14] +Input [2]: [ca_address_sk#21, ca_state#22] + +(43) Filter [codegen id : 14] +Input [2]: [ca_address_sk#21, ca_state#22] +Condition : isnotnull(ca_address_sk#21) + +(44) Exchange +Input [2]: [ca_address_sk#21, ca_state#22] +Arguments: hashpartitioning(ca_address_sk#21, 5), true, [id=#23] + +(45) Sort [codegen id : 15] +Input [2]: [ca_address_sk#21, ca_state#22] +Arguments: [ca_address_sk#21 ASC NULLS FIRST], false, 0 + +(46) SortMergeJoin [codegen id : 16] +Left keys [1]: [c_current_addr_sk#5] +Right keys 
[1]: [ca_address_sk#21] +Join condition: None + +(47) Project [codegen id : 16] +Output [2]: [c_current_cdemo_sk#4, ca_state#22] +Input [4]: [c_current_cdemo_sk#4, c_current_addr_sk#5, ca_address_sk#21, ca_state#22] + +(48) Exchange +Input [2]: [c_current_cdemo_sk#4, ca_state#22] +Arguments: hashpartitioning(c_current_cdemo_sk#4, 5), true, [id=#24] + +(49) Sort [codegen id : 17] +Input [2]: [c_current_cdemo_sk#4, ca_state#22] +Arguments: [c_current_cdemo_sk#4 ASC NULLS FIRST], false, 0 + +(50) Scan parquet default.customer_demographics +Output [6]: [cd_demo_sk#25, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(51) ColumnarToRow [codegen id : 18] +Input [6]: [cd_demo_sk#25, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] + +(52) Filter [codegen id : 18] +Input [6]: [cd_demo_sk#25, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Condition : isnotnull(cd_demo_sk#25) + +(53) Exchange +Input [6]: [cd_demo_sk#25, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Arguments: hashpartitioning(cd_demo_sk#25, 5), true, [id=#31] + +(54) Sort [codegen id : 19] +Input [6]: [cd_demo_sk#25, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Arguments: [cd_demo_sk#25 ASC NULLS FIRST], false, 0 + +(55) SortMergeJoin [codegen id : 20] +Left keys [1]: [c_current_cdemo_sk#4] +Right keys [1]: [cd_demo_sk#25] +Join condition: None + +(56) Project [codegen id : 20] +Output [6]: [ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, 
cd_dep_employed_count#29, cd_dep_college_count#30] +Input [8]: [c_current_cdemo_sk#4, ca_state#22, cd_demo_sk#25, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] + +(57) HashAggregate [codegen id : 20] +Input [6]: [ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Keys [6]: [ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Functions [10]: [partial_count(1), partial_min(cd_dep_count#28), partial_max(cd_dep_count#28), partial_avg(cast(cd_dep_count#28 as bigint)), partial_min(cd_dep_employed_count#29), partial_max(cd_dep_employed_count#29), partial_avg(cast(cd_dep_employed_count#29 as bigint)), partial_min(cd_dep_college_count#30), partial_max(cd_dep_college_count#30), partial_avg(cast(cd_dep_college_count#30 as bigint))] +Aggregate Attributes [13]: [count#32, min#33, max#34, sum#35, count#36, min#37, max#38, sum#39, count#40, min#41, max#42, sum#43, count#44] +Results [19]: [ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, count#45, min#46, max#47, sum#48, count#49, min#50, max#51, sum#52, count#53, min#54, max#55, sum#56, count#57] + +(58) Exchange +Input [19]: [ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, count#45, min#46, max#47, sum#48, count#49, min#50, max#51, sum#52, count#53, min#54, max#55, sum#56, count#57] +Arguments: hashpartitioning(ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, 5), true, [id=#58] + +(59) HashAggregate [codegen id : 21] +Input [19]: [ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, count#45, min#46, max#47, sum#48, count#49, min#50, max#51, sum#52, count#53, min#54, max#55, 
sum#56, count#57] +Keys [6]: [ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Functions [10]: [count(1), min(cd_dep_count#28), max(cd_dep_count#28), avg(cast(cd_dep_count#28 as bigint)), min(cd_dep_employed_count#29), max(cd_dep_employed_count#29), avg(cast(cd_dep_employed_count#29 as bigint)), min(cd_dep_college_count#30), max(cd_dep_college_count#30), avg(cast(cd_dep_college_count#30 as bigint))] +Aggregate Attributes [10]: [count(1)#59, min(cd_dep_count#28)#60, max(cd_dep_count#28)#61, avg(cast(cd_dep_count#28 as bigint))#62, min(cd_dep_employed_count#29)#63, max(cd_dep_employed_count#29)#64, avg(cast(cd_dep_employed_count#29 as bigint))#65, min(cd_dep_college_count#30)#66, max(cd_dep_college_count#30)#67, avg(cast(cd_dep_college_count#30 as bigint))#68] +Results [18]: [ca_state#22, cd_gender#26, cd_marital_status#27, count(1)#59 AS cnt1#69, min(cd_dep_count#28)#60 AS min(cd_dep_count)#70, max(cd_dep_count#28)#61 AS max(cd_dep_count)#71, avg(cast(cd_dep_count#28 as bigint))#62 AS avg(cd_dep_count)#72, cd_dep_employed_count#29, count(1)#59 AS cnt2#73, min(cd_dep_employed_count#29)#63 AS min(cd_dep_employed_count)#74, max(cd_dep_employed_count#29)#64 AS max(cd_dep_employed_count)#75, avg(cast(cd_dep_employed_count#29 as bigint))#65 AS avg(cd_dep_employed_count)#76, cd_dep_college_count#30, count(1)#59 AS cnt3#77, min(cd_dep_college_count#30)#66 AS min(cd_dep_college_count)#78, max(cd_dep_college_count#30)#67 AS max(cd_dep_college_count)#79, avg(cast(cd_dep_college_count#30 as bigint))#68 AS avg(cd_dep_college_count)#80, cd_dep_count#28 AS aggOrder#81] + +(60) TakeOrderedAndProject +Input [18]: [ca_state#22, cd_gender#26, cd_marital_status#27, cnt1#69, min(cd_dep_count)#70, max(cd_dep_count)#71, avg(cd_dep_count)#72, cd_dep_employed_count#29, cnt2#73, min(cd_dep_employed_count)#74, max(cd_dep_employed_count)#75, avg(cd_dep_employed_count)#76, cd_dep_college_count#30, cnt3#77, 
min(cd_dep_college_count)#78, max(cd_dep_college_count)#79, avg(cd_dep_college_count)#80, aggOrder#81] +Arguments: 100, [ca_state#22 ASC NULLS FIRST, cd_gender#26 ASC NULLS FIRST, cd_marital_status#27 ASC NULLS FIRST, aggOrder#81 ASC NULLS FIRST, cd_dep_employed_count#29 ASC NULLS FIRST, cd_dep_college_count#30 ASC NULLS FIRST], [ca_state#22, cd_gender#26, cd_marital_status#27, cnt1#69, min(cd_dep_count)#70, max(cd_dep_count)#71, avg(cd_dep_count)#72, cd_dep_employed_count#29, cnt2#73, min(cd_dep_employed_count)#74, max(cd_dep_employed_count)#75, avg(cd_dep_employed_count)#76, cd_dep_college_count#30, cnt3#77, min(cd_dep_college_count)#78, max(cd_dep_college_count)#79, avg(cd_dep_college_count)#80] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35.sf100/simplified.txt new file mode 100644 index 0000000000000..35dc74b2ced18 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35.sf100/simplified.txt @@ -0,0 +1,103 @@ +TakeOrderedAndProject [aggOrder,avg(cd_dep_college_count),avg(cd_dep_count),avg(cd_dep_employed_count),ca_state,cd_dep_college_count,cd_dep_employed_count,cd_gender,cd_marital_status,cnt1,cnt2,cnt3,max(cd_dep_college_count),max(cd_dep_count),max(cd_dep_employed_count),min(cd_dep_college_count),min(cd_dep_count),min(cd_dep_employed_count)] + WholeStageCodegen (21) + HashAggregate [ca_state,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status,count,count,count,count,max,max,max,min,min,min,sum,sum,sum] [aggOrder,avg(cast(cd_dep_college_count as bigint)),avg(cast(cd_dep_count as bigint)),avg(cast(cd_dep_employed_count as 
bigint)),avg(cd_dep_college_count),avg(cd_dep_count),avg(cd_dep_employed_count),cnt1,cnt2,cnt3,count,count,count,count,count(1),max,max,max,max(cd_dep_college_count),max(cd_dep_college_count),max(cd_dep_count),max(cd_dep_count),max(cd_dep_employed_count),max(cd_dep_employed_count),min,min,min,min(cd_dep_college_count),min(cd_dep_college_count),min(cd_dep_count),min(cd_dep_count),min(cd_dep_employed_count),min(cd_dep_employed_count),sum,sum,sum] + InputAdapter + Exchange [ca_state,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status] #1 + WholeStageCodegen (20) + HashAggregate [ca_state,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status] [count,count,count,count,count,count,count,count,max,max,max,max,max,max,min,min,min,min,min,min,sum,sum,sum,sum,sum,sum] + Project [ca_state,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status] + SortMergeJoin [c_current_cdemo_sk,cd_demo_sk] + InputAdapter + WholeStageCodegen (17) + Sort [c_current_cdemo_sk] + InputAdapter + Exchange [c_current_cdemo_sk] #2 + WholeStageCodegen (16) + Project [c_current_cdemo_sk,ca_state] + SortMergeJoin [c_current_addr_sk,ca_address_sk] + InputAdapter + WholeStageCodegen (13) + Sort [c_current_addr_sk] + InputAdapter + Exchange [c_current_addr_sk] #3 + WholeStageCodegen (12) + Project [c_current_addr_sk,c_current_cdemo_sk] + Filter [exists,exists] + InputAdapter + SortMergeJoin [c_customer_sk,cs_ship_customer_sk] + SortMergeJoin [c_customer_sk,ws_bill_customer_sk] + SortMergeJoin [c_customer_sk,ss_customer_sk] + WholeStageCodegen (2) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #4 + WholeStageCodegen (1) + Filter [c_current_addr_sk,c_current_cdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + WholeStageCodegen (5) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #5 + WholeStageCodegen (4) + 
Project [ss_customer_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date_sk,d_qoy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] + WholeStageCodegen (8) + Sort [ws_bill_customer_sk] + InputAdapter + Exchange [ws_bill_customer_sk] #7 + WholeStageCodegen (7) + Project [ws_bill_customer_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #6 + WholeStageCodegen (11) + Sort [cs_ship_customer_sk] + InputAdapter + Exchange [cs_ship_customer_sk] #8 + WholeStageCodegen (10) + Project [cs_ship_customer_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #6 + InputAdapter + WholeStageCodegen (15) + Sort [ca_address_sk] + InputAdapter + Exchange [ca_address_sk] #9 + WholeStageCodegen (14) + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state] + InputAdapter + WholeStageCodegen (19) + Sort [cd_demo_sk] + InputAdapter + Exchange [cd_demo_sk] #10 + WholeStageCodegen (18) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35/explain.txt new file mode 100644 index 0000000000000..cb958fdb8abf4 
--- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35/explain.txt @@ -0,0 +1,274 @@ +== Physical Plan == +TakeOrderedAndProject (49) ++- * HashAggregate (48) + +- Exchange (47) + +- * HashAggregate (46) + +- * Project (45) + +- * BroadcastHashJoin Inner BuildRight (44) + :- * Project (39) + : +- * BroadcastHashJoin Inner BuildRight (38) + : :- * Project (33) + : : +- * Filter (32) + : : +- * BroadcastHashJoin ExistenceJoin(exists#1) BuildRight (31) + : : :- * BroadcastHashJoin ExistenceJoin(exists#2) BuildRight (23) + : : : :- * BroadcastHashJoin LeftSemi BuildRight (15) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.customer (1) + : : : : +- BroadcastExchange (14) + : : : : +- * Project (13) + : : : : +- * BroadcastHashJoin Inner BuildRight (12) + : : : : :- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.store_sales (4) + : : : : +- BroadcastExchange (11) + : : : : +- * Project (10) + : : : : +- * Filter (9) + : : : : +- * ColumnarToRow (8) + : : : : +- Scan parquet default.date_dim (7) + : : : +- BroadcastExchange (22) + : : : +- * Project (21) + : : : +- * BroadcastHashJoin Inner BuildRight (20) + : : : :- * Filter (18) + : : : : +- * ColumnarToRow (17) + : : : : +- Scan parquet default.web_sales (16) + : : : +- ReusedExchange (19) + : : +- BroadcastExchange (30) + : : +- * Project (29) + : : +- * BroadcastHashJoin Inner BuildRight (28) + : : :- * Filter (26) + : : : +- * ColumnarToRow (25) + : : : +- Scan parquet default.catalog_sales (24) + : : +- ReusedExchange (27) + : +- BroadcastExchange (37) + : +- * Filter (36) + : +- * ColumnarToRow (35) + : +- Scan parquet default.customer_address (34) + +- BroadcastExchange (43) + +- * Filter (42) + +- * ColumnarToRow (41) + +- Scan parquet default.customer_demographics (40) + + +(1) Scan parquet default.customer +Output [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] 
+Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 9] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] + +(3) Filter [codegen id : 9] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Condition : (isnotnull(c_current_addr_sk#5) AND isnotnull(c_current_cdemo_sk#4)) + +(4) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#6, ss_customer_sk#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 2] +Input [2]: [ss_sold_date_sk#6, ss_customer_sk#7] + +(6) Filter [codegen id : 2] +Input [2]: [ss_sold_date_sk#6, ss_customer_sk#7] +Condition : isnotnull(ss_sold_date_sk#6) + +(7) Scan parquet default.date_dim +Output [3]: [d_date_sk#8, d_year#9, d_qoy#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_qoy), EqualTo(d_year,2002), LessThan(d_qoy,4), IsNotNull(d_date_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] + +(9) Filter [codegen id : 1] +Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] +Condition : ((((isnotnull(d_year#9) AND isnotnull(d_qoy#10)) AND (d_year#9 = 2002)) AND (d_qoy#10 < 4)) AND isnotnull(d_date_sk#8)) + +(10) Project [codegen id : 1] +Output [1]: [d_date_sk#8] +Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] + +(11) BroadcastExchange +Input [1]: [d_date_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, 
int, true] as bigint)),false), [id=#11] + +(12) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#6] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(13) Project [codegen id : 2] +Output [1]: [ss_customer_sk#7] +Input [3]: [ss_sold_date_sk#6, ss_customer_sk#7, d_date_sk#8] + +(14) BroadcastExchange +Input [1]: [ss_customer_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(15) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ss_customer_sk#7] +Join condition: None + +(16) Scan parquet default.web_sales +Output [2]: [ws_sold_date_sk#13, ws_bill_customer_sk#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 4] +Input [2]: [ws_sold_date_sk#13, ws_bill_customer_sk#14] + +(18) Filter [codegen id : 4] +Input [2]: [ws_sold_date_sk#13, ws_bill_customer_sk#14] +Condition : isnotnull(ws_sold_date_sk#13) + +(19) ReusedExchange [Reuses operator id: 11] +Output [1]: [d_date_sk#8] + +(20) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_sold_date_sk#13] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(21) Project [codegen id : 4] +Output [1]: [ws_bill_customer_sk#14] +Input [3]: [ws_sold_date_sk#13, ws_bill_customer_sk#14, d_date_sk#8] + +(22) BroadcastExchange +Input [1]: [ws_bill_customer_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] + +(23) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ws_bill_customer_sk#14] +Join condition: None + +(24) Scan parquet default.catalog_sales +Output [2]: [cs_sold_date_sk#16, cs_ship_customer_sk#17] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 6] +Input [2]: [cs_sold_date_sk#16, cs_ship_customer_sk#17] + +(26) Filter [codegen id : 6] +Input [2]: [cs_sold_date_sk#16, cs_ship_customer_sk#17] +Condition : isnotnull(cs_sold_date_sk#16) + +(27) ReusedExchange [Reuses operator id: 11] +Output [1]: [d_date_sk#8] + +(28) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_sold_date_sk#16] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(29) Project [codegen id : 6] +Output [1]: [cs_ship_customer_sk#17] +Input [3]: [cs_sold_date_sk#16, cs_ship_customer_sk#17, d_date_sk#8] + +(30) BroadcastExchange +Input [1]: [cs_ship_customer_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] + +(31) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [cs_ship_customer_sk#17] +Join condition: None + +(32) Filter [codegen id : 9] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] +Condition : (exists#2 OR exists#1) + +(33) Project [codegen id : 9] +Output [2]: [c_current_cdemo_sk#4, c_current_addr_sk#5] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] + +(34) Scan parquet default.customer_address +Output [2]: [ca_address_sk#19, ca_state#20] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 7] +Input [2]: [ca_address_sk#19, ca_state#20] + +(36) Filter [codegen id : 7] +Input [2]: [ca_address_sk#19, ca_state#20] +Condition : isnotnull(ca_address_sk#19) + +(37) BroadcastExchange +Input [2]: 
[ca_address_sk#19, ca_state#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#21] + +(38) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_addr_sk#5] +Right keys [1]: [ca_address_sk#19] +Join condition: None + +(39) Project [codegen id : 9] +Output [2]: [c_current_cdemo_sk#4, ca_state#20] +Input [4]: [c_current_cdemo_sk#4, c_current_addr_sk#5, ca_address_sk#19, ca_state#20] + +(40) Scan parquet default.customer_demographics +Output [6]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 8] +Input [6]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] + +(42) Filter [codegen id : 8] +Input [6]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Condition : isnotnull(cd_demo_sk#22) + +(43) BroadcastExchange +Input [6]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#28] + +(44) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_cdemo_sk#4] +Right keys [1]: [cd_demo_sk#22] +Join condition: None + +(45) Project [codegen id : 9] +Output [6]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Input [8]: [c_current_cdemo_sk#4, ca_state#20, cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] + +(46) HashAggregate [codegen id : 
9] +Input [6]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Keys [6]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Functions [10]: [partial_count(1), partial_min(cd_dep_count#25), partial_max(cd_dep_count#25), partial_avg(cast(cd_dep_count#25 as bigint)), partial_min(cd_dep_employed_count#26), partial_max(cd_dep_employed_count#26), partial_avg(cast(cd_dep_employed_count#26 as bigint)), partial_min(cd_dep_college_count#27), partial_max(cd_dep_college_count#27), partial_avg(cast(cd_dep_college_count#27 as bigint))] +Aggregate Attributes [13]: [count#29, min#30, max#31, sum#32, count#33, min#34, max#35, sum#36, count#37, min#38, max#39, sum#40, count#41] +Results [19]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#42, min#43, max#44, sum#45, count#46, min#47, max#48, sum#49, count#50, min#51, max#52, sum#53, count#54] + +(47) Exchange +Input [19]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#42, min#43, max#44, sum#45, count#46, min#47, max#48, sum#49, count#50, min#51, max#52, sum#53, count#54] +Arguments: hashpartitioning(ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, 5), true, [id=#55] + +(48) HashAggregate [codegen id : 10] +Input [19]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#42, min#43, max#44, sum#45, count#46, min#47, max#48, sum#49, count#50, min#51, max#52, sum#53, count#54] +Keys [6]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Functions [10]: [count(1), min(cd_dep_count#25), max(cd_dep_count#25), avg(cast(cd_dep_count#25 as bigint)), 
min(cd_dep_employed_count#26), max(cd_dep_employed_count#26), avg(cast(cd_dep_employed_count#26 as bigint)), min(cd_dep_college_count#27), max(cd_dep_college_count#27), avg(cast(cd_dep_college_count#27 as bigint))] +Aggregate Attributes [10]: [count(1)#56, min(cd_dep_count#25)#57, max(cd_dep_count#25)#58, avg(cast(cd_dep_count#25 as bigint))#59, min(cd_dep_employed_count#26)#60, max(cd_dep_employed_count#26)#61, avg(cast(cd_dep_employed_count#26 as bigint))#62, min(cd_dep_college_count#27)#63, max(cd_dep_college_count#27)#64, avg(cast(cd_dep_college_count#27 as bigint))#65] +Results [18]: [ca_state#20, cd_gender#23, cd_marital_status#24, count(1)#56 AS cnt1#66, min(cd_dep_count#25)#57 AS min(cd_dep_count)#67, max(cd_dep_count#25)#58 AS max(cd_dep_count)#68, avg(cast(cd_dep_count#25 as bigint))#59 AS avg(cd_dep_count)#69, cd_dep_employed_count#26, count(1)#56 AS cnt2#70, min(cd_dep_employed_count#26)#60 AS min(cd_dep_employed_count)#71, max(cd_dep_employed_count#26)#61 AS max(cd_dep_employed_count)#72, avg(cast(cd_dep_employed_count#26 as bigint))#62 AS avg(cd_dep_employed_count)#73, cd_dep_college_count#27, count(1)#56 AS cnt3#74, min(cd_dep_college_count#27)#63 AS min(cd_dep_college_count)#75, max(cd_dep_college_count#27)#64 AS max(cd_dep_college_count)#76, avg(cast(cd_dep_college_count#27 as bigint))#65 AS avg(cd_dep_college_count)#77, cd_dep_count#25 AS aggOrder#78] + +(49) TakeOrderedAndProject +Input [18]: [ca_state#20, cd_gender#23, cd_marital_status#24, cnt1#66, min(cd_dep_count)#67, max(cd_dep_count)#68, avg(cd_dep_count)#69, cd_dep_employed_count#26, cnt2#70, min(cd_dep_employed_count)#71, max(cd_dep_employed_count)#72, avg(cd_dep_employed_count)#73, cd_dep_college_count#27, cnt3#74, min(cd_dep_college_count)#75, max(cd_dep_college_count)#76, avg(cd_dep_college_count)#77, aggOrder#78] +Arguments: 100, [ca_state#20 ASC NULLS FIRST, cd_gender#23 ASC NULLS FIRST, cd_marital_status#24 ASC NULLS FIRST, aggOrder#78 ASC NULLS FIRST, cd_dep_employed_count#26 ASC 
NULLS FIRST, cd_dep_college_count#27 ASC NULLS FIRST], [ca_state#20, cd_gender#23, cd_marital_status#24, cnt1#66, min(cd_dep_count)#67, max(cd_dep_count)#68, avg(cd_dep_count)#69, cd_dep_employed_count#26, cnt2#70, min(cd_dep_employed_count)#71, max(cd_dep_employed_count)#72, avg(cd_dep_employed_count)#73, cd_dep_college_count#27, cnt3#74, min(cd_dep_college_count)#75, max(cd_dep_college_count)#76, avg(cd_dep_college_count)#77] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35/simplified.txt new file mode 100644 index 0000000000000..4f500e2b32b31 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35/simplified.txt @@ -0,0 +1,73 @@ +TakeOrderedAndProject [aggOrder,avg(cd_dep_college_count),avg(cd_dep_count),avg(cd_dep_employed_count),ca_state,cd_dep_college_count,cd_dep_employed_count,cd_gender,cd_marital_status,cnt1,cnt2,cnt3,max(cd_dep_college_count),max(cd_dep_count),max(cd_dep_employed_count),min(cd_dep_college_count),min(cd_dep_count),min(cd_dep_employed_count)] + WholeStageCodegen (10) + HashAggregate [ca_state,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status,count,count,count,count,max,max,max,min,min,min,sum,sum,sum] [aggOrder,avg(cast(cd_dep_college_count as bigint)),avg(cast(cd_dep_count as bigint)),avg(cast(cd_dep_employed_count as bigint)),avg(cd_dep_college_count),avg(cd_dep_count),avg(cd_dep_employed_count),cnt1,cnt2,cnt3,count,count,count,count,count(1),max,max,max,max(cd_dep_college_count),max(cd_dep_college_count),max(cd_dep_count),max(cd_dep_count),max(cd_dep_employed_count),max(cd_dep_employed_count),min,min,min,min(cd_dep_college_count),min(cd_dep_college_count),min(cd_dep_count),min(cd_dep_count),min(cd_dep_employed_count),min(cd_dep_employed_count),sum,sum,sum] + InputAdapter + Exchange 
[ca_state,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status] #1 + WholeStageCodegen (9) + HashAggregate [ca_state,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status] [count,count,count,count,count,count,count,count,max,max,max,max,max,max,min,min,min,min,min,min,sum,sum,sum,sum,sum,sum] + Project [ca_state,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk,ca_state] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_addr_sk,c_current_cdemo_sk] + Filter [exists,exists] + BroadcastHashJoin [c_customer_sk,cs_ship_customer_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_current_addr_sk,c_current_cdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (2) + Project [ss_customer_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_qoy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (4) + Project [ws_bill_customer_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (6) + Project [cs_ship_customer_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + 
Scan parquet default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (8) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36.sf100/explain.txt new file mode 100644 index 0000000000000..195f45feeba50 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36.sf100/explain.txt @@ -0,0 +1,180 @@ +== Physical Plan == +TakeOrderedAndProject (32) ++- * Project (31) + +- Window (30) + +- * Sort (29) + +- Exchange (28) + +- * HashAggregate (27) + +- Exchange (26) + +- * HashAggregate (25) + +- * Expand (24) + +- * Project (23) + +- * BroadcastHashJoin Inner BuildRight (22) + :- * Project (17) + : +- * BroadcastHashJoin Inner BuildRight (16) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.store_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.date_dim (4) + : +- BroadcastExchange (15) + : +- * Project (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet default.store (11) + +- BroadcastExchange (21) + +- * Filter (20) + +- * ColumnarToRow (19) + +- Scan parquet default.item (18) + + +(1) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, 
ss_net_profit#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5] + +(3) Filter [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5] +Condition : ((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_store_sk#3)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#6, d_year#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#6, d_year#7] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#6, d_year#7] +Condition : ((isnotnull(d_year#7) AND (d_year#7 = 2001)) AND isnotnull(d_date_sk#6)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [2]: [d_date_sk#6, d_year#7] + +(8) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5, d_date_sk#6] + +(11) Scan parquet default.store +Output [2]: [s_store_sk#9, s_state#10] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#9, s_state#10] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#9, s_state#10] +Condition : ((isnotnull(s_state#10) AND (s_state#10 = TN)) AND isnotnull(s_store_sk#9)) + +(14) Project [codegen id : 2] +Output [1]: [s_store_sk#9] +Input [2]: [s_store_sk#9, s_state#10] + +(15) BroadcastExchange +Input [1]: [s_store_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#9] +Join condition: None + +(17) Project [codegen id : 4] +Output [3]: [ss_item_sk#2, ss_ext_sales_price#4, ss_net_profit#5] +Input [5]: [ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5, s_store_sk#9] + +(18) Scan parquet default.item +Output [3]: [i_item_sk#12, i_class#13, i_category#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [3]: [i_item_sk#12, i_class#13, i_category#14] + +(20) Filter [codegen id : 3] +Input [3]: [i_item_sk#12, i_class#13, i_category#14] +Condition : isnotnull(i_item_sk#12) + +(21) BroadcastExchange +Input [3]: [i_item_sk#12, i_class#13, i_category#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#15] + +(22) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#12] +Join condition: None + +(23) Project [codegen id : 4] +Output [4]: [ss_ext_sales_price#4, ss_net_profit#5, i_category#14, 
i_class#13] +Input [6]: [ss_item_sk#2, ss_ext_sales_price#4, ss_net_profit#5, i_item_sk#12, i_class#13, i_category#14] + +(24) Expand [codegen id : 4] +Input [4]: [ss_ext_sales_price#4, ss_net_profit#5, i_category#14, i_class#13] +Arguments: [List(ss_ext_sales_price#4, ss_net_profit#5, i_category#14, i_class#13, 0), List(ss_ext_sales_price#4, ss_net_profit#5, i_category#14, null, 1), List(ss_ext_sales_price#4, ss_net_profit#5, null, null, 3)], [ss_ext_sales_price#4, ss_net_profit#5, i_category#16, i_class#17, spark_grouping_id#18] + +(25) HashAggregate [codegen id : 4] +Input [5]: [ss_ext_sales_price#4, ss_net_profit#5, i_category#16, i_class#17, spark_grouping_id#18] +Keys [3]: [i_category#16, i_class#17, spark_grouping_id#18] +Functions [2]: [partial_sum(UnscaledValue(ss_net_profit#5)), partial_sum(UnscaledValue(ss_ext_sales_price#4))] +Aggregate Attributes [2]: [sum#19, sum#20] +Results [5]: [i_category#16, i_class#17, spark_grouping_id#18, sum#21, sum#22] + +(26) Exchange +Input [5]: [i_category#16, i_class#17, spark_grouping_id#18, sum#21, sum#22] +Arguments: hashpartitioning(i_category#16, i_class#17, spark_grouping_id#18, 5), true, [id=#23] + +(27) HashAggregate [codegen id : 5] +Input [5]: [i_category#16, i_class#17, spark_grouping_id#18, sum#21, sum#22] +Keys [3]: [i_category#16, i_class#17, spark_grouping_id#18] +Functions [2]: [sum(UnscaledValue(ss_net_profit#5)), sum(UnscaledValue(ss_ext_sales_price#4))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#5))#24, sum(UnscaledValue(ss_ext_sales_price#4))#25] +Results [7]: [CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_net_profit#5))#24,17,2)) / promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#25,17,2))), DecimalType(37,20), true) AS gross_margin#26, i_category#16, i_class#17, (cast((shiftright(spark_grouping_id#18, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#18, 0) & 1) as tinyint)) AS lochierarchy#27, (cast((shiftright(spark_grouping_id#18, 
1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#18, 0) & 1) as tinyint)) AS _w1#28, CASE WHEN (cast(cast((shiftright(spark_grouping_id#18, 0) & 1) as tinyint) as int) = 0) THEN i_category#16 END AS _w2#29, CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_net_profit#5))#24,17,2)) / promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#25,17,2))), DecimalType(37,20), true) AS _w3#30] + +(28) Exchange +Input [7]: [gross_margin#26, i_category#16, i_class#17, lochierarchy#27, _w1#28, _w2#29, _w3#30] +Arguments: hashpartitioning(_w1#28, _w2#29, 5), true, [id=#31] + +(29) Sort [codegen id : 6] +Input [7]: [gross_margin#26, i_category#16, i_class#17, lochierarchy#27, _w1#28, _w2#29, _w3#30] +Arguments: [_w1#28 ASC NULLS FIRST, _w2#29 ASC NULLS FIRST, _w3#30 ASC NULLS FIRST], false, 0 + +(30) Window +Input [7]: [gross_margin#26, i_category#16, i_class#17, lochierarchy#27, _w1#28, _w2#29, _w3#30] +Arguments: [rank(_w3#30) windowspecdefinition(_w1#28, _w2#29, _w3#30 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#32], [_w1#28, _w2#29], [_w3#30 ASC NULLS FIRST] + +(31) Project [codegen id : 7] +Output [5]: [gross_margin#26, i_category#16, i_class#17, lochierarchy#27, rank_within_parent#32] +Input [8]: [gross_margin#26, i_category#16, i_class#17, lochierarchy#27, _w1#28, _w2#29, _w3#30, rank_within_parent#32] + +(32) TakeOrderedAndProject +Input [5]: [gross_margin#26, i_category#16, i_class#17, lochierarchy#27, rank_within_parent#32] +Arguments: 100, [lochierarchy#27 DESC NULLS LAST, CASE WHEN (cast(lochierarchy#27 as int) = 0) THEN i_category#16 END ASC NULLS FIRST, rank_within_parent#32 ASC NULLS FIRST], [gross_margin#26, i_category#16, i_class#17, lochierarchy#27, rank_within_parent#32] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36.sf100/simplified.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36.sf100/simplified.txt new file mode 100644 index 0000000000000..bef14cb3851ac --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36.sf100/simplified.txt @@ -0,0 +1,49 @@ +TakeOrderedAndProject [gross_margin,i_category,i_class,lochierarchy,rank_within_parent] + WholeStageCodegen (7) + Project [gross_margin,i_category,i_class,lochierarchy,rank_within_parent] + InputAdapter + Window [_w1,_w2,_w3] + WholeStageCodegen (6) + Sort [_w1,_w2,_w3] + InputAdapter + Exchange [_w1,_w2] #1 + WholeStageCodegen (5) + HashAggregate [i_category,i_class,spark_grouping_id,sum,sum] [_w1,_w2,_w3,gross_margin,lochierarchy,sum,sum,sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit))] + InputAdapter + Exchange [i_category,i_class,spark_grouping_id] #2 + WholeStageCodegen (4) + HashAggregate [i_category,i_class,spark_grouping_id,ss_ext_sales_price,ss_net_profit] [sum,sum,sum,sum] + Expand [i_category,i_class,ss_ext_sales_price,ss_net_profit] + Project [i_category,i_class,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_ext_sales_price,ss_item_sk,ss_net_profit] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [s_store_sk] + Filter [s_state,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_state,s_store_sk] + InputAdapter + 
BroadcastExchange #5 + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_class,i_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/explain.txt new file mode 100644 index 0000000000000..1f86dd127f4b6 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/explain.txt @@ -0,0 +1,180 @@ +== Physical Plan == +TakeOrderedAndProject (32) ++- * Project (31) + +- Window (30) + +- * Sort (29) + +- Exchange (28) + +- * HashAggregate (27) + +- Exchange (26) + +- * HashAggregate (25) + +- * Expand (24) + +- * Project (23) + +- * BroadcastHashJoin Inner BuildRight (22) + :- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.store_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.date_dim (4) + : +- BroadcastExchange (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet default.item (11) + +- BroadcastExchange (21) + +- * Project (20) + +- * Filter (19) + +- * ColumnarToRow (18) + +- Scan parquet default.store (17) + + +(1) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5] + 
+(3) Filter [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5] +Condition : ((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_store_sk#3)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#6, d_year#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#6, d_year#7] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#6, d_year#7] +Condition : ((isnotnull(d_year#7) AND (d_year#7 = 2001)) AND isnotnull(d_date_sk#6)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [2]: [d_date_sk#6, d_year#7] + +(8) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5, d_date_sk#6] + +(11) Scan parquet default.item +Output [3]: [i_item_sk#9, i_class#10, i_category#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [i_item_sk#9, i_class#10, i_category#11] + +(13) Filter [codegen id : 2] +Input [3]: [i_item_sk#9, i_class#10, i_category#11] +Condition : isnotnull(i_item_sk#9) + +(14) BroadcastExchange +Input [3]: [i_item_sk#9, i_class#10, 
i_category#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#9] +Join condition: None + +(16) Project [codegen id : 4] +Output [5]: [ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5, i_class#10, i_category#11] +Input [7]: [ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5, i_item_sk#9, i_class#10, i_category#11] + +(17) Scan parquet default.store +Output [2]: [s_store_sk#13, s_state#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [2]: [s_store_sk#13, s_state#14] + +(19) Filter [codegen id : 3] +Input [2]: [s_store_sk#13, s_state#14] +Condition : ((isnotnull(s_state#14) AND (s_state#14 = TN)) AND isnotnull(s_store_sk#13)) + +(20) Project [codegen id : 3] +Output [1]: [s_store_sk#13] +Input [2]: [s_store_sk#13, s_state#14] + +(21) BroadcastExchange +Input [1]: [s_store_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] + +(22) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#13] +Join condition: None + +(23) Project [codegen id : 4] +Output [4]: [ss_ext_sales_price#4, ss_net_profit#5, i_category#11, i_class#10] +Input [6]: [ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5, i_class#10, i_category#11, s_store_sk#13] + +(24) Expand [codegen id : 4] +Input [4]: [ss_ext_sales_price#4, ss_net_profit#5, i_category#11, i_class#10] +Arguments: [List(ss_ext_sales_price#4, ss_net_profit#5, i_category#11, i_class#10, 0), List(ss_ext_sales_price#4, ss_net_profit#5, i_category#11, null, 1), List(ss_ext_sales_price#4, ss_net_profit#5, null, 
null, 3)], [ss_ext_sales_price#4, ss_net_profit#5, i_category#16, i_class#17, spark_grouping_id#18] + +(25) HashAggregate [codegen id : 4] +Input [5]: [ss_ext_sales_price#4, ss_net_profit#5, i_category#16, i_class#17, spark_grouping_id#18] +Keys [3]: [i_category#16, i_class#17, spark_grouping_id#18] +Functions [2]: [partial_sum(UnscaledValue(ss_net_profit#5)), partial_sum(UnscaledValue(ss_ext_sales_price#4))] +Aggregate Attributes [2]: [sum#19, sum#20] +Results [5]: [i_category#16, i_class#17, spark_grouping_id#18, sum#21, sum#22] + +(26) Exchange +Input [5]: [i_category#16, i_class#17, spark_grouping_id#18, sum#21, sum#22] +Arguments: hashpartitioning(i_category#16, i_class#17, spark_grouping_id#18, 5), true, [id=#23] + +(27) HashAggregate [codegen id : 5] +Input [5]: [i_category#16, i_class#17, spark_grouping_id#18, sum#21, sum#22] +Keys [3]: [i_category#16, i_class#17, spark_grouping_id#18] +Functions [2]: [sum(UnscaledValue(ss_net_profit#5)), sum(UnscaledValue(ss_ext_sales_price#4))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#5))#24, sum(UnscaledValue(ss_ext_sales_price#4))#25] +Results [7]: [CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_net_profit#5))#24,17,2)) / promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#25,17,2))), DecimalType(37,20), true) AS gross_margin#26, i_category#16, i_class#17, (cast((shiftright(spark_grouping_id#18, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#18, 0) & 1) as tinyint)) AS lochierarchy#27, (cast((shiftright(spark_grouping_id#18, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#18, 0) & 1) as tinyint)) AS _w1#28, CASE WHEN (cast(cast((shiftright(spark_grouping_id#18, 0) & 1) as tinyint) as int) = 0) THEN i_category#16 END AS _w2#29, CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_net_profit#5))#24,17,2)) / promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#25,17,2))), DecimalType(37,20), true) AS _w3#30] + +(28) 
Exchange +Input [7]: [gross_margin#26, i_category#16, i_class#17, lochierarchy#27, _w1#28, _w2#29, _w3#30] +Arguments: hashpartitioning(_w1#28, _w2#29, 5), true, [id=#31] + +(29) Sort [codegen id : 6] +Input [7]: [gross_margin#26, i_category#16, i_class#17, lochierarchy#27, _w1#28, _w2#29, _w3#30] +Arguments: [_w1#28 ASC NULLS FIRST, _w2#29 ASC NULLS FIRST, _w3#30 ASC NULLS FIRST], false, 0 + +(30) Window +Input [7]: [gross_margin#26, i_category#16, i_class#17, lochierarchy#27, _w1#28, _w2#29, _w3#30] +Arguments: [rank(_w3#30) windowspecdefinition(_w1#28, _w2#29, _w3#30 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#32], [_w1#28, _w2#29], [_w3#30 ASC NULLS FIRST] + +(31) Project [codegen id : 7] +Output [5]: [gross_margin#26, i_category#16, i_class#17, lochierarchy#27, rank_within_parent#32] +Input [8]: [gross_margin#26, i_category#16, i_class#17, lochierarchy#27, _w1#28, _w2#29, _w3#30, rank_within_parent#32] + +(32) TakeOrderedAndProject +Input [5]: [gross_margin#26, i_category#16, i_class#17, lochierarchy#27, rank_within_parent#32] +Arguments: 100, [lochierarchy#27 DESC NULLS LAST, CASE WHEN (cast(lochierarchy#27 as int) = 0) THEN i_category#16 END ASC NULLS FIRST, rank_within_parent#32 ASC NULLS FIRST], [gross_margin#26, i_category#16, i_class#17, lochierarchy#27, rank_within_parent#32] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/simplified.txt new file mode 100644 index 0000000000000..aa668141fd783 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/simplified.txt @@ -0,0 +1,49 @@ +TakeOrderedAndProject [gross_margin,i_category,i_class,lochierarchy,rank_within_parent] + WholeStageCodegen (7) + Project [gross_margin,i_category,i_class,lochierarchy,rank_within_parent] + InputAdapter + Window [_w1,_w2,_w3] + WholeStageCodegen (6) 
+ Sort [_w1,_w2,_w3] + InputAdapter + Exchange [_w1,_w2] #1 + WholeStageCodegen (5) + HashAggregate [i_category,i_class,spark_grouping_id,sum,sum] [_w1,_w2,_w3,gross_margin,lochierarchy,sum,sum,sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit))] + InputAdapter + Exchange [i_category,i_class,spark_grouping_id] #2 + WholeStageCodegen (4) + HashAggregate [i_category,i_class,spark_grouping_id,ss_ext_sales_price,ss_net_profit] [sum,sum,sum,sum] + Expand [i_category,i_class,ss_ext_sales_price,ss_net_profit] + Project [i_category,i_class,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [i_category,i_class,ss_ext_sales_price,ss_net_profit,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_class,i_item_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [s_store_sk] + Filter [s_state,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_state,s_store_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37.sf100/explain.txt new file mode 100644 index 0000000000000..fc783877fa6cf --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37.sf100/explain.txt @@ -0,0 
+1,175 @@ +== Physical Plan == +TakeOrderedAndProject (31) ++- * HashAggregate (30) + +- Exchange (29) + +- * HashAggregate (28) + +- * Project (27) + +- * SortMergeJoin Inner (26) + :- * Sort (20) + : +- Exchange (19) + : +- * Project (18) + : +- * BroadcastHashJoin Inner BuildRight (17) + : :- * Project (11) + : : +- * BroadcastHashJoin Inner BuildLeft (10) + : : :- BroadcastExchange (5) + : : : +- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.item (1) + : : +- * Project (9) + : : +- * Filter (8) + : : +- * ColumnarToRow (7) + : : +- Scan parquet default.inventory (6) + : +- BroadcastExchange (16) + : +- * Project (15) + : +- * Filter (14) + : +- * ColumnarToRow (13) + : +- Scan parquet default.date_dim (12) + +- * Sort (25) + +- Exchange (24) + +- * Filter (23) + +- * ColumnarToRow (22) + +- Scan parquet default.catalog_sales (21) + + +(1) Scan parquet default.item +Output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,68.00), In(i_manufact_id, [677,940,694,808]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] + +(3) Filter [codegen id : 1] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Condition : ((((isnotnull(i_current_price#4) AND (i_current_price#4 >= 68.00)) AND (cast(i_current_price#4 as decimal(12,2)) <= 98.00)) AND i_manufact_id#5 IN (677,940,694,808)) AND isnotnull(i_item_sk#1)) + +(4) Project [codegen id : 1] +Output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, 
i_current_price#4, i_manufact_id#5] + +(5) BroadcastExchange +Input [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(6) Scan parquet default.inventory +Output [3]: [inv_date_sk#7, inv_item_sk#8, inv_quantity_on_hand#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/inventory] +PushedFilters: [IsNotNull(inv_quantity_on_hand), GreaterThanOrEqual(inv_quantity_on_hand,100), LessThanOrEqual(inv_quantity_on_hand,500), IsNotNull(inv_item_sk), IsNotNull(inv_date_sk)] +ReadSchema: struct + +(7) ColumnarToRow +Input [3]: [inv_date_sk#7, inv_item_sk#8, inv_quantity_on_hand#9] + +(8) Filter +Input [3]: [inv_date_sk#7, inv_item_sk#8, inv_quantity_on_hand#9] +Condition : ((((isnotnull(inv_quantity_on_hand#9) AND (inv_quantity_on_hand#9 >= 100)) AND (inv_quantity_on_hand#9 <= 500)) AND isnotnull(inv_item_sk#8)) AND isnotnull(inv_date_sk#7)) + +(9) Project +Output [2]: [inv_date_sk#7, inv_item_sk#8] +Input [3]: [inv_date_sk#7, inv_item_sk#8, inv_quantity_on_hand#9] + +(10) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [inv_item_sk#8] +Join condition: None + +(11) Project [codegen id : 3] +Output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#7] +Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#7, inv_item_sk#8] + +(12) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_date#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-01), LessThanOrEqual(d_date,2000-04-01), IsNotNull(d_date_sk)] +ReadSchema: struct + +(13) ColumnarToRow [codegen id : 2] 
+Input [2]: [d_date_sk#10, d_date#11] + +(14) Filter [codegen id : 2] +Input [2]: [d_date_sk#10, d_date#11] +Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 10988)) AND (d_date#11 <= 11048)) AND isnotnull(d_date_sk#10)) + +(15) Project [codegen id : 2] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_date#11] + +(16) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(17) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [inv_date_sk#7] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(18) Project [codegen id : 3] +Output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#7, d_date_sk#10] + +(19) Exchange +Input [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: hashpartitioning(i_item_sk#1, 5), true, [id=#13] + +(20) Sort [codegen id : 4] +Input [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: [i_item_sk#1 ASC NULLS FIRST], false, 0 + +(21) Scan parquet default.catalog_sales +Output [1]: [cs_item_sk#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 5] +Input [1]: [cs_item_sk#14] + +(23) Filter [codegen id : 5] +Input [1]: [cs_item_sk#14] +Condition : isnotnull(cs_item_sk#14) + +(24) Exchange +Input [1]: [cs_item_sk#14] +Arguments: hashpartitioning(cs_item_sk#14, 5), true, [id=#15] + +(25) Sort [codegen id : 6] +Input [1]: [cs_item_sk#14] +Arguments: [cs_item_sk#14 ASC NULLS FIRST], false, 0 + +(26) SortMergeJoin [codegen id : 7] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [cs_item_sk#14] +Join condition: None + +(27) Project [codegen id : 7] +Output [3]: 
[i_item_id#2, i_item_desc#3, i_current_price#4] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, cs_item_sk#14] + +(28) HashAggregate [codegen id : 7] +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Keys [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] + +(29) Exchange +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: hashpartitioning(i_item_id#2, i_item_desc#3, i_current_price#4, 5), true, [id=#16] + +(30) HashAggregate [codegen id : 8] +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Keys [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] + +(31) TakeOrderedAndProject +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: 100, [i_item_id#2 ASC NULLS FIRST], [i_item_id#2, i_item_desc#3, i_current_price#4] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37.sf100/simplified.txt new file mode 100644 index 0000000000000..aa3b9e817c910 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37.sf100/simplified.txt @@ -0,0 +1,50 @@ +TakeOrderedAndProject [i_current_price,i_item_desc,i_item_id] + WholeStageCodegen (8) + HashAggregate [i_current_price,i_item_desc,i_item_id] + InputAdapter + Exchange [i_current_price,i_item_desc,i_item_id] #1 + WholeStageCodegen (7) + HashAggregate [i_current_price,i_item_desc,i_item_id] + Project [i_current_price,i_item_desc,i_item_id] + SortMergeJoin [cs_item_sk,i_item_sk] + InputAdapter + WholeStageCodegen (4) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #2 + WholeStageCodegen (3) + Project [i_current_price,i_item_desc,i_item_id,i_item_sk] + BroadcastHashJoin 
[d_date_sk,inv_date_sk] + Project [i_current_price,i_item_desc,i_item_id,i_item_sk,inv_date_sk] + BroadcastHashJoin [i_item_sk,inv_item_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [i_current_price,i_item_desc,i_item_id,i_item_sk] + Filter [i_current_price,i_item_sk,i_manufact_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_current_price,i_item_desc,i_item_id,i_item_sk,i_manufact_id] + Project [inv_date_sk,inv_item_sk] + Filter [inv_date_sk,inv_item_sk,inv_quantity_on_hand] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + WholeStageCodegen (6) + Sort [cs_item_sk] + InputAdapter + Exchange [cs_item_sk] #5 + WholeStageCodegen (5) + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/explain.txt new file mode 100644 index 0000000000000..ccfd03ea05be7 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/explain.txt @@ -0,0 +1,160 @@ +== Physical Plan == +TakeOrderedAndProject (28) ++- * HashAggregate (27) + +- Exchange (26) + +- * HashAggregate (25) + +- * Project (24) + +- * BroadcastHashJoin Inner BuildRight (23) + :- * Project (18) + : +- * BroadcastHashJoin Inner BuildRight (17) + : :- * Project (11) + : : +- * BroadcastHashJoin Inner BuildRight (10) + : : :- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.item (1) + : : +- BroadcastExchange (9) + : : +- * Project (8) + : : +- * Filter (7) + : : +- * ColumnarToRow (6) + : : +- Scan parquet 
default.inventory (5) + : +- BroadcastExchange (16) + : +- * Project (15) + : +- * Filter (14) + : +- * ColumnarToRow (13) + : +- Scan parquet default.date_dim (12) + +- BroadcastExchange (22) + +- * Filter (21) + +- * ColumnarToRow (20) + +- Scan parquet default.catalog_sales (19) + + +(1) Scan parquet default.item +Output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,68.00), In(i_manufact_id, [677,940,694,808]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] + +(3) Filter [codegen id : 4] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Condition : ((((isnotnull(i_current_price#4) AND (i_current_price#4 >= 68.00)) AND (cast(i_current_price#4 as decimal(12,2)) <= 98.00)) AND i_manufact_id#5 IN (677,940,694,808)) AND isnotnull(i_item_sk#1)) + +(4) Project [codegen id : 4] +Output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] + +(5) Scan parquet default.inventory +Output [3]: [inv_date_sk#6, inv_item_sk#7, inv_quantity_on_hand#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/inventory] +PushedFilters: [IsNotNull(inv_quantity_on_hand), GreaterThanOrEqual(inv_quantity_on_hand,100), LessThanOrEqual(inv_quantity_on_hand,500), IsNotNull(inv_item_sk), IsNotNull(inv_date_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [3]: [inv_date_sk#6, inv_item_sk#7, inv_quantity_on_hand#8] + +(7) Filter 
[codegen id : 1] +Input [3]: [inv_date_sk#6, inv_item_sk#7, inv_quantity_on_hand#8] +Condition : ((((isnotnull(inv_quantity_on_hand#8) AND (inv_quantity_on_hand#8 >= 100)) AND (inv_quantity_on_hand#8 <= 500)) AND isnotnull(inv_item_sk#7)) AND isnotnull(inv_date_sk#6)) + +(8) Project [codegen id : 1] +Output [2]: [inv_date_sk#6, inv_item_sk#7] +Input [3]: [inv_date_sk#6, inv_item_sk#7, inv_quantity_on_hand#8] + +(9) BroadcastExchange +Input [2]: [inv_date_sk#6, inv_item_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#9] + +(10) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [inv_item_sk#7] +Join condition: None + +(11) Project [codegen id : 4] +Output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#6] +Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#6, inv_item_sk#7] + +(12) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_date#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-01), LessThanOrEqual(d_date,2000-04-01), IsNotNull(d_date_sk)] +ReadSchema: struct + +(13) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#10, d_date#11] + +(14) Filter [codegen id : 2] +Input [2]: [d_date_sk#10, d_date#11] +Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 10988)) AND (d_date#11 <= 11048)) AND isnotnull(d_date_sk#10)) + +(15) Project [codegen id : 2] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_date#11] + +(16) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(17) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_date_sk#6] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(18) Project [codegen 
id : 4] +Output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#6, d_date_sk#10] + +(19) Scan parquet default.catalog_sales +Output [1]: [cs_item_sk#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 3] +Input [1]: [cs_item_sk#13] + +(21) Filter [codegen id : 3] +Input [1]: [cs_item_sk#13] +Condition : isnotnull(cs_item_sk#13) + +(22) BroadcastExchange +Input [1]: [cs_item_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [cs_item_sk#13] +Join condition: None + +(24) Project [codegen id : 4] +Output [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, cs_item_sk#13] + +(25) HashAggregate [codegen id : 4] +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Keys [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] + +(26) Exchange +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: hashpartitioning(i_item_id#2, i_item_desc#3, i_current_price#4, 5), true, [id=#15] + +(27) HashAggregate [codegen id : 5] +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Keys [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] + +(28) TakeOrderedAndProject +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: 100, [i_item_id#2 ASC NULLS FIRST], [i_item_id#2, i_item_desc#3, i_current_price#4] + 
diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/simplified.txt new file mode 100644 index 0000000000000..f895fe34d0b11 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/simplified.txt @@ -0,0 +1,41 @@ +TakeOrderedAndProject [i_current_price,i_item_desc,i_item_id] + WholeStageCodegen (5) + HashAggregate [i_current_price,i_item_desc,i_item_id] + InputAdapter + Exchange [i_current_price,i_item_desc,i_item_id] #1 + WholeStageCodegen (4) + HashAggregate [i_current_price,i_item_desc,i_item_id] + Project [i_current_price,i_item_desc,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [i_current_price,i_item_desc,i_item_id,i_item_sk] + BroadcastHashJoin [d_date_sk,inv_date_sk] + Project [i_current_price,i_item_desc,i_item_id,i_item_sk,inv_date_sk] + BroadcastHashJoin [i_item_sk,inv_item_sk] + Project [i_current_price,i_item_desc,i_item_id,i_item_sk] + Filter [i_current_price,i_item_sk,i_manufact_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_current_price,i_item_desc,i_item_id,i_item_sk,i_manufact_id] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [inv_date_sk,inv_item_sk] + Filter [inv_date_sk,inv_item_sk,inv_quantity_on_hand] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/explain.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/explain.txt new file mode 100644 index 0000000000000..552f0ee332789 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/explain.txt @@ -0,0 +1,393 @@ +== Physical Plan == +CollectLimit (68) ++- * HashAggregate (67) + +- Exchange (66) + +- * HashAggregate (65) + +- * HashAggregate (64) + +- * HashAggregate (63) + +- * HashAggregate (62) + +- * HashAggregate (61) + +- * HashAggregate (60) + +- Exchange (59) + +- * HashAggregate (58) + +- SortMergeJoin LeftSemi (57) + :- SortMergeJoin LeftSemi (39) + : :- * Sort (21) + : : +- Exchange (20) + : : +- * Project (19) + : : +- * SortMergeJoin Inner (18) + : : :- * Sort (12) + : : : +- Exchange (11) + : : : +- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- * Sort (17) + : : +- Exchange (16) + : : +- * Filter (15) + : : +- * ColumnarToRow (14) + : : +- Scan parquet default.customer (13) + : +- * Sort (38) + : +- Exchange (37) + : +- * HashAggregate (36) + : +- Exchange (35) + : +- * HashAggregate (34) + : +- * Project (33) + : +- * SortMergeJoin Inner (32) + : :- * Sort (29) + : : +- Exchange (28) + : : +- * Project (27) + : : +- * BroadcastHashJoin Inner BuildRight (26) + : : :- * Filter (24) + : : : +- * ColumnarToRow (23) + : : : +- Scan parquet default.catalog_sales (22) + : : +- ReusedExchange (25) + : +- * Sort (31) + : +- ReusedExchange (30) + +- * Sort (56) + +- Exchange (55) + +- * HashAggregate (54) + +- Exchange (53) + +- * HashAggregate (52) + +- * Project (51) + +- * SortMergeJoin Inner (50) + :- * Sort (47) + : +- Exchange (46) + : +- * Project (45) + : +- * BroadcastHashJoin Inner BuildRight (44) 
+ : :- * Filter (42) + : : +- * ColumnarToRow (41) + : : +- Scan parquet default.web_sales (40) + : +- ReusedExchange (43) + +- * Sort (49) + +- ReusedExchange (48) + + +(1) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#1, ss_customer_sk#2] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [2]: [ss_sold_date_sk#1, ss_customer_sk#2] + +(3) Filter [codegen id : 2] +Input [2]: [ss_sold_date_sk#1, ss_customer_sk#2] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_customer_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#3)) + +(7) Project [codegen id : 1] +Output [2]: [d_date_sk#3, d_date#4] +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] + +(8) BroadcastExchange +Input [2]: [d_date_sk#3, d_date#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(9) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#3] +Join condition: None + +(10) Project [codegen id : 2] +Output [2]: [ss_customer_sk#2, d_date#4] +Input [4]: 
[ss_sold_date_sk#1, ss_customer_sk#2, d_date_sk#3, d_date#4] + +(11) Exchange +Input [2]: [ss_customer_sk#2, d_date#4] +Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#7] + +(12) Sort [codegen id : 3] +Input [2]: [ss_customer_sk#2, d_date#4] +Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 + +(13) Scan parquet default.customer +Output [3]: [c_customer_sk#8, c_first_name#9, c_last_name#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 4] +Input [3]: [c_customer_sk#8, c_first_name#9, c_last_name#10] + +(15) Filter [codegen id : 4] +Input [3]: [c_customer_sk#8, c_first_name#9, c_last_name#10] +Condition : isnotnull(c_customer_sk#8) + +(16) Exchange +Input [3]: [c_customer_sk#8, c_first_name#9, c_last_name#10] +Arguments: hashpartitioning(c_customer_sk#8, 5), true, [id=#11] + +(17) Sort [codegen id : 5] +Input [3]: [c_customer_sk#8, c_first_name#9, c_last_name#10] +Arguments: [c_customer_sk#8 ASC NULLS FIRST], false, 0 + +(18) SortMergeJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#8] +Join condition: None + +(19) Project [codegen id : 6] +Output [3]: [d_date#4, c_first_name#9, c_last_name#10] +Input [5]: [ss_customer_sk#2, d_date#4, c_customer_sk#8, c_first_name#9, c_last_name#10] + +(20) Exchange +Input [3]: [d_date#4, c_first_name#9, c_last_name#10] +Arguments: hashpartitioning(coalesce(c_last_name#10, ), isnull(c_last_name#10), coalesce(c_first_name#9, ), isnull(c_first_name#9), coalesce(d_date#4, 0), isnull(d_date#4), 5), true, [id=#12] + +(21) Sort [codegen id : 7] +Input [3]: [d_date#4, c_first_name#9, c_last_name#10] +Arguments: [coalesce(c_last_name#10, ) ASC NULLS FIRST, isnull(c_last_name#10) ASC NULLS FIRST, coalesce(c_first_name#9, ) ASC NULLS FIRST, 
isnull(c_first_name#9) ASC NULLS FIRST, coalesce(d_date#4, 0) ASC NULLS FIRST, isnull(d_date#4) ASC NULLS FIRST], false, 0 + +(22) Scan parquet default.catalog_sales +Output [2]: [cs_sold_date_sk#13, cs_bill_customer_sk#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_customer_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 9] +Input [2]: [cs_sold_date_sk#13, cs_bill_customer_sk#14] + +(24) Filter [codegen id : 9] +Input [2]: [cs_sold_date_sk#13, cs_bill_customer_sk#14] +Condition : (isnotnull(cs_sold_date_sk#13) AND isnotnull(cs_bill_customer_sk#14)) + +(25) ReusedExchange [Reuses operator id: 8] +Output [2]: [d_date_sk#15, d_date#16] + +(26) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [cs_sold_date_sk#13] +Right keys [1]: [d_date_sk#15] +Join condition: None + +(27) Project [codegen id : 9] +Output [2]: [cs_bill_customer_sk#14, d_date#16] +Input [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, d_date_sk#15, d_date#16] + +(28) Exchange +Input [2]: [cs_bill_customer_sk#14, d_date#16] +Arguments: hashpartitioning(cs_bill_customer_sk#14, 5), true, [id=#17] + +(29) Sort [codegen id : 10] +Input [2]: [cs_bill_customer_sk#14, d_date#16] +Arguments: [cs_bill_customer_sk#14 ASC NULLS FIRST], false, 0 + +(30) ReusedExchange [Reuses operator id: 16] +Output [3]: [c_customer_sk#18, c_first_name#19, c_last_name#20] + +(31) Sort [codegen id : 12] +Input [3]: [c_customer_sk#18, c_first_name#19, c_last_name#20] +Arguments: [c_customer_sk#18 ASC NULLS FIRST], false, 0 + +(32) SortMergeJoin [codegen id : 13] +Left keys [1]: [cs_bill_customer_sk#14] +Right keys [1]: [c_customer_sk#18] +Join condition: None + +(33) Project [codegen id : 13] +Output [3]: [c_last_name#20, c_first_name#19, d_date#16] +Input [5]: [cs_bill_customer_sk#14, d_date#16, c_customer_sk#18, 
c_first_name#19, c_last_name#20] + +(34) HashAggregate [codegen id : 13] +Input [3]: [c_last_name#20, c_first_name#19, d_date#16] +Keys [3]: [c_last_name#20, c_first_name#19, d_date#16] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#20, c_first_name#19, d_date#16] + +(35) Exchange +Input [3]: [c_last_name#20, c_first_name#19, d_date#16] +Arguments: hashpartitioning(c_last_name#20, c_first_name#19, d_date#16, 5), true, [id=#21] + +(36) HashAggregate [codegen id : 14] +Input [3]: [c_last_name#20, c_first_name#19, d_date#16] +Keys [3]: [c_last_name#20, c_first_name#19, d_date#16] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#20, c_first_name#19, d_date#16] + +(37) Exchange +Input [3]: [c_last_name#20, c_first_name#19, d_date#16] +Arguments: hashpartitioning(coalesce(c_last_name#20, ), isnull(c_last_name#20), coalesce(c_first_name#19, ), isnull(c_first_name#19), coalesce(d_date#16, 0), isnull(d_date#16), 5), true, [id=#22] + +(38) Sort [codegen id : 15] +Input [3]: [c_last_name#20, c_first_name#19, d_date#16] +Arguments: [coalesce(c_last_name#20, ) ASC NULLS FIRST, isnull(c_last_name#20) ASC NULLS FIRST, coalesce(c_first_name#19, ) ASC NULLS FIRST, isnull(c_first_name#19) ASC NULLS FIRST, coalesce(d_date#16, 0) ASC NULLS FIRST, isnull(d_date#16) ASC NULLS FIRST], false, 0 + +(39) SortMergeJoin +Left keys [6]: [coalesce(c_last_name#10, ), isnull(c_last_name#10), coalesce(c_first_name#9, ), isnull(c_first_name#9), coalesce(d_date#4, 0), isnull(d_date#4)] +Right keys [6]: [coalesce(c_last_name#20, ), isnull(c_last_name#20), coalesce(c_first_name#19, ), isnull(c_first_name#19), coalesce(d_date#16, 0), isnull(d_date#16)] +Join condition: None + +(40) Scan parquet default.web_sales +Output [2]: [ws_sold_date_sk#23, ws_bill_customer_sk#24] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: 
[IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 17] +Input [2]: [ws_sold_date_sk#23, ws_bill_customer_sk#24] + +(42) Filter [codegen id : 17] +Input [2]: [ws_sold_date_sk#23, ws_bill_customer_sk#24] +Condition : (isnotnull(ws_sold_date_sk#23) AND isnotnull(ws_bill_customer_sk#24)) + +(43) ReusedExchange [Reuses operator id: 8] +Output [2]: [d_date_sk#25, d_date#26] + +(44) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_sold_date_sk#23] +Right keys [1]: [d_date_sk#25] +Join condition: None + +(45) Project [codegen id : 17] +Output [2]: [ws_bill_customer_sk#24, d_date#26] +Input [4]: [ws_sold_date_sk#23, ws_bill_customer_sk#24, d_date_sk#25, d_date#26] + +(46) Exchange +Input [2]: [ws_bill_customer_sk#24, d_date#26] +Arguments: hashpartitioning(ws_bill_customer_sk#24, 5), true, [id=#27] + +(47) Sort [codegen id : 18] +Input [2]: [ws_bill_customer_sk#24, d_date#26] +Arguments: [ws_bill_customer_sk#24 ASC NULLS FIRST], false, 0 + +(48) ReusedExchange [Reuses operator id: 16] +Output [3]: [c_customer_sk#28, c_first_name#29, c_last_name#30] + +(49) Sort [codegen id : 20] +Input [3]: [c_customer_sk#28, c_first_name#29, c_last_name#30] +Arguments: [c_customer_sk#28 ASC NULLS FIRST], false, 0 + +(50) SortMergeJoin [codegen id : 21] +Left keys [1]: [ws_bill_customer_sk#24] +Right keys [1]: [c_customer_sk#28] +Join condition: None + +(51) Project [codegen id : 21] +Output [3]: [c_last_name#30, c_first_name#29, d_date#26] +Input [5]: [ws_bill_customer_sk#24, d_date#26, c_customer_sk#28, c_first_name#29, c_last_name#30] + +(52) HashAggregate [codegen id : 21] +Input [3]: [c_last_name#30, c_first_name#29, d_date#26] +Keys [3]: [c_last_name#30, c_first_name#29, d_date#26] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#30, c_first_name#29, d_date#26] + +(53) Exchange +Input [3]: [c_last_name#30, c_first_name#29, d_date#26] +Arguments: hashpartitioning(c_last_name#30, c_first_name#29, 
d_date#26, 5), true, [id=#31] + +(54) HashAggregate [codegen id : 22] +Input [3]: [c_last_name#30, c_first_name#29, d_date#26] +Keys [3]: [c_last_name#30, c_first_name#29, d_date#26] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#30, c_first_name#29, d_date#26] + +(55) Exchange +Input [3]: [c_last_name#30, c_first_name#29, d_date#26] +Arguments: hashpartitioning(coalesce(c_last_name#30, ), isnull(c_last_name#30), coalesce(c_first_name#29, ), isnull(c_first_name#29), coalesce(d_date#26, 0), isnull(d_date#26), 5), true, [id=#32] + +(56) Sort [codegen id : 23] +Input [3]: [c_last_name#30, c_first_name#29, d_date#26] +Arguments: [coalesce(c_last_name#30, ) ASC NULLS FIRST, isnull(c_last_name#30) ASC NULLS FIRST, coalesce(c_first_name#29, ) ASC NULLS FIRST, isnull(c_first_name#29) ASC NULLS FIRST, coalesce(d_date#26, 0) ASC NULLS FIRST, isnull(d_date#26) ASC NULLS FIRST], false, 0 + +(57) SortMergeJoin +Left keys [6]: [coalesce(c_last_name#10, ), isnull(c_last_name#10), coalesce(c_first_name#9, ), isnull(c_first_name#9), coalesce(d_date#4, 0), isnull(d_date#4)] +Right keys [6]: [coalesce(c_last_name#30, ), isnull(c_last_name#30), coalesce(c_first_name#29, ), isnull(c_first_name#29), coalesce(d_date#26, 0), isnull(d_date#26)] +Join condition: None + +(58) HashAggregate [codegen id : 24] +Input [3]: [d_date#4, c_first_name#9, c_last_name#10] +Keys [3]: [c_last_name#10, c_first_name#9, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#10, c_first_name#9, d_date#4] + +(59) Exchange +Input [3]: [c_last_name#10, c_first_name#9, d_date#4] +Arguments: hashpartitioning(c_last_name#10, c_first_name#9, d_date#4, 5), true, [id=#33] + +(60) HashAggregate [codegen id : 25] +Input [3]: [c_last_name#10, c_first_name#9, d_date#4] +Keys [3]: [c_last_name#10, c_first_name#9, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#10, c_first_name#9, d_date#4] + +(61) HashAggregate [codegen id : 25] +Input [3]: 
[c_last_name#10, c_first_name#9, d_date#4] +Keys [3]: [c_last_name#10, c_first_name#9, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#10, c_first_name#9, d_date#4] + +(62) HashAggregate [codegen id : 25] +Input [3]: [c_last_name#10, c_first_name#9, d_date#4] +Keys [3]: [c_last_name#10, c_first_name#9, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#10, c_first_name#9, d_date#4] + +(63) HashAggregate [codegen id : 25] +Input [3]: [c_last_name#10, c_first_name#9, d_date#4] +Keys [3]: [c_last_name#10, c_first_name#9, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#10, c_first_name#9, d_date#4] + +(64) HashAggregate [codegen id : 25] +Input [3]: [c_last_name#10, c_first_name#9, d_date#4] +Keys [3]: [c_last_name#10, c_first_name#9, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results: [] + +(65) HashAggregate [codegen id : 25] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#34] +Results [1]: [count#35] + +(66) Exchange +Input [1]: [count#35] +Arguments: SinglePartition, true, [id=#36] + +(67) HashAggregate [codegen id : 26] +Input [1]: [count#35] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#37] +Results [1]: [count(1)#37 AS count(1)#38] + +(68) CollectLimit +Input [1]: [count(1)#38] +Arguments: 100 + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/simplified.txt new file mode 100644 index 0000000000000..4a3f651d416fc --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/simplified.txt @@ -0,0 +1,118 @@ +CollectLimit + WholeStageCodegen (26) + HashAggregate [count] [count,count(1),count(1)] + InputAdapter + Exchange #1 + WholeStageCodegen (25) + HashAggregate [count,count] + HashAggregate [c_first_name,c_last_name,d_date] + 
HashAggregate [c_first_name,c_last_name,d_date] + HashAggregate [c_first_name,c_last_name,d_date] + HashAggregate [c_first_name,c_last_name,d_date] + HashAggregate [c_first_name,c_last_name,d_date] + InputAdapter + Exchange [c_first_name,c_last_name,d_date] #2 + WholeStageCodegen (24) + HashAggregate [c_first_name,c_last_name,d_date] + InputAdapter + SortMergeJoin [c_first_name,c_first_name,c_last_name,c_last_name,d_date,d_date] + SortMergeJoin [c_first_name,c_first_name,c_last_name,c_last_name,d_date,d_date] + WholeStageCodegen (7) + Sort [c_first_name,c_last_name,d_date] + InputAdapter + Exchange [c_first_name,c_last_name,d_date] #3 + WholeStageCodegen (6) + Project [c_first_name,c_last_name,d_date] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (3) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #4 + WholeStageCodegen (2) + Project [d_date,ss_customer_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date,d_date_sk] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk,d_month_seq] + InputAdapter + WholeStageCodegen (5) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #6 + WholeStageCodegen (4) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] + WholeStageCodegen (15) + Sort [c_first_name,c_last_name,d_date] + InputAdapter + Exchange [c_first_name,c_last_name,d_date] #7 + WholeStageCodegen (14) + HashAggregate [c_first_name,c_last_name,d_date] + InputAdapter + Exchange [c_first_name,c_last_name,d_date] #8 + WholeStageCodegen (13) + HashAggregate [c_first_name,c_last_name,d_date] + Project [c_first_name,c_last_name,d_date] + SortMergeJoin 
[c_customer_sk,cs_bill_customer_sk] + InputAdapter + WholeStageCodegen (10) + Sort [cs_bill_customer_sk] + InputAdapter + Exchange [cs_bill_customer_sk] #9 + WholeStageCodegen (9) + Project [cs_bill_customer_sk,d_date] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_bill_customer_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date,d_date_sk] #5 + InputAdapter + WholeStageCodegen (12) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #6 + WholeStageCodegen (23) + Sort [c_first_name,c_last_name,d_date] + InputAdapter + Exchange [c_first_name,c_last_name,d_date] #10 + WholeStageCodegen (22) + HashAggregate [c_first_name,c_last_name,d_date] + InputAdapter + Exchange [c_first_name,c_last_name,d_date] #11 + WholeStageCodegen (21) + HashAggregate [c_first_name,c_last_name,d_date] + Project [c_first_name,c_last_name,d_date] + SortMergeJoin [c_customer_sk,ws_bill_customer_sk] + InputAdapter + WholeStageCodegen (18) + Sort [ws_bill_customer_sk] + InputAdapter + Exchange [ws_bill_customer_sk] #12 + WholeStageCodegen (17) + Project [d_date,ws_bill_customer_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_bill_customer_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date,d_date_sk] #5 + InputAdapter + WholeStageCodegen (20) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #6 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt new file mode 100644 index 0000000000000..397b36be79de8 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt @@ -0,0 +1,328 @@ +== 
Physical Plan == +CollectLimit (55) ++- * HashAggregate (54) + +- Exchange (53) + +- * HashAggregate (52) + +- * HashAggregate (51) + +- * HashAggregate (50) + +- * HashAggregate (49) + +- * HashAggregate (48) + +- * HashAggregate (47) + +- Exchange (46) + +- * HashAggregate (45) + +- * BroadcastHashJoin LeftSemi BuildRight (44) + :- * BroadcastHashJoin LeftSemi BuildRight (30) + : :- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.customer (11) + : +- BroadcastExchange (29) + : +- * HashAggregate (28) + : +- Exchange (27) + : +- * HashAggregate (26) + : +- * Project (25) + : +- * BroadcastHashJoin Inner BuildRight (24) + : :- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Filter (19) + : : : +- * ColumnarToRow (18) + : : : +- Scan parquet default.catalog_sales (17) + : : +- ReusedExchange (20) + : +- ReusedExchange (23) + +- BroadcastExchange (43) + +- * HashAggregate (42) + +- Exchange (41) + +- * HashAggregate (40) + +- * Project (39) + +- * BroadcastHashJoin Inner BuildRight (38) + :- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Filter (33) + : : +- * ColumnarToRow (32) + : : +- Scan parquet default.web_sales (31) + : +- ReusedExchange (34) + +- ReusedExchange (37) + + +(1) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#1, ss_customer_sk#2] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: 
[IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 11] +Input [2]: [ss_sold_date_sk#1, ss_customer_sk#2] + +(3) Filter [codegen id : 11] +Input [2]: [ss_sold_date_sk#1, ss_customer_sk#2] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_customer_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#3)) + +(7) Project [codegen id : 1] +Output [2]: [d_date_sk#3, d_date#4] +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] + +(8) BroadcastExchange +Input [2]: [d_date_sk#3, d_date#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(9) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#3] +Join condition: None + +(10) Project [codegen id : 11] +Output [2]: [ss_customer_sk#2, d_date#4] +Input [4]: [ss_sold_date_sk#1, ss_customer_sk#2, d_date_sk#3, d_date#4] + +(11) Scan parquet default.customer +Output [3]: [c_customer_sk#7, c_first_name#8, c_last_name#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [c_customer_sk#7, 
c_first_name#8, c_last_name#9] + +(13) Filter [codegen id : 2] +Input [3]: [c_customer_sk#7, c_first_name#8, c_last_name#9] +Condition : isnotnull(c_customer_sk#7) + +(14) BroadcastExchange +Input [3]: [c_customer_sk#7, c_first_name#8, c_last_name#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] + +(15) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#7] +Join condition: None + +(16) Project [codegen id : 11] +Output [3]: [d_date#4, c_first_name#8, c_last_name#9] +Input [5]: [ss_customer_sk#2, d_date#4, c_customer_sk#7, c_first_name#8, c_last_name#9] + +(17) Scan parquet default.catalog_sales +Output [2]: [cs_sold_date_sk#11, cs_bill_customer_sk#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_customer_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 5] +Input [2]: [cs_sold_date_sk#11, cs_bill_customer_sk#12] + +(19) Filter [codegen id : 5] +Input [2]: [cs_sold_date_sk#11, cs_bill_customer_sk#12] +Condition : (isnotnull(cs_sold_date_sk#11) AND isnotnull(cs_bill_customer_sk#12)) + +(20) ReusedExchange [Reuses operator id: 8] +Output [2]: [d_date_sk#13, d_date#14] + +(21) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_sold_date_sk#11] +Right keys [1]: [d_date_sk#13] +Join condition: None + +(22) Project [codegen id : 5] +Output [2]: [cs_bill_customer_sk#12, d_date#14] +Input [4]: [cs_sold_date_sk#11, cs_bill_customer_sk#12, d_date_sk#13, d_date#14] + +(23) ReusedExchange [Reuses operator id: 14] +Output [3]: [c_customer_sk#15, c_first_name#16, c_last_name#17] + +(24) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_bill_customer_sk#12] +Right keys [1]: [c_customer_sk#15] +Join condition: None + +(25) Project [codegen id : 5] +Output [3]: 
[c_last_name#17, c_first_name#16, d_date#14] +Input [5]: [cs_bill_customer_sk#12, d_date#14, c_customer_sk#15, c_first_name#16, c_last_name#17] + +(26) HashAggregate [codegen id : 5] +Input [3]: [c_last_name#17, c_first_name#16, d_date#14] +Keys [3]: [c_last_name#17, c_first_name#16, d_date#14] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#17, c_first_name#16, d_date#14] + +(27) Exchange +Input [3]: [c_last_name#17, c_first_name#16, d_date#14] +Arguments: hashpartitioning(c_last_name#17, c_first_name#16, d_date#14, 5), true, [id=#18] + +(28) HashAggregate [codegen id : 6] +Input [3]: [c_last_name#17, c_first_name#16, d_date#14] +Keys [3]: [c_last_name#17, c_first_name#16, d_date#14] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#17, c_first_name#16, d_date#14] + +(29) BroadcastExchange +Input [3]: [c_last_name#17, c_first_name#16, d_date#14] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true]), coalesce(input[1, string, true], ), isnull(input[1, string, true]), coalesce(input[2, date, true], 0), isnull(input[2, date, true])),false), [id=#19] + +(30) BroadcastHashJoin [codegen id : 11] +Left keys [6]: [coalesce(c_last_name#9, ), isnull(c_last_name#9), coalesce(c_first_name#8, ), isnull(c_first_name#8), coalesce(d_date#4, 0), isnull(d_date#4)] +Right keys [6]: [coalesce(c_last_name#17, ), isnull(c_last_name#17), coalesce(c_first_name#16, ), isnull(c_first_name#16), coalesce(d_date#14, 0), isnull(d_date#14)] +Join condition: None + +(31) Scan parquet default.web_sales +Output [2]: [ws_sold_date_sk#20, ws_bill_customer_sk#21] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 9] +Input [2]: [ws_sold_date_sk#20, 
ws_bill_customer_sk#21] + +(33) Filter [codegen id : 9] +Input [2]: [ws_sold_date_sk#20, ws_bill_customer_sk#21] +Condition : (isnotnull(ws_sold_date_sk#20) AND isnotnull(ws_bill_customer_sk#21)) + +(34) ReusedExchange [Reuses operator id: 8] +Output [2]: [d_date_sk#22, d_date#23] + +(35) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_sold_date_sk#20] +Right keys [1]: [d_date_sk#22] +Join condition: None + +(36) Project [codegen id : 9] +Output [2]: [ws_bill_customer_sk#21, d_date#23] +Input [4]: [ws_sold_date_sk#20, ws_bill_customer_sk#21, d_date_sk#22, d_date#23] + +(37) ReusedExchange [Reuses operator id: 14] +Output [3]: [c_customer_sk#24, c_first_name#25, c_last_name#26] + +(38) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_bill_customer_sk#21] +Right keys [1]: [c_customer_sk#24] +Join condition: None + +(39) Project [codegen id : 9] +Output [3]: [c_last_name#26, c_first_name#25, d_date#23] +Input [5]: [ws_bill_customer_sk#21, d_date#23, c_customer_sk#24, c_first_name#25, c_last_name#26] + +(40) HashAggregate [codegen id : 9] +Input [3]: [c_last_name#26, c_first_name#25, d_date#23] +Keys [3]: [c_last_name#26, c_first_name#25, d_date#23] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#26, c_first_name#25, d_date#23] + +(41) Exchange +Input [3]: [c_last_name#26, c_first_name#25, d_date#23] +Arguments: hashpartitioning(c_last_name#26, c_first_name#25, d_date#23, 5), true, [id=#27] + +(42) HashAggregate [codegen id : 10] +Input [3]: [c_last_name#26, c_first_name#25, d_date#23] +Keys [3]: [c_last_name#26, c_first_name#25, d_date#23] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#26, c_first_name#25, d_date#23] + +(43) BroadcastExchange +Input [3]: [c_last_name#26, c_first_name#25, d_date#23] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true]), coalesce(input[1, string, true], ), isnull(input[1, string, true]), coalesce(input[2, date, true], 0), 
isnull(input[2, date, true])),false), [id=#28] + +(44) BroadcastHashJoin [codegen id : 11] +Left keys [6]: [coalesce(c_last_name#9, ), isnull(c_last_name#9), coalesce(c_first_name#8, ), isnull(c_first_name#8), coalesce(d_date#4, 0), isnull(d_date#4)] +Right keys [6]: [coalesce(c_last_name#26, ), isnull(c_last_name#26), coalesce(c_first_name#25, ), isnull(c_first_name#25), coalesce(d_date#23, 0), isnull(d_date#23)] +Join condition: None + +(45) HashAggregate [codegen id : 11] +Input [3]: [d_date#4, c_first_name#8, c_last_name#9] +Keys [3]: [c_last_name#9, c_first_name#8, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#9, c_first_name#8, d_date#4] + +(46) Exchange +Input [3]: [c_last_name#9, c_first_name#8, d_date#4] +Arguments: hashpartitioning(c_last_name#9, c_first_name#8, d_date#4, 5), true, [id=#29] + +(47) HashAggregate [codegen id : 12] +Input [3]: [c_last_name#9, c_first_name#8, d_date#4] +Keys [3]: [c_last_name#9, c_first_name#8, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#9, c_first_name#8, d_date#4] + +(48) HashAggregate [codegen id : 12] +Input [3]: [c_last_name#9, c_first_name#8, d_date#4] +Keys [3]: [c_last_name#9, c_first_name#8, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#9, c_first_name#8, d_date#4] + +(49) HashAggregate [codegen id : 12] +Input [3]: [c_last_name#9, c_first_name#8, d_date#4] +Keys [3]: [c_last_name#9, c_first_name#8, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#9, c_first_name#8, d_date#4] + +(50) HashAggregate [codegen id : 12] +Input [3]: [c_last_name#9, c_first_name#8, d_date#4] +Keys [3]: [c_last_name#9, c_first_name#8, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#9, c_first_name#8, d_date#4] + +(51) HashAggregate [codegen id : 12] +Input [3]: [c_last_name#9, c_first_name#8, d_date#4] +Keys [3]: [c_last_name#9, c_first_name#8, d_date#4] +Functions: [] +Aggregate 
Attributes: [] +Results: [] + +(52) HashAggregate [codegen id : 12] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#30] +Results [1]: [count#31] + +(53) Exchange +Input [1]: [count#31] +Arguments: SinglePartition, true, [id=#32] + +(54) HashAggregate [codegen id : 13] +Input [1]: [count#31] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#33] +Results [1]: [count(1)#33 AS count(1)#34] + +(55) CollectLimit +Input [1]: [count(1)#34] +Arguments: 100 + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt new file mode 100644 index 0000000000000..ea2c2fce9f4e1 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt @@ -0,0 +1,81 @@ +CollectLimit + WholeStageCodegen (13) + HashAggregate [count] [count,count(1),count(1)] + InputAdapter + Exchange #1 + WholeStageCodegen (12) + HashAggregate [count,count] + HashAggregate [c_first_name,c_last_name,d_date] + HashAggregate [c_first_name,c_last_name,d_date] + HashAggregate [c_first_name,c_last_name,d_date] + HashAggregate [c_first_name,c_last_name,d_date] + HashAggregate [c_first_name,c_last_name,d_date] + InputAdapter + Exchange [c_first_name,c_last_name,d_date] #2 + WholeStageCodegen (11) + HashAggregate [c_first_name,c_last_name,d_date] + BroadcastHashJoin [c_first_name,c_first_name,c_last_name,c_last_name,d_date,d_date] + BroadcastHashJoin [c_first_name,c_first_name,c_last_name,c_last_name,d_date,d_date] + Project [c_first_name,c_last_name,d_date] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [d_date,ss_customer_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + 
WholeStageCodegen (1) + Project [d_date,d_date_sk] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (6) + HashAggregate [c_first_name,c_last_name,d_date] + InputAdapter + Exchange [c_first_name,c_last_name,d_date] #6 + WholeStageCodegen (5) + HashAggregate [c_first_name,c_last_name,d_date] + Project [c_first_name,c_last_name,d_date] + BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] + Project [cs_bill_customer_sk,d_date] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_bill_customer_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date,d_date_sk] #3 + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (10) + HashAggregate [c_first_name,c_last_name,d_date] + InputAdapter + Exchange [c_first_name,c_last_name,d_date] #8 + WholeStageCodegen (9) + HashAggregate [c_first_name,c_last_name,d_date] + Project [c_first_name,c_last_name,d_date] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Project [d_date,ws_bill_customer_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_bill_customer_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date,d_date_sk] #3 + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a.sf100/explain.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a.sf100/explain.txt new file mode 100644 index 0000000000000..1b3539cf1fd05 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a.sf100/explain.txt @@ -0,0 +1,307 @@ +== Physical Plan == +* Sort (55) ++- Exchange (54) + +- * SortMergeJoin Inner (53) + :- * Sort (29) + : +- Exchange (28) + : +- * Project (27) + : +- * Filter (26) + : +- * HashAggregate (25) + : +- Exchange (24) + : +- * HashAggregate (23) + : +- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.inventory (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.item (11) + : +- BroadcastExchange (20) + : +- * Filter (19) + : +- * ColumnarToRow (18) + : +- Scan parquet default.warehouse (17) + +- * Sort (52) + +- Exchange (51) + +- * Project (50) + +- * Filter (49) + +- * HashAggregate (48) + +- Exchange (47) + +- * HashAggregate (46) + +- * Project (45) + +- * BroadcastHashJoin Inner BuildRight (44) + :- * Project (42) + : +- * BroadcastHashJoin Inner BuildRight (41) + : :- * Project (39) + : : +- * BroadcastHashJoin Inner BuildRight (38) + : : :- * Filter (32) + : : : +- * ColumnarToRow (31) + : : : +- Scan parquet default.inventory (30) + : : +- BroadcastExchange (37) + : : +- * Project (36) + : : +- * Filter (35) + : : +- * ColumnarToRow (34) + : : +- Scan parquet default.date_dim (33) + : +- ReusedExchange (40) + +- ReusedExchange (43) + + +(1) Scan parquet default.inventory +Output [4]: [inv_date_sk#1, inv_item_sk#2, 
inv_warehouse_sk#3, inv_quantity_on_hand#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/inventory] +PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] + +(3) Filter [codegen id : 4] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Condition : ((isnotnull(inv_item_sk#2) AND isnotnull(inv_warehouse_sk#3)) AND isnotnull(inv_date_sk#1)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#5, d_year#6, d_moy#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,1), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Condition : ((((isnotnull(d_year#6) AND isnotnull(d_moy#7)) AND (d_year#6 = 2001)) AND (d_moy#7 = 1)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [2]: [d_date_sk#5, d_moy#7] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(8) BroadcastExchange +Input [2]: [d_date_sk#5, d_moy#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_date_sk#1] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, d_moy#7] +Input [6]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, d_date_sk#5, d_moy#7] + +(11) 
Scan parquet default.item +Output [1]: [i_item_sk#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [1]: [i_item_sk#9] + +(13) Filter [codegen id : 2] +Input [1]: [i_item_sk#9] +Condition : isnotnull(i_item_sk#9) + +(14) BroadcastExchange +Input [1]: [i_item_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_item_sk#2] +Right keys [1]: [i_item_sk#9] +Join condition: None + +(16) Project [codegen id : 4] +Output [4]: [inv_warehouse_sk#3, inv_quantity_on_hand#4, d_moy#7, i_item_sk#9] +Input [5]: [inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, d_moy#7, i_item_sk#9] + +(17) Scan parquet default.warehouse +Output [2]: [w_warehouse_sk#11, w_warehouse_name#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [2]: [w_warehouse_sk#11, w_warehouse_name#12] + +(19) Filter [codegen id : 3] +Input [2]: [w_warehouse_sk#11, w_warehouse_name#12] +Condition : isnotnull(w_warehouse_sk#11) + +(20) BroadcastExchange +Input [2]: [w_warehouse_sk#11, w_warehouse_name#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#13] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_warehouse_sk#3] +Right keys [1]: [w_warehouse_sk#11] +Join condition: None + +(22) Project [codegen id : 4] +Output [5]: [inv_quantity_on_hand#4, i_item_sk#9, w_warehouse_sk#11, w_warehouse_name#12, d_moy#7] +Input [6]: [inv_warehouse_sk#3, inv_quantity_on_hand#4, d_moy#7, 
i_item_sk#9, w_warehouse_sk#11, w_warehouse_name#12] + +(23) HashAggregate [codegen id : 4] +Input [5]: [inv_quantity_on_hand#4, i_item_sk#9, w_warehouse_sk#11, w_warehouse_name#12, d_moy#7] +Keys [4]: [w_warehouse_name#12, w_warehouse_sk#11, i_item_sk#9, d_moy#7] +Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#4 as double)), partial_avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [5]: [n#14, avg#15, m2#16, sum#17, count#18] +Results [9]: [w_warehouse_name#12, w_warehouse_sk#11, i_item_sk#9, d_moy#7, n#19, avg#20, m2#21, sum#22, count#23] + +(24) Exchange +Input [9]: [w_warehouse_name#12, w_warehouse_sk#11, i_item_sk#9, d_moy#7, n#19, avg#20, m2#21, sum#22, count#23] +Arguments: hashpartitioning(w_warehouse_name#12, w_warehouse_sk#11, i_item_sk#9, d_moy#7, 5), true, [id=#24] + +(25) HashAggregate [codegen id : 5] +Input [9]: [w_warehouse_name#12, w_warehouse_sk#11, i_item_sk#9, d_moy#7, n#19, avg#20, m2#21, sum#22, count#23] +Keys [4]: [w_warehouse_name#12, w_warehouse_sk#11, i_item_sk#9, d_moy#7] +Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#4 as double)), avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#4 as double))#25, avg(cast(inv_quantity_on_hand#4 as bigint))#26] +Results [5]: [w_warehouse_sk#11, i_item_sk#9, d_moy#7, stddev_samp(cast(inv_quantity_on_hand#4 as double))#25 AS stdev#27, avg(cast(inv_quantity_on_hand#4 as bigint))#26 AS mean#28] + +(26) Filter [codegen id : 5] +Input [5]: [w_warehouse_sk#11, i_item_sk#9, d_moy#7, stdev#27, mean#28] +Condition : (CASE WHEN (mean#28 = 0.0) THEN 0.0 ELSE (stdev#27 / mean#28) END > 1.0) + +(27) Project [codegen id : 5] +Output [5]: [w_warehouse_sk#11, i_item_sk#9, d_moy#7, mean#28, CASE WHEN (mean#28 = 0.0) THEN null ELSE (stdev#27 / mean#28) END AS cov#29] +Input [5]: [w_warehouse_sk#11, i_item_sk#9, d_moy#7, stdev#27, mean#28] + +(28) Exchange +Input [5]: [w_warehouse_sk#11, i_item_sk#9, d_moy#7, mean#28, 
cov#29] +Arguments: hashpartitioning(i_item_sk#9, w_warehouse_sk#11, 5), true, [id=#30] + +(29) Sort [codegen id : 6] +Input [5]: [w_warehouse_sk#11, i_item_sk#9, d_moy#7, mean#28, cov#29] +Arguments: [i_item_sk#9 ASC NULLS FIRST, w_warehouse_sk#11 ASC NULLS FIRST], false, 0 + +(30) Scan parquet default.inventory +Output [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/inventory] +PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 10] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] + +(32) Filter [codegen id : 10] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Condition : ((isnotnull(inv_item_sk#2) AND isnotnull(inv_warehouse_sk#3)) AND isnotnull(inv_date_sk#1)) + +(33) Scan parquet default.date_dim +Output [3]: [d_date_sk#31, d_year#32, d_moy#33] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 7] +Input [3]: [d_date_sk#31, d_year#32, d_moy#33] + +(35) Filter [codegen id : 7] +Input [3]: [d_date_sk#31, d_year#32, d_moy#33] +Condition : ((((isnotnull(d_year#32) AND isnotnull(d_moy#33)) AND (d_year#32 = 2001)) AND (d_moy#33 = 2)) AND isnotnull(d_date_sk#31)) + +(36) Project [codegen id : 7] +Output [2]: [d_date_sk#31, d_moy#33] +Input [3]: [d_date_sk#31, d_year#32, d_moy#33] + +(37) BroadcastExchange +Input [2]: [d_date_sk#31, d_moy#33] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] 
as bigint)),false), [id=#34] + +(38) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [inv_date_sk#1] +Right keys [1]: [d_date_sk#31] +Join condition: None + +(39) Project [codegen id : 10] +Output [4]: [inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, d_moy#33] +Input [6]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, d_date_sk#31, d_moy#33] + +(40) ReusedExchange [Reuses operator id: 14] +Output [1]: [i_item_sk#35] + +(41) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [inv_item_sk#2] +Right keys [1]: [i_item_sk#35] +Join condition: None + +(42) Project [codegen id : 10] +Output [4]: [inv_warehouse_sk#3, inv_quantity_on_hand#4, d_moy#33, i_item_sk#35] +Input [5]: [inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, d_moy#33, i_item_sk#35] + +(43) ReusedExchange [Reuses operator id: 20] +Output [2]: [w_warehouse_sk#36, w_warehouse_name#37] + +(44) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [inv_warehouse_sk#3] +Right keys [1]: [w_warehouse_sk#36] +Join condition: None + +(45) Project [codegen id : 10] +Output [5]: [inv_quantity_on_hand#4, i_item_sk#35, w_warehouse_sk#36, w_warehouse_name#37, d_moy#33] +Input [6]: [inv_warehouse_sk#3, inv_quantity_on_hand#4, d_moy#33, i_item_sk#35, w_warehouse_sk#36, w_warehouse_name#37] + +(46) HashAggregate [codegen id : 10] +Input [5]: [inv_quantity_on_hand#4, i_item_sk#35, w_warehouse_sk#36, w_warehouse_name#37, d_moy#33] +Keys [4]: [w_warehouse_name#37, w_warehouse_sk#36, i_item_sk#35, d_moy#33] +Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#4 as double)), partial_avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [5]: [n#38, avg#39, m2#40, sum#41, count#42] +Results [9]: [w_warehouse_name#37, w_warehouse_sk#36, i_item_sk#35, d_moy#33, n#43, avg#44, m2#45, sum#46, count#47] + +(47) Exchange +Input [9]: [w_warehouse_name#37, w_warehouse_sk#36, i_item_sk#35, d_moy#33, n#43, avg#44, m2#45, sum#46, count#47] +Arguments: 
hashpartitioning(w_warehouse_name#37, w_warehouse_sk#36, i_item_sk#35, d_moy#33, 5), true, [id=#48] + +(48) HashAggregate [codegen id : 11] +Input [9]: [w_warehouse_name#37, w_warehouse_sk#36, i_item_sk#35, d_moy#33, n#43, avg#44, m2#45, sum#46, count#47] +Keys [4]: [w_warehouse_name#37, w_warehouse_sk#36, i_item_sk#35, d_moy#33] +Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#4 as double)), avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#4 as double))#49, avg(cast(inv_quantity_on_hand#4 as bigint))#50] +Results [5]: [w_warehouse_sk#36, i_item_sk#35, d_moy#33, stddev_samp(cast(inv_quantity_on_hand#4 as double))#49 AS stdev#51, avg(cast(inv_quantity_on_hand#4 as bigint))#50 AS mean#52] + +(49) Filter [codegen id : 11] +Input [5]: [w_warehouse_sk#36, i_item_sk#35, d_moy#33, stdev#51, mean#52] +Condition : (CASE WHEN (mean#52 = 0.0) THEN 0.0 ELSE (stdev#51 / mean#52) END > 1.0) + +(50) Project [codegen id : 11] +Output [5]: [w_warehouse_sk#36, i_item_sk#35, d_moy#33, mean#52, CASE WHEN (mean#52 = 0.0) THEN null ELSE (stdev#51 / mean#52) END AS cov#53] +Input [5]: [w_warehouse_sk#36, i_item_sk#35, d_moy#33, stdev#51, mean#52] + +(51) Exchange +Input [5]: [w_warehouse_sk#36, i_item_sk#35, d_moy#33, mean#52, cov#53] +Arguments: hashpartitioning(i_item_sk#35, w_warehouse_sk#36, 5), true, [id=#54] + +(52) Sort [codegen id : 12] +Input [5]: [w_warehouse_sk#36, i_item_sk#35, d_moy#33, mean#52, cov#53] +Arguments: [i_item_sk#35 ASC NULLS FIRST, w_warehouse_sk#36 ASC NULLS FIRST], false, 0 + +(53) SortMergeJoin [codegen id : 13] +Left keys [2]: [i_item_sk#9, w_warehouse_sk#11] +Right keys [2]: [i_item_sk#35, w_warehouse_sk#36] +Join condition: None + +(54) Exchange +Input [10]: [w_warehouse_sk#11, i_item_sk#9, d_moy#7, mean#28, cov#29, w_warehouse_sk#36, i_item_sk#35, d_moy#33, mean#52, cov#53] +Arguments: rangepartitioning(w_warehouse_sk#11 ASC NULLS FIRST, i_item_sk#9 ASC NULLS FIRST, d_moy#7 ASC NULLS 
FIRST, mean#28 ASC NULLS FIRST, cov#29 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST, mean#52 ASC NULLS FIRST, cov#53 ASC NULLS FIRST, 5), true, [id=#55] + +(55) Sort [codegen id : 14] +Input [10]: [w_warehouse_sk#11, i_item_sk#9, d_moy#7, mean#28, cov#29, w_warehouse_sk#36, i_item_sk#35, d_moy#33, mean#52, cov#53] +Arguments: [w_warehouse_sk#11 ASC NULLS FIRST, i_item_sk#9 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST, mean#28 ASC NULLS FIRST, cov#29 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST, mean#52 ASC NULLS FIRST, cov#53 ASC NULLS FIRST], true, 0 + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a.sf100/simplified.txt new file mode 100644 index 0000000000000..c38a3b410e120 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a.sf100/simplified.txt @@ -0,0 +1,86 @@ +WholeStageCodegen (14) + Sort [cov,cov,d_moy,d_moy,i_item_sk,mean,mean,w_warehouse_sk] + InputAdapter + Exchange [cov,cov,d_moy,d_moy,i_item_sk,mean,mean,w_warehouse_sk] #1 + WholeStageCodegen (13) + SortMergeJoin [i_item_sk,i_item_sk,w_warehouse_sk,w_warehouse_sk] + InputAdapter + WholeStageCodegen (6) + Sort [i_item_sk,w_warehouse_sk] + InputAdapter + Exchange [i_item_sk,w_warehouse_sk] #2 + WholeStageCodegen (5) + Project [d_moy,i_item_sk,mean,stdev,w_warehouse_sk] + Filter [mean,stdev] + HashAggregate [avg,count,d_moy,i_item_sk,m2,n,sum,w_warehouse_name,w_warehouse_sk] [avg,avg(cast(inv_quantity_on_hand as bigint)),count,m2,mean,n,stddev_samp(cast(inv_quantity_on_hand as double)),stdev,sum] + InputAdapter + Exchange [d_moy,i_item_sk,w_warehouse_name,w_warehouse_sk] #3 + WholeStageCodegen (4) + HashAggregate [d_moy,i_item_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] [avg,avg,count,count,m2,m2,n,n,sum,sum] + Project [d_moy,i_item_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] + BroadcastHashJoin 
[inv_warehouse_sk,w_warehouse_sk] + Project [d_moy,i_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + BroadcastHashJoin [i_item_sk,inv_item_sk] + Project [d_moy,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + BroadcastHashJoin [d_date_sk,inv_date_sk] + Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk,d_moy] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.warehouse [w_warehouse_name,w_warehouse_sk] + InputAdapter + WholeStageCodegen (12) + Sort [i_item_sk,w_warehouse_sk] + InputAdapter + Exchange [i_item_sk,w_warehouse_sk] #7 + WholeStageCodegen (11) + Project [d_moy,i_item_sk,mean,stdev,w_warehouse_sk] + Filter [mean,stdev] + HashAggregate [avg,count,d_moy,i_item_sk,m2,n,sum,w_warehouse_name,w_warehouse_sk] [avg,avg(cast(inv_quantity_on_hand as bigint)),count,m2,mean,n,stddev_samp(cast(inv_quantity_on_hand as double)),stdev,sum] + InputAdapter + Exchange [d_moy,i_item_sk,w_warehouse_name,w_warehouse_sk] #8 + WholeStageCodegen (10) + HashAggregate [d_moy,i_item_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] [avg,avg,count,count,m2,m2,n,n,sum,sum] + Project [d_moy,i_item_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [d_moy,i_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + BroadcastHashJoin [i_item_sk,inv_item_sk] + Project [d_moy,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + BroadcastHashJoin 
[d_date_sk,inv_date_sk] + Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (7) + Project [d_date_sk,d_moy] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + ReusedExchange [i_item_sk] #5 + InputAdapter + ReusedExchange [w_warehouse_name,w_warehouse_sk] #6 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/explain.txt new file mode 100644 index 0000000000000..d4b0a075d18bc --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/explain.txt @@ -0,0 +1,292 @@ +== Physical Plan == +* Sort (52) ++- Exchange (51) + +- * BroadcastHashJoin Inner BuildRight (50) + :- * Project (27) + : +- * Filter (26) + : +- * HashAggregate (25) + : +- Exchange (24) + : +- * HashAggregate (23) + : +- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.inventory (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.item (4) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet default.warehouse (10) + : +- BroadcastExchange (20) + : +- * Project (19) + : +- * Filter (18) + : +- * ColumnarToRow (17) + : +- Scan parquet default.date_dim (16) + +- BroadcastExchange (49) + +- * Project (48) + +- * Filter (47) + +- * HashAggregate (46) + +- Exchange (45) + +- * HashAggregate (44) + +- * Project (43) 
+ +- * BroadcastHashJoin Inner BuildRight (42) + :- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Project (33) + : : +- * BroadcastHashJoin Inner BuildRight (32) + : : :- * Filter (30) + : : : +- * ColumnarToRow (29) + : : : +- Scan parquet default.inventory (28) + : : +- ReusedExchange (31) + : +- ReusedExchange (34) + +- BroadcastExchange (41) + +- * Project (40) + +- * Filter (39) + +- * ColumnarToRow (38) + +- Scan parquet default.date_dim (37) + + +(1) Scan parquet default.inventory +Output [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/inventory] +PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] + +(3) Filter [codegen id : 4] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Condition : ((isnotnull(inv_item_sk#2) AND isnotnull(inv_warehouse_sk#3)) AND isnotnull(inv_date_sk#1)) + +(4) Scan parquet default.item +Output [1]: [i_item_sk#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [1]: [i_item_sk#5] + +(6) Filter [codegen id : 1] +Input [1]: [i_item_sk#5] +Condition : isnotnull(i_item_sk#5) + +(7) BroadcastExchange +Input [1]: [i_item_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#6] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_item_sk#2] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(9) Project [codegen id : 4] +Output [4]: 
[inv_date_sk#1, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#5] +Input [5]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#5] + +(10) Scan parquet default.warehouse +Output [2]: [w_warehouse_sk#7, w_warehouse_name#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [w_warehouse_sk#7, w_warehouse_name#8] + +(12) Filter [codegen id : 2] +Input [2]: [w_warehouse_sk#7, w_warehouse_name#8] +Condition : isnotnull(w_warehouse_sk#7) + +(13) BroadcastExchange +Input [2]: [w_warehouse_sk#7, w_warehouse_name#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_warehouse_sk#3] +Right keys [1]: [w_warehouse_sk#7] +Join condition: None + +(15) Project [codegen id : 4] +Output [5]: [inv_date_sk#1, inv_quantity_on_hand#4, i_item_sk#5, w_warehouse_sk#7, w_warehouse_name#8] +Input [6]: [inv_date_sk#1, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#5, w_warehouse_sk#7, w_warehouse_name#8] + +(16) Scan parquet default.date_dim +Output [3]: [d_date_sk#10, d_year#11, d_moy#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,1), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] + +(18) Filter [codegen id : 3] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] +Condition : ((((isnotnull(d_year#11) AND isnotnull(d_moy#12)) AND (d_year#11 = 2001)) AND (d_moy#12 = 1)) AND isnotnull(d_date_sk#10)) + +(19) Project 
[codegen id : 3] +Output [2]: [d_date_sk#10, d_moy#12] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] + +(20) BroadcastExchange +Input [2]: [d_date_sk#10, d_moy#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(22) Project [codegen id : 4] +Output [5]: [inv_quantity_on_hand#4, i_item_sk#5, w_warehouse_sk#7, w_warehouse_name#8, d_moy#12] +Input [7]: [inv_date_sk#1, inv_quantity_on_hand#4, i_item_sk#5, w_warehouse_sk#7, w_warehouse_name#8, d_date_sk#10, d_moy#12] + +(23) HashAggregate [codegen id : 4] +Input [5]: [inv_quantity_on_hand#4, i_item_sk#5, w_warehouse_sk#7, w_warehouse_name#8, d_moy#12] +Keys [4]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#5, d_moy#12] +Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#4 as double)), partial_avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [5]: [n#14, avg#15, m2#16, sum#17, count#18] +Results [9]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#5, d_moy#12, n#19, avg#20, m2#21, sum#22, count#23] + +(24) Exchange +Input [9]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#5, d_moy#12, n#19, avg#20, m2#21, sum#22, count#23] +Arguments: hashpartitioning(w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#5, d_moy#12, 5), true, [id=#24] + +(25) HashAggregate [codegen id : 10] +Input [9]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#5, d_moy#12, n#19, avg#20, m2#21, sum#22, count#23] +Keys [4]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#5, d_moy#12] +Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#4 as double)), avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#4 as double))#25, avg(cast(inv_quantity_on_hand#4 as bigint))#26] +Results [5]: [w_warehouse_sk#7, i_item_sk#5, d_moy#12, stddev_samp(cast(inv_quantity_on_hand#4 as double))#25 
AS stdev#27, avg(cast(inv_quantity_on_hand#4 as bigint))#26 AS mean#28] + +(26) Filter [codegen id : 10] +Input [5]: [w_warehouse_sk#7, i_item_sk#5, d_moy#12, stdev#27, mean#28] +Condition : (CASE WHEN (mean#28 = 0.0) THEN 0.0 ELSE (stdev#27 / mean#28) END > 1.0) + +(27) Project [codegen id : 10] +Output [5]: [w_warehouse_sk#7, i_item_sk#5, d_moy#12, mean#28, CASE WHEN (mean#28 = 0.0) THEN null ELSE (stdev#27 / mean#28) END AS cov#29] +Input [5]: [w_warehouse_sk#7, i_item_sk#5, d_moy#12, stdev#27, mean#28] + +(28) Scan parquet default.inventory +Output [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/inventory] +PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 8] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] + +(30) Filter [codegen id : 8] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Condition : ((isnotnull(inv_item_sk#2) AND isnotnull(inv_warehouse_sk#3)) AND isnotnull(inv_date_sk#1)) + +(31) ReusedExchange [Reuses operator id: 7] +Output [1]: [i_item_sk#30] + +(32) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [inv_item_sk#2] +Right keys [1]: [i_item_sk#30] +Join condition: None + +(33) Project [codegen id : 8] +Output [4]: [inv_date_sk#1, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#30] +Input [5]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#30] + +(34) ReusedExchange [Reuses operator id: 13] +Output [2]: [w_warehouse_sk#31, w_warehouse_name#32] + +(35) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [inv_warehouse_sk#3] +Right keys [1]: [w_warehouse_sk#31] +Join condition: None + +(36) Project [codegen id : 8] +Output [5]: [inv_date_sk#1, 
inv_quantity_on_hand#4, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32] +Input [6]: [inv_date_sk#1, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32] + +(37) Scan parquet default.date_dim +Output [3]: [d_date_sk#33, d_year#34, d_moy#35] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 7] +Input [3]: [d_date_sk#33, d_year#34, d_moy#35] + +(39) Filter [codegen id : 7] +Input [3]: [d_date_sk#33, d_year#34, d_moy#35] +Condition : ((((isnotnull(d_year#34) AND isnotnull(d_moy#35)) AND (d_year#34 = 2001)) AND (d_moy#35 = 2)) AND isnotnull(d_date_sk#33)) + +(40) Project [codegen id : 7] +Output [2]: [d_date_sk#33, d_moy#35] +Input [3]: [d_date_sk#33, d_year#34, d_moy#35] + +(41) BroadcastExchange +Input [2]: [d_date_sk#33, d_moy#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#36] + +(42) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [inv_date_sk#1] +Right keys [1]: [d_date_sk#33] +Join condition: None + +(43) Project [codegen id : 8] +Output [5]: [inv_quantity_on_hand#4, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_moy#35] +Input [7]: [inv_date_sk#1, inv_quantity_on_hand#4, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_date_sk#33, d_moy#35] + +(44) HashAggregate [codegen id : 8] +Input [5]: [inv_quantity_on_hand#4, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_moy#35] +Keys [4]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35] +Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#4 as double)), partial_avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [5]: [n#37, avg#38, m2#39, sum#40, count#41] +Results [9]: 
[w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, n#42, avg#43, m2#44, sum#45, count#46] + +(45) Exchange +Input [9]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, n#42, avg#43, m2#44, sum#45, count#46] +Arguments: hashpartitioning(w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, 5), true, [id=#47] + +(46) HashAggregate [codegen id : 9] +Input [9]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, n#42, avg#43, m2#44, sum#45, count#46] +Keys [4]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35] +Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#4 as double)), avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#4 as double))#48, avg(cast(inv_quantity_on_hand#4 as bigint))#49] +Results [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, stddev_samp(cast(inv_quantity_on_hand#4 as double))#48 AS stdev#50, avg(cast(inv_quantity_on_hand#4 as bigint))#49 AS mean#51] + +(47) Filter [codegen id : 9] +Input [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, stdev#50, mean#51] +Condition : (CASE WHEN (mean#51 = 0.0) THEN 0.0 ELSE (stdev#50 / mean#51) END > 1.0) + +(48) Project [codegen id : 9] +Output [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#51, CASE WHEN (mean#51 = 0.0) THEN null ELSE (stdev#50 / mean#51) END AS cov#52] +Input [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, stdev#50, mean#51] + +(49) BroadcastExchange +Input [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#51, cov#52] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [id=#53] + +(50) BroadcastHashJoin [codegen id : 10] +Left keys [2]: [i_item_sk#5, w_warehouse_sk#7] +Right keys [2]: [i_item_sk#30, w_warehouse_sk#31] +Join condition: None + +(51) Exchange +Input [10]: [w_warehouse_sk#7, i_item_sk#5, d_moy#12, mean#28, cov#29, w_warehouse_sk#31, 
i_item_sk#30, d_moy#35, mean#51, cov#52] +Arguments: rangepartitioning(w_warehouse_sk#7 ASC NULLS FIRST, i_item_sk#5 ASC NULLS FIRST, d_moy#12 ASC NULLS FIRST, mean#28 ASC NULLS FIRST, cov#29 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST, mean#51 ASC NULLS FIRST, cov#52 ASC NULLS FIRST, 5), true, [id=#54] + +(52) Sort [codegen id : 11] +Input [10]: [w_warehouse_sk#7, i_item_sk#5, d_moy#12, mean#28, cov#29, w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#51, cov#52] +Arguments: [w_warehouse_sk#7 ASC NULLS FIRST, i_item_sk#5 ASC NULLS FIRST, d_moy#12 ASC NULLS FIRST, mean#28 ASC NULLS FIRST, cov#29 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST, mean#51 ASC NULLS FIRST, cov#52 ASC NULLS FIRST], true, 0 + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/simplified.txt new file mode 100644 index 0000000000000..c1a1b4dab7de7 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/simplified.txt @@ -0,0 +1,77 @@ +WholeStageCodegen (11) + Sort [cov,cov,d_moy,d_moy,i_item_sk,mean,mean,w_warehouse_sk] + InputAdapter + Exchange [cov,cov,d_moy,d_moy,i_item_sk,mean,mean,w_warehouse_sk] #1 + WholeStageCodegen (10) + BroadcastHashJoin [i_item_sk,i_item_sk,w_warehouse_sk,w_warehouse_sk] + Project [d_moy,i_item_sk,mean,stdev,w_warehouse_sk] + Filter [mean,stdev] + HashAggregate [avg,count,d_moy,i_item_sk,m2,n,sum,w_warehouse_name,w_warehouse_sk] [avg,avg(cast(inv_quantity_on_hand as bigint)),count,m2,mean,n,stddev_samp(cast(inv_quantity_on_hand as double)),stdev,sum] + InputAdapter + Exchange [d_moy,i_item_sk,w_warehouse_name,w_warehouse_sk] #2 + WholeStageCodegen (4) + HashAggregate [d_moy,i_item_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] [avg,avg,count,count,m2,m2,n,n,sum,sum] + Project [d_moy,i_item_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] + BroadcastHashJoin [d_date_sk,inv_date_sk] + Project 
[i_item_sk,inv_date_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,inv_warehouse_sk] + BroadcastHashJoin [i_item_sk,inv_item_sk] + Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.warehouse [w_warehouse_name,w_warehouse_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [d_date_sk,d_moy] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (9) + Project [d_moy,i_item_sk,mean,stdev,w_warehouse_sk] + Filter [mean,stdev] + HashAggregate [avg,count,d_moy,i_item_sk,m2,n,sum,w_warehouse_name,w_warehouse_sk] [avg,avg(cast(inv_quantity_on_hand as bigint)),count,m2,mean,n,stddev_samp(cast(inv_quantity_on_hand as double)),stdev,sum] + InputAdapter + Exchange [d_moy,i_item_sk,w_warehouse_name,w_warehouse_sk] #7 + WholeStageCodegen (8) + HashAggregate [d_moy,i_item_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] [avg,avg,count,count,m2,m2,n,n,sum,sum] + Project [d_moy,i_item_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] + BroadcastHashJoin [d_date_sk,inv_date_sk] + Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,inv_warehouse_sk] + BroadcastHashJoin [i_item_sk,inv_item_sk] + Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] + 
ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + InputAdapter + ReusedExchange [i_item_sk] #3 + InputAdapter + ReusedExchange [w_warehouse_name,w_warehouse_sk] #4 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (7) + Project [d_date_sk,d_moy] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b.sf100/explain.txt new file mode 100644 index 0000000000000..61b613f52891f --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b.sf100/explain.txt @@ -0,0 +1,307 @@ +== Physical Plan == +* Sort (55) ++- Exchange (54) + +- * SortMergeJoin Inner (53) + :- * Sort (29) + : +- Exchange (28) + : +- * Project (27) + : +- * Filter (26) + : +- * HashAggregate (25) + : +- Exchange (24) + : +- * HashAggregate (23) + : +- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.inventory (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.item (11) + : +- BroadcastExchange (20) + : +- * Filter (19) + : +- * ColumnarToRow (18) + : +- Scan parquet default.warehouse (17) + +- * Sort (52) + +- Exchange (51) + +- * Project (50) + +- * Filter (49) + +- * HashAggregate (48) + +- Exchange (47) + +- * HashAggregate (46) + +- * Project (45) + +- * 
BroadcastHashJoin Inner BuildRight (44) + :- * Project (42) + : +- * BroadcastHashJoin Inner BuildRight (41) + : :- * Project (39) + : : +- * BroadcastHashJoin Inner BuildRight (38) + : : :- * Filter (32) + : : : +- * ColumnarToRow (31) + : : : +- Scan parquet default.inventory (30) + : : +- BroadcastExchange (37) + : : +- * Project (36) + : : +- * Filter (35) + : : +- * ColumnarToRow (34) + : : +- Scan parquet default.date_dim (33) + : +- ReusedExchange (40) + +- ReusedExchange (43) + + +(1) Scan parquet default.inventory +Output [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/inventory] +PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] + +(3) Filter [codegen id : 4] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Condition : ((isnotnull(inv_item_sk#2) AND isnotnull(inv_warehouse_sk#3)) AND isnotnull(inv_date_sk#1)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#5, d_year#6, d_moy#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,1), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Condition : ((((isnotnull(d_year#6) AND isnotnull(d_moy#7)) AND (d_year#6 = 2001)) AND (d_moy#7 = 1)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [2]: [d_date_sk#5, 
d_moy#7] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(8) BroadcastExchange +Input [2]: [d_date_sk#5, d_moy#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_date_sk#1] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, d_moy#7] +Input [6]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, d_date_sk#5, d_moy#7] + +(11) Scan parquet default.item +Output [1]: [i_item_sk#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [1]: [i_item_sk#9] + +(13) Filter [codegen id : 2] +Input [1]: [i_item_sk#9] +Condition : isnotnull(i_item_sk#9) + +(14) BroadcastExchange +Input [1]: [i_item_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_item_sk#2] +Right keys [1]: [i_item_sk#9] +Join condition: None + +(16) Project [codegen id : 4] +Output [4]: [inv_warehouse_sk#3, inv_quantity_on_hand#4, d_moy#7, i_item_sk#9] +Input [5]: [inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, d_moy#7, i_item_sk#9] + +(17) Scan parquet default.warehouse +Output [2]: [w_warehouse_sk#11, w_warehouse_name#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [2]: [w_warehouse_sk#11, w_warehouse_name#12] + +(19) Filter [codegen id : 3] +Input [2]: [w_warehouse_sk#11, 
w_warehouse_name#12] +Condition : isnotnull(w_warehouse_sk#11) + +(20) BroadcastExchange +Input [2]: [w_warehouse_sk#11, w_warehouse_name#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#13] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_warehouse_sk#3] +Right keys [1]: [w_warehouse_sk#11] +Join condition: None + +(22) Project [codegen id : 4] +Output [5]: [inv_quantity_on_hand#4, i_item_sk#9, w_warehouse_sk#11, w_warehouse_name#12, d_moy#7] +Input [6]: [inv_warehouse_sk#3, inv_quantity_on_hand#4, d_moy#7, i_item_sk#9, w_warehouse_sk#11, w_warehouse_name#12] + +(23) HashAggregate [codegen id : 4] +Input [5]: [inv_quantity_on_hand#4, i_item_sk#9, w_warehouse_sk#11, w_warehouse_name#12, d_moy#7] +Keys [4]: [w_warehouse_name#12, w_warehouse_sk#11, i_item_sk#9, d_moy#7] +Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#4 as double)), partial_avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [5]: [n#14, avg#15, m2#16, sum#17, count#18] +Results [9]: [w_warehouse_name#12, w_warehouse_sk#11, i_item_sk#9, d_moy#7, n#19, avg#20, m2#21, sum#22, count#23] + +(24) Exchange +Input [9]: [w_warehouse_name#12, w_warehouse_sk#11, i_item_sk#9, d_moy#7, n#19, avg#20, m2#21, sum#22, count#23] +Arguments: hashpartitioning(w_warehouse_name#12, w_warehouse_sk#11, i_item_sk#9, d_moy#7, 5), true, [id=#24] + +(25) HashAggregate [codegen id : 5] +Input [9]: [w_warehouse_name#12, w_warehouse_sk#11, i_item_sk#9, d_moy#7, n#19, avg#20, m2#21, sum#22, count#23] +Keys [4]: [w_warehouse_name#12, w_warehouse_sk#11, i_item_sk#9, d_moy#7] +Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#4 as double)), avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#4 as double))#25, avg(cast(inv_quantity_on_hand#4 as bigint))#26] +Results [5]: [w_warehouse_sk#11, i_item_sk#9, d_moy#7, stddev_samp(cast(inv_quantity_on_hand#4 as double))#25 AS stdev#27, 
avg(cast(inv_quantity_on_hand#4 as bigint))#26 AS mean#28] + +(26) Filter [codegen id : 5] +Input [5]: [w_warehouse_sk#11, i_item_sk#9, d_moy#7, stdev#27, mean#28] +Condition : ((CASE WHEN (mean#28 = 0.0) THEN 0.0 ELSE (stdev#27 / mean#28) END > 1.0) AND (CASE WHEN (mean#28 = 0.0) THEN null ELSE (stdev#27 / mean#28) END > 1.5)) + +(27) Project [codegen id : 5] +Output [5]: [w_warehouse_sk#11, i_item_sk#9, d_moy#7, mean#28, CASE WHEN (mean#28 = 0.0) THEN null ELSE (stdev#27 / mean#28) END AS cov#29] +Input [5]: [w_warehouse_sk#11, i_item_sk#9, d_moy#7, stdev#27, mean#28] + +(28) Exchange +Input [5]: [w_warehouse_sk#11, i_item_sk#9, d_moy#7, mean#28, cov#29] +Arguments: hashpartitioning(i_item_sk#9, w_warehouse_sk#11, 5), true, [id=#30] + +(29) Sort [codegen id : 6] +Input [5]: [w_warehouse_sk#11, i_item_sk#9, d_moy#7, mean#28, cov#29] +Arguments: [i_item_sk#9 ASC NULLS FIRST, w_warehouse_sk#11 ASC NULLS FIRST], false, 0 + +(30) Scan parquet default.inventory +Output [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/inventory] +PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 10] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] + +(32) Filter [codegen id : 10] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Condition : ((isnotnull(inv_item_sk#2) AND isnotnull(inv_warehouse_sk#3)) AND isnotnull(inv_date_sk#1)) + +(33) Scan parquet default.date_dim +Output [3]: [d_date_sk#31, d_year#32, d_moy#33] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: 
[IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 7] +Input [3]: [d_date_sk#31, d_year#32, d_moy#33] + +(35) Filter [codegen id : 7] +Input [3]: [d_date_sk#31, d_year#32, d_moy#33] +Condition : ((((isnotnull(d_year#32) AND isnotnull(d_moy#33)) AND (d_year#32 = 2001)) AND (d_moy#33 = 2)) AND isnotnull(d_date_sk#31)) + +(36) Project [codegen id : 7] +Output [2]: [d_date_sk#31, d_moy#33] +Input [3]: [d_date_sk#31, d_year#32, d_moy#33] + +(37) BroadcastExchange +Input [2]: [d_date_sk#31, d_moy#33] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#34] + +(38) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [inv_date_sk#1] +Right keys [1]: [d_date_sk#31] +Join condition: None + +(39) Project [codegen id : 10] +Output [4]: [inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, d_moy#33] +Input [6]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, d_date_sk#31, d_moy#33] + +(40) ReusedExchange [Reuses operator id: 14] +Output [1]: [i_item_sk#35] + +(41) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [inv_item_sk#2] +Right keys [1]: [i_item_sk#35] +Join condition: None + +(42) Project [codegen id : 10] +Output [4]: [inv_warehouse_sk#3, inv_quantity_on_hand#4, d_moy#33, i_item_sk#35] +Input [5]: [inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, d_moy#33, i_item_sk#35] + +(43) ReusedExchange [Reuses operator id: 20] +Output [2]: [w_warehouse_sk#36, w_warehouse_name#37] + +(44) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [inv_warehouse_sk#3] +Right keys [1]: [w_warehouse_sk#36] +Join condition: None + +(45) Project [codegen id : 10] +Output [5]: [inv_quantity_on_hand#4, i_item_sk#35, w_warehouse_sk#36, w_warehouse_name#37, d_moy#33] +Input [6]: [inv_warehouse_sk#3, inv_quantity_on_hand#4, d_moy#33, i_item_sk#35, w_warehouse_sk#36, w_warehouse_name#37] + +(46) 
HashAggregate [codegen id : 10] +Input [5]: [inv_quantity_on_hand#4, i_item_sk#35, w_warehouse_sk#36, w_warehouse_name#37, d_moy#33] +Keys [4]: [w_warehouse_name#37, w_warehouse_sk#36, i_item_sk#35, d_moy#33] +Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#4 as double)), partial_avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [5]: [n#38, avg#39, m2#40, sum#41, count#42] +Results [9]: [w_warehouse_name#37, w_warehouse_sk#36, i_item_sk#35, d_moy#33, n#43, avg#44, m2#45, sum#46, count#47] + +(47) Exchange +Input [9]: [w_warehouse_name#37, w_warehouse_sk#36, i_item_sk#35, d_moy#33, n#43, avg#44, m2#45, sum#46, count#47] +Arguments: hashpartitioning(w_warehouse_name#37, w_warehouse_sk#36, i_item_sk#35, d_moy#33, 5), true, [id=#48] + +(48) HashAggregate [codegen id : 11] +Input [9]: [w_warehouse_name#37, w_warehouse_sk#36, i_item_sk#35, d_moy#33, n#43, avg#44, m2#45, sum#46, count#47] +Keys [4]: [w_warehouse_name#37, w_warehouse_sk#36, i_item_sk#35, d_moy#33] +Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#4 as double)), avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#4 as double))#49, avg(cast(inv_quantity_on_hand#4 as bigint))#50] +Results [5]: [w_warehouse_sk#36, i_item_sk#35, d_moy#33, stddev_samp(cast(inv_quantity_on_hand#4 as double))#49 AS stdev#51, avg(cast(inv_quantity_on_hand#4 as bigint))#50 AS mean#52] + +(49) Filter [codegen id : 11] +Input [5]: [w_warehouse_sk#36, i_item_sk#35, d_moy#33, stdev#51, mean#52] +Condition : (CASE WHEN (mean#52 = 0.0) THEN 0.0 ELSE (stdev#51 / mean#52) END > 1.0) + +(50) Project [codegen id : 11] +Output [5]: [w_warehouse_sk#36, i_item_sk#35, d_moy#33, mean#52, CASE WHEN (mean#52 = 0.0) THEN null ELSE (stdev#51 / mean#52) END AS cov#53] +Input [5]: [w_warehouse_sk#36, i_item_sk#35, d_moy#33, stdev#51, mean#52] + +(51) Exchange +Input [5]: [w_warehouse_sk#36, i_item_sk#35, d_moy#33, mean#52, cov#53] +Arguments: 
hashpartitioning(i_item_sk#35, w_warehouse_sk#36, 5), true, [id=#54] + +(52) Sort [codegen id : 12] +Input [5]: [w_warehouse_sk#36, i_item_sk#35, d_moy#33, mean#52, cov#53] +Arguments: [i_item_sk#35 ASC NULLS FIRST, w_warehouse_sk#36 ASC NULLS FIRST], false, 0 + +(53) SortMergeJoin [codegen id : 13] +Left keys [2]: [i_item_sk#9, w_warehouse_sk#11] +Right keys [2]: [i_item_sk#35, w_warehouse_sk#36] +Join condition: None + +(54) Exchange +Input [10]: [w_warehouse_sk#11, i_item_sk#9, d_moy#7, mean#28, cov#29, w_warehouse_sk#36, i_item_sk#35, d_moy#33, mean#52, cov#53] +Arguments: rangepartitioning(w_warehouse_sk#11 ASC NULLS FIRST, i_item_sk#9 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST, mean#28 ASC NULLS FIRST, cov#29 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST, mean#52 ASC NULLS FIRST, cov#53 ASC NULLS FIRST, 5), true, [id=#55] + +(55) Sort [codegen id : 14] +Input [10]: [w_warehouse_sk#11, i_item_sk#9, d_moy#7, mean#28, cov#29, w_warehouse_sk#36, i_item_sk#35, d_moy#33, mean#52, cov#53] +Arguments: [w_warehouse_sk#11 ASC NULLS FIRST, i_item_sk#9 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST, mean#28 ASC NULLS FIRST, cov#29 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST, mean#52 ASC NULLS FIRST, cov#53 ASC NULLS FIRST], true, 0 + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b.sf100/simplified.txt new file mode 100644 index 0000000000000..c38a3b410e120 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b.sf100/simplified.txt @@ -0,0 +1,86 @@ +WholeStageCodegen (14) + Sort [cov,cov,d_moy,d_moy,i_item_sk,mean,mean,w_warehouse_sk] + InputAdapter + Exchange [cov,cov,d_moy,d_moy,i_item_sk,mean,mean,w_warehouse_sk] #1 + WholeStageCodegen (13) + SortMergeJoin [i_item_sk,i_item_sk,w_warehouse_sk,w_warehouse_sk] + InputAdapter + WholeStageCodegen (6) + Sort [i_item_sk,w_warehouse_sk] + InputAdapter + Exchange 
[i_item_sk,w_warehouse_sk] #2 + WholeStageCodegen (5) + Project [d_moy,i_item_sk,mean,stdev,w_warehouse_sk] + Filter [mean,stdev] + HashAggregate [avg,count,d_moy,i_item_sk,m2,n,sum,w_warehouse_name,w_warehouse_sk] [avg,avg(cast(inv_quantity_on_hand as bigint)),count,m2,mean,n,stddev_samp(cast(inv_quantity_on_hand as double)),stdev,sum] + InputAdapter + Exchange [d_moy,i_item_sk,w_warehouse_name,w_warehouse_sk] #3 + WholeStageCodegen (4) + HashAggregate [d_moy,i_item_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] [avg,avg,count,count,m2,m2,n,n,sum,sum] + Project [d_moy,i_item_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [d_moy,i_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + BroadcastHashJoin [i_item_sk,inv_item_sk] + Project [d_moy,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + BroadcastHashJoin [d_date_sk,inv_date_sk] + Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk,d_moy] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.warehouse [w_warehouse_name,w_warehouse_sk] + InputAdapter + WholeStageCodegen (12) + Sort [i_item_sk,w_warehouse_sk] + InputAdapter + Exchange [i_item_sk,w_warehouse_sk] #7 + WholeStageCodegen (11) + Project [d_moy,i_item_sk,mean,stdev,w_warehouse_sk] + Filter [mean,stdev] + HashAggregate [avg,count,d_moy,i_item_sk,m2,n,sum,w_warehouse_name,w_warehouse_sk] 
[avg,avg(cast(inv_quantity_on_hand as bigint)),count,m2,mean,n,stddev_samp(cast(inv_quantity_on_hand as double)),stdev,sum] + InputAdapter + Exchange [d_moy,i_item_sk,w_warehouse_name,w_warehouse_sk] #8 + WholeStageCodegen (10) + HashAggregate [d_moy,i_item_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] [avg,avg,count,count,m2,m2,n,n,sum,sum] + Project [d_moy,i_item_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [d_moy,i_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + BroadcastHashJoin [i_item_sk,inv_item_sk] + Project [d_moy,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + BroadcastHashJoin [d_date_sk,inv_date_sk] + Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (7) + Project [d_date_sk,d_moy] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + ReusedExchange [i_item_sk] #5 + InputAdapter + ReusedExchange [w_warehouse_name,w_warehouse_sk] #6 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/explain.txt new file mode 100644 index 0000000000000..5a963825eab79 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/explain.txt @@ -0,0 +1,292 @@ +== Physical Plan == +* Sort (52) ++- Exchange (51) + +- * BroadcastHashJoin Inner BuildRight (50) + :- * Project (27) + : +- * Filter (26) + : +- * HashAggregate (25) + : +- Exchange (24) + : +- * HashAggregate (23) + : +- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Project (9) + : : : +- * 
BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.inventory (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.item (4) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet default.warehouse (10) + : +- BroadcastExchange (20) + : +- * Project (19) + : +- * Filter (18) + : +- * ColumnarToRow (17) + : +- Scan parquet default.date_dim (16) + +- BroadcastExchange (49) + +- * Project (48) + +- * Filter (47) + +- * HashAggregate (46) + +- Exchange (45) + +- * HashAggregate (44) + +- * Project (43) + +- * BroadcastHashJoin Inner BuildRight (42) + :- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Project (33) + : : +- * BroadcastHashJoin Inner BuildRight (32) + : : :- * Filter (30) + : : : +- * ColumnarToRow (29) + : : : +- Scan parquet default.inventory (28) + : : +- ReusedExchange (31) + : +- ReusedExchange (34) + +- BroadcastExchange (41) + +- * Project (40) + +- * Filter (39) + +- * ColumnarToRow (38) + +- Scan parquet default.date_dim (37) + + +(1) Scan parquet default.inventory +Output [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/inventory] +PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] + +(3) Filter [codegen id : 4] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Condition : ((isnotnull(inv_item_sk#2) AND isnotnull(inv_warehouse_sk#3)) AND isnotnull(inv_date_sk#1)) + +(4) Scan parquet default.item +Output [1]: 
[i_item_sk#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [1]: [i_item_sk#5] + +(6) Filter [codegen id : 1] +Input [1]: [i_item_sk#5] +Condition : isnotnull(i_item_sk#5) + +(7) BroadcastExchange +Input [1]: [i_item_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#6] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_item_sk#2] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(9) Project [codegen id : 4] +Output [4]: [inv_date_sk#1, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#5] +Input [5]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#5] + +(10) Scan parquet default.warehouse +Output [2]: [w_warehouse_sk#7, w_warehouse_name#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [w_warehouse_sk#7, w_warehouse_name#8] + +(12) Filter [codegen id : 2] +Input [2]: [w_warehouse_sk#7, w_warehouse_name#8] +Condition : isnotnull(w_warehouse_sk#7) + +(13) BroadcastExchange +Input [2]: [w_warehouse_sk#7, w_warehouse_name#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_warehouse_sk#3] +Right keys [1]: [w_warehouse_sk#7] +Join condition: None + +(15) Project [codegen id : 4] +Output [5]: [inv_date_sk#1, inv_quantity_on_hand#4, i_item_sk#5, w_warehouse_sk#7, w_warehouse_name#8] +Input [6]: [inv_date_sk#1, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#5, w_warehouse_sk#7, w_warehouse_name#8] 
+ +(16) Scan parquet default.date_dim +Output [3]: [d_date_sk#10, d_year#11, d_moy#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,1), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] + +(18) Filter [codegen id : 3] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] +Condition : ((((isnotnull(d_year#11) AND isnotnull(d_moy#12)) AND (d_year#11 = 2001)) AND (d_moy#12 = 1)) AND isnotnull(d_date_sk#10)) + +(19) Project [codegen id : 3] +Output [2]: [d_date_sk#10, d_moy#12] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] + +(20) BroadcastExchange +Input [2]: [d_date_sk#10, d_moy#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(22) Project [codegen id : 4] +Output [5]: [inv_quantity_on_hand#4, i_item_sk#5, w_warehouse_sk#7, w_warehouse_name#8, d_moy#12] +Input [7]: [inv_date_sk#1, inv_quantity_on_hand#4, i_item_sk#5, w_warehouse_sk#7, w_warehouse_name#8, d_date_sk#10, d_moy#12] + +(23) HashAggregate [codegen id : 4] +Input [5]: [inv_quantity_on_hand#4, i_item_sk#5, w_warehouse_sk#7, w_warehouse_name#8, d_moy#12] +Keys [4]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#5, d_moy#12] +Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#4 as double)), partial_avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [5]: [n#14, avg#15, m2#16, sum#17, count#18] +Results [9]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#5, d_moy#12, n#19, avg#20, m2#21, sum#22, count#23] + +(24) Exchange +Input [9]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#5, d_moy#12, n#19, avg#20, m2#21, 
sum#22, count#23] +Arguments: hashpartitioning(w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#5, d_moy#12, 5), true, [id=#24] + +(25) HashAggregate [codegen id : 10] +Input [9]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#5, d_moy#12, n#19, avg#20, m2#21, sum#22, count#23] +Keys [4]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#5, d_moy#12] +Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#4 as double)), avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#4 as double))#25, avg(cast(inv_quantity_on_hand#4 as bigint))#26] +Results [5]: [w_warehouse_sk#7, i_item_sk#5, d_moy#12, stddev_samp(cast(inv_quantity_on_hand#4 as double))#25 AS stdev#27, avg(cast(inv_quantity_on_hand#4 as bigint))#26 AS mean#28] + +(26) Filter [codegen id : 10] +Input [5]: [w_warehouse_sk#7, i_item_sk#5, d_moy#12, stdev#27, mean#28] +Condition : ((CASE WHEN (mean#28 = 0.0) THEN 0.0 ELSE (stdev#27 / mean#28) END > 1.0) AND (CASE WHEN (mean#28 = 0.0) THEN null ELSE (stdev#27 / mean#28) END > 1.5)) + +(27) Project [codegen id : 10] +Output [5]: [w_warehouse_sk#7, i_item_sk#5, d_moy#12, mean#28, CASE WHEN (mean#28 = 0.0) THEN null ELSE (stdev#27 / mean#28) END AS cov#29] +Input [5]: [w_warehouse_sk#7, i_item_sk#5, d_moy#12, stdev#27, mean#28] + +(28) Scan parquet default.inventory +Output [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/inventory] +PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 8] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] + +(30) Filter [codegen id : 8] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Condition : ((isnotnull(inv_item_sk#2) 
AND isnotnull(inv_warehouse_sk#3)) AND isnotnull(inv_date_sk#1)) + +(31) ReusedExchange [Reuses operator id: 7] +Output [1]: [i_item_sk#30] + +(32) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [inv_item_sk#2] +Right keys [1]: [i_item_sk#30] +Join condition: None + +(33) Project [codegen id : 8] +Output [4]: [inv_date_sk#1, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#30] +Input [5]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#30] + +(34) ReusedExchange [Reuses operator id: 13] +Output [2]: [w_warehouse_sk#31, w_warehouse_name#32] + +(35) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [inv_warehouse_sk#3] +Right keys [1]: [w_warehouse_sk#31] +Join condition: None + +(36) Project [codegen id : 8] +Output [5]: [inv_date_sk#1, inv_quantity_on_hand#4, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32] +Input [6]: [inv_date_sk#1, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32] + +(37) Scan parquet default.date_dim +Output [3]: [d_date_sk#33, d_year#34, d_moy#35] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 7] +Input [3]: [d_date_sk#33, d_year#34, d_moy#35] + +(39) Filter [codegen id : 7] +Input [3]: [d_date_sk#33, d_year#34, d_moy#35] +Condition : ((((isnotnull(d_year#34) AND isnotnull(d_moy#35)) AND (d_year#34 = 2001)) AND (d_moy#35 = 2)) AND isnotnull(d_date_sk#33)) + +(40) Project [codegen id : 7] +Output [2]: [d_date_sk#33, d_moy#35] +Input [3]: [d_date_sk#33, d_year#34, d_moy#35] + +(41) BroadcastExchange +Input [2]: [d_date_sk#33, d_moy#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#36] + +(42) BroadcastHashJoin 
[codegen id : 8] +Left keys [1]: [inv_date_sk#1] +Right keys [1]: [d_date_sk#33] +Join condition: None + +(43) Project [codegen id : 8] +Output [5]: [inv_quantity_on_hand#4, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_moy#35] +Input [7]: [inv_date_sk#1, inv_quantity_on_hand#4, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_date_sk#33, d_moy#35] + +(44) HashAggregate [codegen id : 8] +Input [5]: [inv_quantity_on_hand#4, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_moy#35] +Keys [4]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35] +Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#4 as double)), partial_avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [5]: [n#37, avg#38, m2#39, sum#40, count#41] +Results [9]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, n#42, avg#43, m2#44, sum#45, count#46] + +(45) Exchange +Input [9]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, n#42, avg#43, m2#44, sum#45, count#46] +Arguments: hashpartitioning(w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, 5), true, [id=#47] + +(46) HashAggregate [codegen id : 9] +Input [9]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, n#42, avg#43, m2#44, sum#45, count#46] +Keys [4]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35] +Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#4 as double)), avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#4 as double))#48, avg(cast(inv_quantity_on_hand#4 as bigint))#49] +Results [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, stddev_samp(cast(inv_quantity_on_hand#4 as double))#48 AS stdev#50, avg(cast(inv_quantity_on_hand#4 as bigint))#49 AS mean#51] + +(47) Filter [codegen id : 9] +Input [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, stdev#50, mean#51] +Condition : (CASE WHEN (mean#51 = 0.0) THEN 0.0 ELSE (stdev#50 / mean#51) END > 
1.0) + +(48) Project [codegen id : 9] +Output [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#51, CASE WHEN (mean#51 = 0.0) THEN null ELSE (stdev#50 / mean#51) END AS cov#52] +Input [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, stdev#50, mean#51] + +(49) BroadcastExchange +Input [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#51, cov#52] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [id=#53] + +(50) BroadcastHashJoin [codegen id : 10] +Left keys [2]: [i_item_sk#5, w_warehouse_sk#7] +Right keys [2]: [i_item_sk#30, w_warehouse_sk#31] +Join condition: None + +(51) Exchange +Input [10]: [w_warehouse_sk#7, i_item_sk#5, d_moy#12, mean#28, cov#29, w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#51, cov#52] +Arguments: rangepartitioning(w_warehouse_sk#7 ASC NULLS FIRST, i_item_sk#5 ASC NULLS FIRST, d_moy#12 ASC NULLS FIRST, mean#28 ASC NULLS FIRST, cov#29 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST, mean#51 ASC NULLS FIRST, cov#52 ASC NULLS FIRST, 5), true, [id=#54] + +(52) Sort [codegen id : 11] +Input [10]: [w_warehouse_sk#7, i_item_sk#5, d_moy#12, mean#28, cov#29, w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#51, cov#52] +Arguments: [w_warehouse_sk#7 ASC NULLS FIRST, i_item_sk#5 ASC NULLS FIRST, d_moy#12 ASC NULLS FIRST, mean#28 ASC NULLS FIRST, cov#29 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST, mean#51 ASC NULLS FIRST, cov#52 ASC NULLS FIRST], true, 0 + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/simplified.txt new file mode 100644 index 0000000000000..c1a1b4dab7de7 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/simplified.txt @@ -0,0 +1,77 @@ +WholeStageCodegen (11) + Sort [cov,cov,d_moy,d_moy,i_item_sk,mean,mean,w_warehouse_sk] + InputAdapter + Exchange 
[cov,cov,d_moy,d_moy,i_item_sk,mean,mean,w_warehouse_sk] #1 + WholeStageCodegen (10) + BroadcastHashJoin [i_item_sk,i_item_sk,w_warehouse_sk,w_warehouse_sk] + Project [d_moy,i_item_sk,mean,stdev,w_warehouse_sk] + Filter [mean,stdev] + HashAggregate [avg,count,d_moy,i_item_sk,m2,n,sum,w_warehouse_name,w_warehouse_sk] [avg,avg(cast(inv_quantity_on_hand as bigint)),count,m2,mean,n,stddev_samp(cast(inv_quantity_on_hand as double)),stdev,sum] + InputAdapter + Exchange [d_moy,i_item_sk,w_warehouse_name,w_warehouse_sk] #2 + WholeStageCodegen (4) + HashAggregate [d_moy,i_item_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] [avg,avg,count,count,m2,m2,n,n,sum,sum] + Project [d_moy,i_item_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] + BroadcastHashJoin [d_date_sk,inv_date_sk] + Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,inv_warehouse_sk] + BroadcastHashJoin [i_item_sk,inv_item_sk] + Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.warehouse [w_warehouse_name,w_warehouse_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [d_date_sk,d_moy] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (9) + Project [d_moy,i_item_sk,mean,stdev,w_warehouse_sk] + Filter [mean,stdev] + HashAggregate 
[avg,count,d_moy,i_item_sk,m2,n,sum,w_warehouse_name,w_warehouse_sk] [avg,avg(cast(inv_quantity_on_hand as bigint)),count,m2,mean,n,stddev_samp(cast(inv_quantity_on_hand as double)),stdev,sum] + InputAdapter + Exchange [d_moy,i_item_sk,w_warehouse_name,w_warehouse_sk] #7 + WholeStageCodegen (8) + HashAggregate [d_moy,i_item_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] [avg,avg,count,count,m2,m2,n,n,sum,sum] + Project [d_moy,i_item_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] + BroadcastHashJoin [d_date_sk,inv_date_sk] + Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,inv_warehouse_sk] + BroadcastHashJoin [i_item_sk,inv_item_sk] + Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + InputAdapter + ReusedExchange [i_item_sk] #3 + InputAdapter + ReusedExchange [w_warehouse_name,w_warehouse_sk] #4 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (7) + Project [d_date_sk,d_moy] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4.sf100/explain.txt new file mode 100644 index 0000000000000..5ec3d359386b4 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4.sf100/explain.txt @@ -0,0 +1,695 @@ +== Physical Plan == +TakeOrderedAndProject (126) ++- * Project (125) + +- * SortMergeJoin Inner (124) + :- * Project (106) + : +- * SortMergeJoin Inner (105) + : :- * Project (85) + : : +- * SortMergeJoin Inner (84) + : : :- * Project (66) + : : : +- * SortMergeJoin Inner (65) + : : : :- * SortMergeJoin 
Inner (45) + : : : : :- * Sort (24) + : : : : : +- Exchange (23) + : : : : : +- * Filter (22) + : : : : : +- * HashAggregate (21) + : : : : : +- Exchange (20) + : : : : : +- * HashAggregate (19) + : : : : : +- * Project (18) + : : : : : +- * SortMergeJoin Inner (17) + : : : : : :- * Sort (11) + : : : : : : +- Exchange (10) + : : : : : : +- * Project (9) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : : : :- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet default.store_sales (1) + : : : : : : +- BroadcastExchange (7) + : : : : : : +- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : +- Scan parquet default.date_dim (4) + : : : : : +- * Sort (16) + : : : : : +- Exchange (15) + : : : : : +- * Filter (14) + : : : : : +- * ColumnarToRow (13) + : : : : : +- Scan parquet default.customer (12) + : : : : +- * Sort (44) + : : : : +- Exchange (43) + : : : : +- * HashAggregate (42) + : : : : +- Exchange (41) + : : : : +- * HashAggregate (40) + : : : : +- * Project (39) + : : : : +- * SortMergeJoin Inner (38) + : : : : :- * Sort (35) + : : : : : +- Exchange (34) + : : : : : +- * Project (33) + : : : : : +- * BroadcastHashJoin Inner BuildRight (32) + : : : : : :- * Filter (27) + : : : : : : +- * ColumnarToRow (26) + : : : : : : +- Scan parquet default.store_sales (25) + : : : : : +- BroadcastExchange (31) + : : : : : +- * Filter (30) + : : : : : +- * ColumnarToRow (29) + : : : : : +- Scan parquet default.date_dim (28) + : : : : +- * Sort (37) + : : : : +- ReusedExchange (36) + : : : +- * Sort (64) + : : : +- Exchange (63) + : : : +- * Project (62) + : : : +- * Filter (61) + : : : +- * HashAggregate (60) + : : : +- Exchange (59) + : : : +- * HashAggregate (58) + : : : +- * Project (57) + : : : +- * SortMergeJoin Inner (56) + : : : :- * Sort (53) + : : : : +- Exchange (52) + : : : : +- * Project (51) + : : : : +- * BroadcastHashJoin Inner BuildRight (50) + : : : : :- * Filter (48) + : : : : : +- * ColumnarToRow 
(47) + : : : : : +- Scan parquet default.catalog_sales (46) + : : : : +- ReusedExchange (49) + : : : +- * Sort (55) + : : : +- ReusedExchange (54) + : : +- * Sort (83) + : : +- Exchange (82) + : : +- * HashAggregate (81) + : : +- Exchange (80) + : : +- * HashAggregate (79) + : : +- * Project (78) + : : +- * SortMergeJoin Inner (77) + : : :- * Sort (74) + : : : +- Exchange (73) + : : : +- * Project (72) + : : : +- * BroadcastHashJoin Inner BuildRight (71) + : : : :- * Filter (69) + : : : : +- * ColumnarToRow (68) + : : : : +- Scan parquet default.catalog_sales (67) + : : : +- ReusedExchange (70) + : : +- * Sort (76) + : : +- ReusedExchange (75) + : +- * Sort (104) + : +- Exchange (103) + : +- * Project (102) + : +- * Filter (101) + : +- * HashAggregate (100) + : +- Exchange (99) + : +- * HashAggregate (98) + : +- * Project (97) + : +- * SortMergeJoin Inner (96) + : :- * Sort (93) + : : +- Exchange (92) + : : +- * Project (91) + : : +- * BroadcastHashJoin Inner BuildRight (90) + : : :- * Filter (88) + : : : +- * ColumnarToRow (87) + : : : +- Scan parquet default.web_sales (86) + : : +- ReusedExchange (89) + : +- * Sort (95) + : +- ReusedExchange (94) + +- * Sort (123) + +- Exchange (122) + +- * HashAggregate (121) + +- Exchange (120) + +- * HashAggregate (119) + +- * Project (118) + +- * SortMergeJoin Inner (117) + :- * Sort (114) + : +- Exchange (113) + : +- * Project (112) + : +- * BroadcastHashJoin Inner BuildRight (111) + : :- * Filter (109) + : : +- * ColumnarToRow (108) + : : +- Scan parquet default.web_sales (107) + : +- ReusedExchange (110) + +- * Sort (116) + +- ReusedExchange (115) + + +(1) Scan parquet default.store_sales +Output [6]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_sales_price#4, ss_ext_wholesale_cost#5, ss_ext_list_price#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: 
[IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [6]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_sales_price#4, ss_ext_wholesale_cost#5, ss_ext_list_price#6] + +(3) Filter [codegen id : 2] +Input [6]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_sales_price#4, ss_ext_wholesale_cost#5, ss_ext_list_price#6] +Condition : (isnotnull(ss_customer_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#7, d_year#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#7, d_year#8] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#7, d_year#8] +Condition : ((isnotnull(d_year#8) AND (d_year#8 = 2001)) AND isnotnull(d_date_sk#7)) + +(7) BroadcastExchange +Input [2]: [d_date_sk#7, d_year#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] + +(8) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(9) Project [codegen id : 2] +Output [6]: [ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_sales_price#4, ss_ext_wholesale_cost#5, ss_ext_list_price#6, d_year#8] +Input [8]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_sales_price#4, ss_ext_wholesale_cost#5, ss_ext_list_price#6, d_date_sk#7, d_year#8] + +(10) Exchange +Input [6]: [ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_sales_price#4, ss_ext_wholesale_cost#5, ss_ext_list_price#6, d_year#8] +Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#10] + +(11) Sort [codegen id : 3] +Input [6]: [ss_customer_sk#2, ss_ext_discount_amt#3, 
ss_ext_sales_price#4, ss_ext_wholesale_cost#5, ss_ext_list_price#6, d_year#8] +Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 + +(12) Scan parquet default.customer +Output [8]: [c_customer_sk#11, c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(13) ColumnarToRow [codegen id : 4] +Input [8]: [c_customer_sk#11, c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18] + +(14) Filter [codegen id : 4] +Input [8]: [c_customer_sk#11, c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18] +Condition : (isnotnull(c_customer_sk#11) AND isnotnull(c_customer_id#12)) + +(15) Exchange +Input [8]: [c_customer_sk#11, c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18] +Arguments: hashpartitioning(c_customer_sk#11, 5), true, [id=#19] + +(16) Sort [codegen id : 5] +Input [8]: [c_customer_sk#11, c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18] +Arguments: [c_customer_sk#11 ASC NULLS FIRST], false, 0 + +(17) SortMergeJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#11] +Join condition: None + +(18) Project [codegen id : 6] +Output [12]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, ss_ext_discount_amt#3, ss_ext_sales_price#4, ss_ext_wholesale_cost#5, ss_ext_list_price#6, d_year#8] +Input [14]: [ss_customer_sk#2, 
ss_ext_discount_amt#3, ss_ext_sales_price#4, ss_ext_wholesale_cost#5, ss_ext_list_price#6, d_year#8, c_customer_sk#11, c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18] + +(19) HashAggregate [codegen id : 6] +Input [12]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, ss_ext_discount_amt#3, ss_ext_sales_price#4, ss_ext_wholesale_cost#5, ss_ext_list_price#6, d_year#8] +Keys [8]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#8] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#6 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#5 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#3 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#4 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [2]: [sum#20, isEmpty#21] +Results [10]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#8, sum#22, isEmpty#23] + +(20) Exchange +Input [10]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#8, sum#22, isEmpty#23] +Arguments: hashpartitioning(c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#8, 5), true, [id=#24] + +(21) HashAggregate [codegen id : 7] +Input [10]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, 
c_login#17, c_email_address#18, d_year#8, sum#22, isEmpty#23] +Keys [8]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#8] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#6 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#5 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#3 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#4 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#6 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#5 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#3 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#4 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#25] +Results [2]: [c_customer_id#12 AS customer_id#26, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#6 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#5 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#3 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#4 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#25 AS year_total#27] + +(22) Filter [codegen id : 7] +Input [2]: 
[customer_id#26, year_total#27] +Condition : (isnotnull(year_total#27) AND (year_total#27 > 0.000000)) + +(23) Exchange +Input [2]: [customer_id#26, year_total#27] +Arguments: hashpartitioning(customer_id#26, 5), true, [id=#28] + +(24) Sort [codegen id : 8] +Input [2]: [customer_id#26, year_total#27] +Arguments: [customer_id#26 ASC NULLS FIRST], false, 0 + +(25) Scan parquet default.store_sales +Output [6]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_sales_price#4, ss_ext_wholesale_cost#5, ss_ext_list_price#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(26) ColumnarToRow [codegen id : 10] +Input [6]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_sales_price#4, ss_ext_wholesale_cost#5, ss_ext_list_price#6] + +(27) Filter [codegen id : 10] +Input [6]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_sales_price#4, ss_ext_wholesale_cost#5, ss_ext_list_price#6] +Condition : (isnotnull(ss_customer_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(28) Scan parquet default.date_dim +Output [2]: [d_date_sk#7, d_year#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 9] +Input [2]: [d_date_sk#7, d_year#8] + +(30) Filter [codegen id : 9] +Input [2]: [d_date_sk#7, d_year#8] +Condition : ((isnotnull(d_year#8) AND (d_year#8 = 2002)) AND isnotnull(d_date_sk#7)) + +(31) BroadcastExchange +Input [2]: [d_date_sk#7, d_year#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#29] + +(32) 
BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(33) Project [codegen id : 10] +Output [6]: [ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_sales_price#4, ss_ext_wholesale_cost#5, ss_ext_list_price#6, d_year#8] +Input [8]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_sales_price#4, ss_ext_wholesale_cost#5, ss_ext_list_price#6, d_date_sk#7, d_year#8] + +(34) Exchange +Input [6]: [ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_sales_price#4, ss_ext_wholesale_cost#5, ss_ext_list_price#6, d_year#8] +Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#30] + +(35) Sort [codegen id : 11] +Input [6]: [ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_sales_price#4, ss_ext_wholesale_cost#5, ss_ext_list_price#6, d_year#8] +Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 + +(36) ReusedExchange [Reuses operator id: 15] +Output [8]: [c_customer_sk#11, c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18] + +(37) Sort [codegen id : 13] +Input [8]: [c_customer_sk#11, c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18] +Arguments: [c_customer_sk#11 ASC NULLS FIRST], false, 0 + +(38) SortMergeJoin [codegen id : 14] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#11] +Join condition: None + +(39) Project [codegen id : 14] +Output [12]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, ss_ext_discount_amt#3, ss_ext_sales_price#4, ss_ext_wholesale_cost#5, ss_ext_list_price#6, d_year#8] +Input [14]: [ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_sales_price#4, ss_ext_wholesale_cost#5, ss_ext_list_price#6, d_year#8, c_customer_sk#11, c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, 
c_birth_country#16, c_login#17, c_email_address#18] + +(40) HashAggregate [codegen id : 14] +Input [12]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, ss_ext_discount_amt#3, ss_ext_sales_price#4, ss_ext_wholesale_cost#5, ss_ext_list_price#6, d_year#8] +Keys [8]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#8] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#6 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#5 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#3 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#4 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [2]: [sum#31, isEmpty#32] +Results [10]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#8, sum#33, isEmpty#34] + +(41) Exchange +Input [10]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#8, sum#33, isEmpty#34] +Arguments: hashpartitioning(c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#8, 5), true, [id=#35] + +(42) HashAggregate [codegen id : 15] +Input [10]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#8, sum#33, isEmpty#34] +Keys [8]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, 
c_email_address#18, d_year#8] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#6 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#5 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#3 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#4 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#6 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#5 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#3 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#4 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#36] +Results [8]: [c_customer_id#12 AS customer_id#37, c_first_name#13 AS customer_first_name#38, c_last_name#14 AS customer_last_name#39, c_preferred_cust_flag#15 AS customer_preferred_cust_flag#40, c_birth_country#16 AS customer_birth_country#41, c_login#17 AS customer_login#42, c_email_address#18 AS customer_email_address#43, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#6 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#5 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#3 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#4 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), 
DecimalType(14,6), true))#36 AS year_total#44] + +(43) Exchange +Input [8]: [customer_id#37, customer_first_name#38, customer_last_name#39, customer_preferred_cust_flag#40, customer_birth_country#41, customer_login#42, customer_email_address#43, year_total#44] +Arguments: hashpartitioning(customer_id#37, 5), true, [id=#45] + +(44) Sort [codegen id : 16] +Input [8]: [customer_id#37, customer_first_name#38, customer_last_name#39, customer_preferred_cust_flag#40, customer_birth_country#41, customer_login#42, customer_email_address#43, year_total#44] +Arguments: [customer_id#37 ASC NULLS FIRST], false, 0 + +(45) SortMergeJoin [codegen id : 17] +Left keys [1]: [customer_id#26] +Right keys [1]: [customer_id#37] +Join condition: None + +(46) Scan parquet default.catalog_sales +Output [6]: [cs_sold_date_sk#46, cs_bill_customer_sk#47, cs_ext_discount_amt#48, cs_ext_sales_price#49, cs_ext_wholesale_cost#50, cs_ext_list_price#51] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(47) ColumnarToRow [codegen id : 19] +Input [6]: [cs_sold_date_sk#46, cs_bill_customer_sk#47, cs_ext_discount_amt#48, cs_ext_sales_price#49, cs_ext_wholesale_cost#50, cs_ext_list_price#51] + +(48) Filter [codegen id : 19] +Input [6]: [cs_sold_date_sk#46, cs_bill_customer_sk#47, cs_ext_discount_amt#48, cs_ext_sales_price#49, cs_ext_wholesale_cost#50, cs_ext_list_price#51] +Condition : (isnotnull(cs_bill_customer_sk#47) AND isnotnull(cs_sold_date_sk#46)) + +(49) ReusedExchange [Reuses operator id: 7] +Output [2]: [d_date_sk#7, d_year#8] + +(50) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [cs_sold_date_sk#46] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(51) Project [codegen id : 19] +Output [6]: [cs_bill_customer_sk#47, cs_ext_discount_amt#48, 
cs_ext_sales_price#49, cs_ext_wholesale_cost#50, cs_ext_list_price#51, d_year#8] +Input [8]: [cs_sold_date_sk#46, cs_bill_customer_sk#47, cs_ext_discount_amt#48, cs_ext_sales_price#49, cs_ext_wholesale_cost#50, cs_ext_list_price#51, d_date_sk#7, d_year#8] + +(52) Exchange +Input [6]: [cs_bill_customer_sk#47, cs_ext_discount_amt#48, cs_ext_sales_price#49, cs_ext_wholesale_cost#50, cs_ext_list_price#51, d_year#8] +Arguments: hashpartitioning(cs_bill_customer_sk#47, 5), true, [id=#52] + +(53) Sort [codegen id : 20] +Input [6]: [cs_bill_customer_sk#47, cs_ext_discount_amt#48, cs_ext_sales_price#49, cs_ext_wholesale_cost#50, cs_ext_list_price#51, d_year#8] +Arguments: [cs_bill_customer_sk#47 ASC NULLS FIRST], false, 0 + +(54) ReusedExchange [Reuses operator id: 15] +Output [8]: [c_customer_sk#11, c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18] + +(55) Sort [codegen id : 22] +Input [8]: [c_customer_sk#11, c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18] +Arguments: [c_customer_sk#11 ASC NULLS FIRST], false, 0 + +(56) SortMergeJoin [codegen id : 23] +Left keys [1]: [cs_bill_customer_sk#47] +Right keys [1]: [c_customer_sk#11] +Join condition: None + +(57) Project [codegen id : 23] +Output [12]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, cs_ext_discount_amt#48, cs_ext_sales_price#49, cs_ext_wholesale_cost#50, cs_ext_list_price#51, d_year#8] +Input [14]: [cs_bill_customer_sk#47, cs_ext_discount_amt#48, cs_ext_sales_price#49, cs_ext_wholesale_cost#50, cs_ext_list_price#51, d_year#8, c_customer_sk#11, c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18] + +(58) HashAggregate [codegen id : 23] +Input [12]: [c_customer_id#12, c_first_name#13, c_last_name#14, 
c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, cs_ext_discount_amt#48, cs_ext_sales_price#49, cs_ext_wholesale_cost#50, cs_ext_list_price#51, d_year#8] +Keys [8]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#8] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#51 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#50 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#48 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#49 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [2]: [sum#53, isEmpty#54] +Results [10]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#8, sum#55, isEmpty#56] + +(59) Exchange +Input [10]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#8, sum#55, isEmpty#56] +Arguments: hashpartitioning(c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#8, 5), true, [id=#57] + +(60) HashAggregate [codegen id : 24] +Input [10]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#8, sum#55, isEmpty#56] +Keys [8]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#8] +Functions [1]: 
[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#51 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#50 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#48 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#49 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#51 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#50 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#48 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#49 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#58] +Results [2]: [c_customer_id#12 AS customer_id#59, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#51 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#50 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#48 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#49 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#58 AS year_total#60] + +(61) Filter [codegen id : 24] +Input [2]: [customer_id#59, year_total#60] +Condition : (isnotnull(year_total#60) AND (year_total#60 > 0.000000)) + +(62) Project [codegen id : 24] +Output [2]: [customer_id#59 AS customer_id#61, year_total#60 AS year_total#62] 
+Input [2]: [customer_id#59, year_total#60] + +(63) Exchange +Input [2]: [customer_id#61, year_total#62] +Arguments: hashpartitioning(customer_id#61, 5), true, [id=#63] + +(64) Sort [codegen id : 25] +Input [2]: [customer_id#61, year_total#62] +Arguments: [customer_id#61 ASC NULLS FIRST], false, 0 + +(65) SortMergeJoin [codegen id : 26] +Left keys [1]: [customer_id#26] +Right keys [1]: [customer_id#61] +Join condition: None + +(66) Project [codegen id : 26] +Output [11]: [customer_id#26, year_total#27, customer_id#37, customer_first_name#38, customer_last_name#39, customer_preferred_cust_flag#40, customer_birth_country#41, customer_login#42, customer_email_address#43, year_total#44, year_total#62] +Input [12]: [customer_id#26, year_total#27, customer_id#37, customer_first_name#38, customer_last_name#39, customer_preferred_cust_flag#40, customer_birth_country#41, customer_login#42, customer_email_address#43, year_total#44, customer_id#61, year_total#62] + +(67) Scan parquet default.catalog_sales +Output [6]: [cs_sold_date_sk#46, cs_bill_customer_sk#47, cs_ext_discount_amt#48, cs_ext_sales_price#49, cs_ext_wholesale_cost#50, cs_ext_list_price#51] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(68) ColumnarToRow [codegen id : 28] +Input [6]: [cs_sold_date_sk#46, cs_bill_customer_sk#47, cs_ext_discount_amt#48, cs_ext_sales_price#49, cs_ext_wholesale_cost#50, cs_ext_list_price#51] + +(69) Filter [codegen id : 28] +Input [6]: [cs_sold_date_sk#46, cs_bill_customer_sk#47, cs_ext_discount_amt#48, cs_ext_sales_price#49, cs_ext_wholesale_cost#50, cs_ext_list_price#51] +Condition : (isnotnull(cs_bill_customer_sk#47) AND isnotnull(cs_sold_date_sk#46)) + +(70) ReusedExchange [Reuses operator id: 31] +Output [2]: [d_date_sk#7, d_year#8] + +(71) 
BroadcastHashJoin [codegen id : 28] +Left keys [1]: [cs_sold_date_sk#46] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(72) Project [codegen id : 28] +Output [6]: [cs_bill_customer_sk#47, cs_ext_discount_amt#48, cs_ext_sales_price#49, cs_ext_wholesale_cost#50, cs_ext_list_price#51, d_year#8] +Input [8]: [cs_sold_date_sk#46, cs_bill_customer_sk#47, cs_ext_discount_amt#48, cs_ext_sales_price#49, cs_ext_wholesale_cost#50, cs_ext_list_price#51, d_date_sk#7, d_year#8] + +(73) Exchange +Input [6]: [cs_bill_customer_sk#47, cs_ext_discount_amt#48, cs_ext_sales_price#49, cs_ext_wholesale_cost#50, cs_ext_list_price#51, d_year#8] +Arguments: hashpartitioning(cs_bill_customer_sk#47, 5), true, [id=#64] + +(74) Sort [codegen id : 29] +Input [6]: [cs_bill_customer_sk#47, cs_ext_discount_amt#48, cs_ext_sales_price#49, cs_ext_wholesale_cost#50, cs_ext_list_price#51, d_year#8] +Arguments: [cs_bill_customer_sk#47 ASC NULLS FIRST], false, 0 + +(75) ReusedExchange [Reuses operator id: 15] +Output [8]: [c_customer_sk#11, c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18] + +(76) Sort [codegen id : 31] +Input [8]: [c_customer_sk#11, c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18] +Arguments: [c_customer_sk#11 ASC NULLS FIRST], false, 0 + +(77) SortMergeJoin [codegen id : 32] +Left keys [1]: [cs_bill_customer_sk#47] +Right keys [1]: [c_customer_sk#11] +Join condition: None + +(78) Project [codegen id : 32] +Output [12]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, cs_ext_discount_amt#48, cs_ext_sales_price#49, cs_ext_wholesale_cost#50, cs_ext_list_price#51, d_year#8] +Input [14]: [cs_bill_customer_sk#47, cs_ext_discount_amt#48, cs_ext_sales_price#49, cs_ext_wholesale_cost#50, cs_ext_list_price#51, d_year#8, c_customer_sk#11, 
c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18] + +(79) HashAggregate [codegen id : 32] +Input [12]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, cs_ext_discount_amt#48, cs_ext_sales_price#49, cs_ext_wholesale_cost#50, cs_ext_list_price#51, d_year#8] +Keys [8]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#8] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#51 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#50 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#48 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#49 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [2]: [sum#65, isEmpty#66] +Results [10]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#8, sum#67, isEmpty#68] + +(80) Exchange +Input [10]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#8, sum#67, isEmpty#68] +Arguments: hashpartitioning(c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#8, 5), true, [id=#69] + +(81) HashAggregate [codegen id : 33] +Input [10]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#8, sum#67, isEmpty#68] +Keys [8]: [c_customer_id#12, c_first_name#13, 
c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#8] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#51 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#50 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#48 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#49 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#51 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#50 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#48 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#49 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#70] +Results [2]: [c_customer_id#12 AS customer_id#71, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#51 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#50 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#48 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#49 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#70 AS year_total#72] + +(82) Exchange +Input [2]: [customer_id#71, year_total#72] +Arguments: hashpartitioning(customer_id#71, 5), true, [id=#73] + +(83) Sort [codegen 
id : 34] +Input [2]: [customer_id#71, year_total#72] +Arguments: [customer_id#71 ASC NULLS FIRST], false, 0 + +(84) SortMergeJoin [codegen id : 35] +Left keys [1]: [customer_id#26] +Right keys [1]: [customer_id#71] +Join condition: (CASE WHEN (year_total#62 > 0.000000) THEN CheckOverflow((promote_precision(year_total#72) / promote_precision(year_total#62)), DecimalType(38,14), true) ELSE null END > CASE WHEN (year_total#27 > 0.000000) THEN CheckOverflow((promote_precision(year_total#44) / promote_precision(year_total#27)), DecimalType(38,14), true) ELSE null END) + +(85) Project [codegen id : 35] +Output [10]: [customer_id#26, customer_id#37, customer_first_name#38, customer_last_name#39, customer_preferred_cust_flag#40, customer_birth_country#41, customer_login#42, customer_email_address#43, year_total#62, year_total#72] +Input [13]: [customer_id#26, year_total#27, customer_id#37, customer_first_name#38, customer_last_name#39, customer_preferred_cust_flag#40, customer_birth_country#41, customer_login#42, customer_email_address#43, year_total#44, year_total#62, customer_id#71, year_total#72] + +(86) Scan parquet default.web_sales +Output [6]: [ws_sold_date_sk#74, ws_bill_customer_sk#75, ws_ext_discount_amt#76, ws_ext_sales_price#77, ws_ext_wholesale_cost#78, ws_ext_list_price#79] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(87) ColumnarToRow [codegen id : 37] +Input [6]: [ws_sold_date_sk#74, ws_bill_customer_sk#75, ws_ext_discount_amt#76, ws_ext_sales_price#77, ws_ext_wholesale_cost#78, ws_ext_list_price#79] + +(88) Filter [codegen id : 37] +Input [6]: [ws_sold_date_sk#74, ws_bill_customer_sk#75, ws_ext_discount_amt#76, ws_ext_sales_price#77, ws_ext_wholesale_cost#78, ws_ext_list_price#79] +Condition : (isnotnull(ws_bill_customer_sk#75) 
AND isnotnull(ws_sold_date_sk#74)) + +(89) ReusedExchange [Reuses operator id: 7] +Output [2]: [d_date_sk#7, d_year#8] + +(90) BroadcastHashJoin [codegen id : 37] +Left keys [1]: [ws_sold_date_sk#74] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(91) Project [codegen id : 37] +Output [6]: [ws_bill_customer_sk#75, ws_ext_discount_amt#76, ws_ext_sales_price#77, ws_ext_wholesale_cost#78, ws_ext_list_price#79, d_year#8] +Input [8]: [ws_sold_date_sk#74, ws_bill_customer_sk#75, ws_ext_discount_amt#76, ws_ext_sales_price#77, ws_ext_wholesale_cost#78, ws_ext_list_price#79, d_date_sk#7, d_year#8] + +(92) Exchange +Input [6]: [ws_bill_customer_sk#75, ws_ext_discount_amt#76, ws_ext_sales_price#77, ws_ext_wholesale_cost#78, ws_ext_list_price#79, d_year#8] +Arguments: hashpartitioning(ws_bill_customer_sk#75, 5), true, [id=#80] + +(93) Sort [codegen id : 38] +Input [6]: [ws_bill_customer_sk#75, ws_ext_discount_amt#76, ws_ext_sales_price#77, ws_ext_wholesale_cost#78, ws_ext_list_price#79, d_year#8] +Arguments: [ws_bill_customer_sk#75 ASC NULLS FIRST], false, 0 + +(94) ReusedExchange [Reuses operator id: 15] +Output [8]: [c_customer_sk#11, c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18] + +(95) Sort [codegen id : 40] +Input [8]: [c_customer_sk#11, c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18] +Arguments: [c_customer_sk#11 ASC NULLS FIRST], false, 0 + +(96) SortMergeJoin [codegen id : 41] +Left keys [1]: [ws_bill_customer_sk#75] +Right keys [1]: [c_customer_sk#11] +Join condition: None + +(97) Project [codegen id : 41] +Output [12]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, ws_ext_discount_amt#76, ws_ext_sales_price#77, ws_ext_wholesale_cost#78, ws_ext_list_price#79, d_year#8] +Input [14]: [ws_bill_customer_sk#75, 
ws_ext_discount_amt#76, ws_ext_sales_price#77, ws_ext_wholesale_cost#78, ws_ext_list_price#79, d_year#8, c_customer_sk#11, c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18] + +(98) HashAggregate [codegen id : 41] +Input [12]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, ws_ext_discount_amt#76, ws_ext_sales_price#77, ws_ext_wholesale_cost#78, ws_ext_list_price#79, d_year#8] +Keys [8]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#8] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#79 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#78 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#76 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#77 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [2]: [sum#81, isEmpty#82] +Results [10]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#8, sum#83, isEmpty#84] + +(99) Exchange +Input [10]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#8, sum#83, isEmpty#84] +Arguments: hashpartitioning(c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#8, 5), true, [id=#85] + +(100) HashAggregate [codegen id : 42] +Input [10]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, 
c_birth_country#16, c_login#17, c_email_address#18, d_year#8, sum#83, isEmpty#84] +Keys [8]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#8] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#79 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#78 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#76 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#77 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#79 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#78 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#76 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#77 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#86] +Results [2]: [c_customer_id#12 AS customer_id#87, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#79 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#78 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#76 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#77 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#86 AS year_total#88] + +(101) Filter 
[codegen id : 42] +Input [2]: [customer_id#87, year_total#88] +Condition : (isnotnull(year_total#88) AND (year_total#88 > 0.000000)) + +(102) Project [codegen id : 42] +Output [2]: [customer_id#87 AS customer_id#89, year_total#88 AS year_total#90] +Input [2]: [customer_id#87, year_total#88] + +(103) Exchange +Input [2]: [customer_id#89, year_total#90] +Arguments: hashpartitioning(customer_id#89, 5), true, [id=#91] + +(104) Sort [codegen id : 43] +Input [2]: [customer_id#89, year_total#90] +Arguments: [customer_id#89 ASC NULLS FIRST], false, 0 + +(105) SortMergeJoin [codegen id : 44] +Left keys [1]: [customer_id#26] +Right keys [1]: [customer_id#89] +Join condition: None + +(106) Project [codegen id : 44] +Output [11]: [customer_id#26, customer_id#37, customer_first_name#38, customer_last_name#39, customer_preferred_cust_flag#40, customer_birth_country#41, customer_login#42, customer_email_address#43, year_total#62, year_total#72, year_total#90] +Input [12]: [customer_id#26, customer_id#37, customer_first_name#38, customer_last_name#39, customer_preferred_cust_flag#40, customer_birth_country#41, customer_login#42, customer_email_address#43, year_total#62, year_total#72, customer_id#89, year_total#90] + +(107) Scan parquet default.web_sales +Output [6]: [ws_sold_date_sk#74, ws_bill_customer_sk#75, ws_ext_discount_amt#76, ws_ext_sales_price#77, ws_ext_wholesale_cost#78, ws_ext_list_price#79] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(108) ColumnarToRow [codegen id : 46] +Input [6]: [ws_sold_date_sk#74, ws_bill_customer_sk#75, ws_ext_discount_amt#76, ws_ext_sales_price#77, ws_ext_wholesale_cost#78, ws_ext_list_price#79] + +(109) Filter [codegen id : 46] +Input [6]: [ws_sold_date_sk#74, ws_bill_customer_sk#75, ws_ext_discount_amt#76, 
ws_ext_sales_price#77, ws_ext_wholesale_cost#78, ws_ext_list_price#79] +Condition : (isnotnull(ws_bill_customer_sk#75) AND isnotnull(ws_sold_date_sk#74)) + +(110) ReusedExchange [Reuses operator id: 31] +Output [2]: [d_date_sk#7, d_year#8] + +(111) BroadcastHashJoin [codegen id : 46] +Left keys [1]: [ws_sold_date_sk#74] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(112) Project [codegen id : 46] +Output [6]: [ws_bill_customer_sk#75, ws_ext_discount_amt#76, ws_ext_sales_price#77, ws_ext_wholesale_cost#78, ws_ext_list_price#79, d_year#8] +Input [8]: [ws_sold_date_sk#74, ws_bill_customer_sk#75, ws_ext_discount_amt#76, ws_ext_sales_price#77, ws_ext_wholesale_cost#78, ws_ext_list_price#79, d_date_sk#7, d_year#8] + +(113) Exchange +Input [6]: [ws_bill_customer_sk#75, ws_ext_discount_amt#76, ws_ext_sales_price#77, ws_ext_wholesale_cost#78, ws_ext_list_price#79, d_year#8] +Arguments: hashpartitioning(ws_bill_customer_sk#75, 5), true, [id=#92] + +(114) Sort [codegen id : 47] +Input [6]: [ws_bill_customer_sk#75, ws_ext_discount_amt#76, ws_ext_sales_price#77, ws_ext_wholesale_cost#78, ws_ext_list_price#79, d_year#8] +Arguments: [ws_bill_customer_sk#75 ASC NULLS FIRST], false, 0 + +(115) ReusedExchange [Reuses operator id: 15] +Output [8]: [c_customer_sk#11, c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18] + +(116) Sort [codegen id : 49] +Input [8]: [c_customer_sk#11, c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18] +Arguments: [c_customer_sk#11 ASC NULLS FIRST], false, 0 + +(117) SortMergeJoin [codegen id : 50] +Left keys [1]: [ws_bill_customer_sk#75] +Right keys [1]: [c_customer_sk#11] +Join condition: None + +(118) Project [codegen id : 50] +Output [12]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, ws_ext_discount_amt#76, 
ws_ext_sales_price#77, ws_ext_wholesale_cost#78, ws_ext_list_price#79, d_year#8] +Input [14]: [ws_bill_customer_sk#75, ws_ext_discount_amt#76, ws_ext_sales_price#77, ws_ext_wholesale_cost#78, ws_ext_list_price#79, d_year#8, c_customer_sk#11, c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18] + +(119) HashAggregate [codegen id : 50] +Input [12]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, ws_ext_discount_amt#76, ws_ext_sales_price#77, ws_ext_wholesale_cost#78, ws_ext_list_price#79, d_year#8] +Keys [8]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#8] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#79 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#78 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#76 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#77 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [2]: [sum#93, isEmpty#94] +Results [10]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#8, sum#95, isEmpty#96] + +(120) Exchange +Input [10]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#8, sum#95, isEmpty#96] +Arguments: hashpartitioning(c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#8, 5), true, [id=#97] + +(121) 
HashAggregate [codegen id : 51] +Input [10]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#8, sum#95, isEmpty#96] +Keys [8]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#8] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#79 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#78 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#76 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#77 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#79 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#78 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#76 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#77 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#98] +Results [2]: [c_customer_id#12 AS customer_id#99, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#79 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#78 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#76 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + 
promote_precision(cast(ws_ext_sales_price#77 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#98 AS year_total#100] + +(122) Exchange +Input [2]: [customer_id#99, year_total#100] +Arguments: hashpartitioning(customer_id#99, 5), true, [id=#101] + +(123) Sort [codegen id : 52] +Input [2]: [customer_id#99, year_total#100] +Arguments: [customer_id#99 ASC NULLS FIRST], false, 0 + +(124) SortMergeJoin [codegen id : 53] +Left keys [1]: [customer_id#26] +Right keys [1]: [customer_id#99] +Join condition: (CASE WHEN (year_total#62 > 0.000000) THEN CheckOverflow((promote_precision(year_total#72) / promote_precision(year_total#62)), DecimalType(38,14), true) ELSE null END > CASE WHEN (year_total#90 > 0.000000) THEN CheckOverflow((promote_precision(year_total#100) / promote_precision(year_total#90)), DecimalType(38,14), true) ELSE null END) + +(125) Project [codegen id : 53] +Output [7]: [customer_id#37, customer_first_name#38, customer_last_name#39, customer_preferred_cust_flag#40, customer_birth_country#41, customer_login#42, customer_email_address#43] +Input [13]: [customer_id#26, customer_id#37, customer_first_name#38, customer_last_name#39, customer_preferred_cust_flag#40, customer_birth_country#41, customer_login#42, customer_email_address#43, year_total#62, year_total#72, year_total#90, customer_id#99, year_total#100] + +(126) TakeOrderedAndProject +Input [7]: [customer_id#37, customer_first_name#38, customer_last_name#39, customer_preferred_cust_flag#40, customer_birth_country#41, customer_login#42, customer_email_address#43] +Arguments: 100, [customer_id#37 ASC NULLS FIRST, customer_first_name#38 ASC NULLS FIRST, customer_last_name#39 ASC NULLS FIRST, customer_preferred_cust_flag#40 ASC NULLS FIRST, customer_birth_country#41 ASC NULLS FIRST, customer_login#42 ASC NULLS FIRST, customer_email_address#43 ASC NULLS FIRST], [customer_id#37, customer_first_name#38, customer_last_name#39, customer_preferred_cust_flag#40, 
customer_birth_country#41, customer_login#42, customer_email_address#43] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4.sf100/simplified.txt new file mode 100644 index 0000000000000..a3798d59135dd --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4.sf100/simplified.txt @@ -0,0 +1,231 @@ +TakeOrderedAndProject [customer_birth_country,customer_email_address,customer_first_name,customer_id,customer_last_name,customer_login,customer_preferred_cust_flag] + WholeStageCodegen (53) + Project [customer_birth_country,customer_email_address,customer_first_name,customer_id,customer_last_name,customer_login,customer_preferred_cust_flag] + SortMergeJoin [customer_id,customer_id,year_total,year_total,year_total,year_total] + InputAdapter + WholeStageCodegen (44) + Project [customer_birth_country,customer_email_address,customer_first_name,customer_id,customer_id,customer_last_name,customer_login,customer_preferred_cust_flag,year_total,year_total,year_total] + SortMergeJoin [customer_id,customer_id] + InputAdapter + WholeStageCodegen (35) + Project [customer_birth_country,customer_email_address,customer_first_name,customer_id,customer_id,customer_last_name,customer_login,customer_preferred_cust_flag,year_total,year_total] + SortMergeJoin [customer_id,customer_id,year_total,year_total,year_total,year_total] + InputAdapter + WholeStageCodegen (26) + Project [customer_birth_country,customer_email_address,customer_first_name,customer_id,customer_id,customer_last_name,customer_login,customer_preferred_cust_flag,year_total,year_total,year_total] + SortMergeJoin [customer_id,customer_id] + InputAdapter + WholeStageCodegen (17) + SortMergeJoin [customer_id,customer_id] + InputAdapter + WholeStageCodegen (8) + Sort [customer_id] + InputAdapter + Exchange [customer_id] #1 + WholeStageCodegen (7) + Filter [year_total] + 
HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,isEmpty,sum] [customer_id,isEmpty,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true)),year_total] + InputAdapter + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #2 + WholeStageCodegen (6) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost] [isEmpty,isEmpty,sum,sum] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (3) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #3 + WholeStageCodegen (2) + Project [d_year,ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet 
default.date_dim [d_date_sk,d_year] + InputAdapter + WholeStageCodegen (5) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #5 + WholeStageCodegen (4) + Filter [c_customer_id,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] + InputAdapter + WholeStageCodegen (16) + Sort [customer_id] + InputAdapter + Exchange [customer_id] #6 + WholeStageCodegen (15) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,isEmpty,sum] [customer_birth_country,customer_email_address,customer_first_name,customer_id,customer_last_name,customer_login,customer_preferred_cust_flag,isEmpty,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true)),year_total] + InputAdapter + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #7 + WholeStageCodegen (14) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost] [isEmpty,isEmpty,sum,sum] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost] + SortMergeJoin [c_customer_sk,ss_customer_sk] + 
InputAdapter + WholeStageCodegen (11) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #8 + WholeStageCodegen (10) + Project [d_year,ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_sold_date_sk] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (9) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + WholeStageCodegen (13) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] #5 + InputAdapter + WholeStageCodegen (25) + Sort [customer_id] + InputAdapter + Exchange [customer_id] #10 + WholeStageCodegen (24) + Project [customer_id,year_total] + Filter [year_total] + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,isEmpty,sum] [customer_id,isEmpty,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true)),year_total] + InputAdapter + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #11 + WholeStageCodegen (23) + HashAggregate 
[c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,d_year] [isEmpty,isEmpty,sum,sum] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,d_year] + SortMergeJoin [c_customer_sk,cs_bill_customer_sk] + InputAdapter + WholeStageCodegen (20) + Sort [cs_bill_customer_sk] + InputAdapter + Exchange [cs_bill_customer_sk] #12 + WholeStageCodegen (19) + Project [cs_bill_customer_sk,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_bill_customer_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #4 + InputAdapter + WholeStageCodegen (22) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] #5 + InputAdapter + WholeStageCodegen (34) + Sort [customer_id] + InputAdapter + Exchange [customer_id] #13 + WholeStageCodegen (33) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,isEmpty,sum] [customer_id,isEmpty,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + 
promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true)),year_total] + InputAdapter + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #14 + WholeStageCodegen (32) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,d_year] [isEmpty,isEmpty,sum,sum] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,d_year] + SortMergeJoin [c_customer_sk,cs_bill_customer_sk] + InputAdapter + WholeStageCodegen (29) + Sort [cs_bill_customer_sk] + InputAdapter + Exchange [cs_bill_customer_sk] #15 + WholeStageCodegen (28) + Project [cs_bill_customer_sk,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_bill_customer_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #9 + InputAdapter + WholeStageCodegen (31) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] #5 + InputAdapter + WholeStageCodegen (43) + Sort [customer_id] + InputAdapter + Exchange [customer_id] #16 + WholeStageCodegen (42) + Project [customer_id,year_total] + Filter [year_total] + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,isEmpty,sum] 
[customer_id,isEmpty,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true)),year_total] + InputAdapter + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #17 + WholeStageCodegen (41) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost] [isEmpty,isEmpty,sum,sum] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost] + SortMergeJoin [c_customer_sk,ws_bill_customer_sk] + InputAdapter + WholeStageCodegen (38) + Sort [ws_bill_customer_sk] + InputAdapter + Exchange [ws_bill_customer_sk] #18 + WholeStageCodegen (37) + Project [d_year,ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_bill_customer_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #4 + InputAdapter + WholeStageCodegen (40) + Sort [c_customer_sk] + InputAdapter + ReusedExchange 
[c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] #5 + InputAdapter + WholeStageCodegen (52) + Sort [customer_id] + InputAdapter + Exchange [customer_id] #19 + WholeStageCodegen (51) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,isEmpty,sum] [customer_id,isEmpty,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true)),year_total] + InputAdapter + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #20 + WholeStageCodegen (50) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost] [isEmpty,isEmpty,sum,sum] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost] + SortMergeJoin [c_customer_sk,ws_bill_customer_sk] + InputAdapter + WholeStageCodegen (47) + Sort [ws_bill_customer_sk] + InputAdapter + Exchange [ws_bill_customer_sk] #21 + WholeStageCodegen (46) + Project [d_year,ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_bill_customer_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan 
parquet default.web_sales [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #9 + InputAdapter + WholeStageCodegen (49) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] #5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/explain.txt new file mode 100644 index 0000000000000..a58e27871b94e --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/explain.txt @@ -0,0 +1,606 @@ +== Physical Plan == +TakeOrderedAndProject (107) ++- * Project (106) + +- * BroadcastHashJoin Inner BuildRight (105) + :- * Project (91) + : +- * BroadcastHashJoin Inner BuildRight (90) + : :- * Project (71) + : : +- * BroadcastHashJoin Inner BuildRight (70) + : : :- * Project (56) + : : : +- * BroadcastHashJoin Inner BuildRight (55) + : : : :- * BroadcastHashJoin Inner BuildRight (36) + : : : : :- * Filter (19) + : : : : : +- * HashAggregate (18) + : : : : : +- Exchange (17) + : : : : : +- * HashAggregate (16) + : : : : : +- * Project (15) + : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : :- * Project (9) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : : : :- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet default.customer (1) + : : : : : : +- BroadcastExchange (7) + : : : : : : +- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : +- Scan parquet default.store_sales (4) + : : : : : +- BroadcastExchange (13) + : : : : : +- * Filter (12) + : : : : : +- * ColumnarToRow (11) + : : : : : +- Scan parquet default.date_dim (10) + : : : : +- BroadcastExchange (35) + : : : : +- * HashAggregate (34) + : : : : +- Exchange (33) + 
: : : : +- * HashAggregate (32) + : : : : +- * Project (31) + : : : : +- * BroadcastHashJoin Inner BuildRight (30) + : : : : :- * Project (25) + : : : : : +- * BroadcastHashJoin Inner BuildRight (24) + : : : : : :- * Filter (22) + : : : : : : +- * ColumnarToRow (21) + : : : : : : +- Scan parquet default.customer (20) + : : : : : +- ReusedExchange (23) + : : : : +- BroadcastExchange (29) + : : : : +- * Filter (28) + : : : : +- * ColumnarToRow (27) + : : : : +- Scan parquet default.date_dim (26) + : : : +- BroadcastExchange (54) + : : : +- * Project (53) + : : : +- * Filter (52) + : : : +- * HashAggregate (51) + : : : +- Exchange (50) + : : : +- * HashAggregate (49) + : : : +- * Project (48) + : : : +- * BroadcastHashJoin Inner BuildRight (47) + : : : :- * Project (45) + : : : : +- * BroadcastHashJoin Inner BuildRight (44) + : : : : :- * Filter (39) + : : : : : +- * ColumnarToRow (38) + : : : : : +- Scan parquet default.customer (37) + : : : : +- BroadcastExchange (43) + : : : : +- * Filter (42) + : : : : +- * ColumnarToRow (41) + : : : : +- Scan parquet default.catalog_sales (40) + : : : +- ReusedExchange (46) + : : +- BroadcastExchange (69) + : : +- * HashAggregate (68) + : : +- Exchange (67) + : : +- * HashAggregate (66) + : : +- * Project (65) + : : +- * BroadcastHashJoin Inner BuildRight (64) + : : :- * Project (62) + : : : +- * BroadcastHashJoin Inner BuildRight (61) + : : : :- * Filter (59) + : : : : +- * ColumnarToRow (58) + : : : : +- Scan parquet default.customer (57) + : : : +- ReusedExchange (60) + : : +- ReusedExchange (63) + : +- BroadcastExchange (89) + : +- * Project (88) + : +- * Filter (87) + : +- * HashAggregate (86) + : +- Exchange (85) + : +- * HashAggregate (84) + : +- * Project (83) + : +- * BroadcastHashJoin Inner BuildRight (82) + : :- * Project (80) + : : +- * BroadcastHashJoin Inner BuildRight (79) + : : :- * Filter (74) + : : : +- * ColumnarToRow (73) + : : : +- Scan parquet default.customer (72) + : : +- BroadcastExchange (78) + : : +- * 
Filter (77) + : : +- * ColumnarToRow (76) + : : +- Scan parquet default.web_sales (75) + : +- ReusedExchange (81) + +- BroadcastExchange (104) + +- * HashAggregate (103) + +- Exchange (102) + +- * HashAggregate (101) + +- * Project (100) + +- * BroadcastHashJoin Inner BuildRight (99) + :- * Project (97) + : +- * BroadcastHashJoin Inner BuildRight (96) + : :- * Filter (94) + : : +- * ColumnarToRow (93) + : : +- Scan parquet default.customer (92) + : +- ReusedExchange (95) + +- ReusedExchange (98) + + +(1) Scan parquet default.customer +Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] + +(3) Filter [codegen id : 3] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(4) Scan parquet default.store_sales +Output [6]: [ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [6]: [ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_sales_price#12, 
ss_ext_wholesale_cost#13, ss_ext_list_price#14] + +(6) Filter [codegen id : 1] +Input [6]: [ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14] +Condition : (isnotnull(ss_customer_sk#10) AND isnotnull(ss_sold_date_sk#9)) + +(7) BroadcastExchange +Input [6]: [ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#15] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#10] +Join condition: None + +(9) Project [codegen id : 3] +Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14] +Input [14]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14] + +(10) Scan parquet default.date_dim +Output [2]: [d_date_sk#16, d_year#17] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#16, d_year#17] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#16, d_year#17] +Condition : ((isnotnull(d_year#17) AND (d_year#17 = 2001)) AND isnotnull(d_date_sk#16)) + +(13) BroadcastExchange +Input [2]: [d_date_sk#16, d_year#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as 
bigint)),false), [id=#18] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#9] +Right keys [1]: [d_date_sk#16] +Join condition: None + +(15) Project [codegen id : 3] +Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14, d_year#17] +Input [14]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14, d_date_sk#16, d_year#17] + +(16) HashAggregate [codegen id : 3] +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14, d_year#17] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#14 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#13 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#12 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [2]: [sum#19, isEmpty#20] +Results [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#21, isEmpty#22] + +(17) Exchange +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, 
c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#21, isEmpty#22] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, 5), true, [id=#23] + +(18) HashAggregate [codegen id : 24] +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#21, isEmpty#22] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#14 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#13 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#12 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#14 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#13 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#12 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#24] +Results [2]: [c_customer_id#2 AS customer_id#25, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#14 as 
decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#13 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#12 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#24 AS year_total#26] + +(19) Filter [codegen id : 24] +Input [2]: [customer_id#25, year_total#26] +Condition : (isnotnull(year_total#26) AND (year_total#26 > 0.000000)) + +(20) Scan parquet default.customer +Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] + +(22) Filter [codegen id : 6] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(23) ReusedExchange [Reuses operator id: 7] +Output [6]: [ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14] + +(24) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#10] +Join condition: None + +(25) Project [codegen id : 6] +Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_ext_discount_amt#11, ss_ext_sales_price#12, 
ss_ext_wholesale_cost#13, ss_ext_list_price#14] +Input [14]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14] + +(26) Scan parquet default.date_dim +Output [2]: [d_date_sk#16, d_year#17] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#16, d_year#17] + +(28) Filter [codegen id : 5] +Input [2]: [d_date_sk#16, d_year#17] +Condition : ((isnotnull(d_year#17) AND (d_year#17 = 2002)) AND isnotnull(d_date_sk#16)) + +(29) BroadcastExchange +Input [2]: [d_date_sk#16, d_year#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#27] + +(30) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#9] +Right keys [1]: [d_date_sk#16] +Join condition: None + +(31) Project [codegen id : 6] +Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14, d_year#17] +Input [14]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14, d_date_sk#16, d_year#17] + +(32) HashAggregate [codegen id : 6] +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#11, ss_ext_sales_price#12, 
ss_ext_wholesale_cost#13, ss_ext_list_price#14, d_year#17] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#14 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#13 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#12 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [2]: [sum#28, isEmpty#29] +Results [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#30, isEmpty#31] + +(33) Exchange +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#30, isEmpty#31] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, 5), true, [id=#32] + +(34) HashAggregate [codegen id : 7] +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#30, isEmpty#31] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#14 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#13 as decimal(8,2)))), DecimalType(8,2), 
true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#12 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#14 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#13 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#12 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#33] +Results [8]: [c_customer_id#2 AS customer_id#34, c_first_name#3 AS customer_first_name#35, c_last_name#4 AS customer_last_name#36, c_preferred_cust_flag#5 AS customer_preferred_cust_flag#37, c_birth_country#6 AS customer_birth_country#38, c_login#7 AS customer_login#39, c_email_address#8 AS customer_email_address#40, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#14 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#13 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#12 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#33 AS year_total#41] + +(35) BroadcastExchange +Input [8]: [customer_id#34, customer_first_name#35, customer_last_name#36, customer_preferred_cust_flag#37, customer_birth_country#38, customer_login#39, customer_email_address#40, year_total#41] +Arguments: HashedRelationBroadcastMode(List(input[0, 
string, true]),false), [id=#42] + +(36) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [customer_id#25] +Right keys [1]: [customer_id#34] +Join condition: None + +(37) Scan parquet default.customer +Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 10] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] + +(39) Filter [codegen id : 10] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(40) Scan parquet default.catalog_sales +Output [6]: [cs_sold_date_sk#43, cs_bill_customer_sk#44, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 8] +Input [6]: [cs_sold_date_sk#43, cs_bill_customer_sk#44, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48] + +(42) Filter [codegen id : 8] +Input [6]: [cs_sold_date_sk#43, cs_bill_customer_sk#44, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48] +Condition : (isnotnull(cs_bill_customer_sk#44) AND isnotnull(cs_sold_date_sk#43)) + +(43) 
BroadcastExchange +Input [6]: [cs_sold_date_sk#43, cs_bill_customer_sk#44, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#49] + +(44) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [cs_bill_customer_sk#44] +Join condition: None + +(45) Project [codegen id : 10] +Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, cs_sold_date_sk#43, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48] +Input [14]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, cs_sold_date_sk#43, cs_bill_customer_sk#44, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48] + +(46) ReusedExchange [Reuses operator id: 13] +Output [2]: [d_date_sk#16, d_year#17] + +(47) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_sold_date_sk#43] +Right keys [1]: [d_date_sk#16] +Join condition: None + +(48) Project [codegen id : 10] +Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48, d_year#17] +Input [14]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, cs_sold_date_sk#43, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48, d_date_sk#16, d_year#17] + +(49) HashAggregate [codegen id : 10] +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, cs_ext_discount_amt#45, cs_ext_sales_price#46, 
cs_ext_wholesale_cost#47, cs_ext_list_price#48, d_year#17] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#48 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#47 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#45 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#46 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [2]: [sum#50, isEmpty#51] +Results [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#52, isEmpty#53] + +(50) Exchange +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#52, isEmpty#53] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, 5), true, [id=#54] + +(51) HashAggregate [codegen id : 11] +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#52, isEmpty#53] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#48 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#47 as decimal(8,2)))), 
DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#45 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#46 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#48 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#47 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#45 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#46 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#55] +Results [2]: [c_customer_id#2 AS customer_id#56, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#48 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#47 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#45 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#46 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#55 AS year_total#57] + +(52) Filter [codegen id : 11] +Input [2]: [customer_id#56, year_total#57] +Condition : (isnotnull(year_total#57) AND (year_total#57 > 0.000000)) + +(53) Project [codegen id : 11] +Output [2]: [customer_id#56 AS customer_id#58, year_total#57 AS year_total#59] +Input [2]: [customer_id#56, year_total#57] + +(54) BroadcastExchange +Input [2]: [customer_id#58, year_total#59] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#60] + +(55) BroadcastHashJoin [codegen id : 24] +Left keys [1]: 
[customer_id#25] +Right keys [1]: [customer_id#58] +Join condition: None + +(56) Project [codegen id : 24] +Output [11]: [customer_id#25, year_total#26, customer_id#34, customer_first_name#35, customer_last_name#36, customer_preferred_cust_flag#37, customer_birth_country#38, customer_login#39, customer_email_address#40, year_total#41, year_total#59] +Input [12]: [customer_id#25, year_total#26, customer_id#34, customer_first_name#35, customer_last_name#36, customer_preferred_cust_flag#37, customer_birth_country#38, customer_login#39, customer_email_address#40, year_total#41, customer_id#58, year_total#59] + +(57) Scan parquet default.customer +Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(58) ColumnarToRow [codegen id : 14] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] + +(59) Filter [codegen id : 14] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(60) ReusedExchange [Reuses operator id: 43] +Output [6]: [cs_sold_date_sk#43, cs_bill_customer_sk#44, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48] + +(61) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [cs_bill_customer_sk#44] +Join condition: None + +(62) Project [codegen id : 14] +Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, 
c_login#7, c_email_address#8, cs_sold_date_sk#43, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48] +Input [14]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, cs_sold_date_sk#43, cs_bill_customer_sk#44, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48] + +(63) ReusedExchange [Reuses operator id: 29] +Output [2]: [d_date_sk#16, d_year#17] + +(64) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [cs_sold_date_sk#43] +Right keys [1]: [d_date_sk#16] +Join condition: None + +(65) Project [codegen id : 14] +Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48, d_year#17] +Input [14]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, cs_sold_date_sk#43, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48, d_date_sk#16, d_year#17] + +(66) HashAggregate [codegen id : 14] +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48, d_year#17] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#48 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#47 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - 
promote_precision(cast(cs_ext_discount_amt#45 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#46 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [2]: [sum#61, isEmpty#62] +Results [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#63, isEmpty#64] + +(67) Exchange +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#63, isEmpty#64] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, 5), true, [id=#65] + +(68) HashAggregate [codegen id : 15] +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#63, isEmpty#64] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#48 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#47 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#45 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#46 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#48 as decimal(8,2))) - 
promote_precision(cast(cs_ext_wholesale_cost#47 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#45 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#46 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#66] +Results [2]: [c_customer_id#2 AS customer_id#67, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#48 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#47 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#45 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#46 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#66 AS year_total#68] + +(69) BroadcastExchange +Input [2]: [customer_id#67, year_total#68] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#69] + +(70) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [customer_id#25] +Right keys [1]: [customer_id#67] +Join condition: (CASE WHEN (year_total#59 > 0.000000) THEN CheckOverflow((promote_precision(year_total#68) / promote_precision(year_total#59)), DecimalType(38,14), true) ELSE null END > CASE WHEN (year_total#26 > 0.000000) THEN CheckOverflow((promote_precision(year_total#41) / promote_precision(year_total#26)), DecimalType(38,14), true) ELSE null END) + +(71) Project [codegen id : 24] +Output [10]: [customer_id#25, customer_id#34, customer_first_name#35, customer_last_name#36, customer_preferred_cust_flag#37, customer_birth_country#38, customer_login#39, customer_email_address#40, year_total#59, year_total#68] +Input [13]: [customer_id#25, year_total#26, customer_id#34, customer_first_name#35, customer_last_name#36, customer_preferred_cust_flag#37, 
customer_birth_country#38, customer_login#39, customer_email_address#40, year_total#41, year_total#59, customer_id#67, year_total#68] + +(72) Scan parquet default.customer +Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(73) ColumnarToRow [codegen id : 18] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] + +(74) Filter [codegen id : 18] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(75) Scan parquet default.web_sales +Output [6]: [ws_sold_date_sk#70, ws_bill_customer_sk#71, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(76) ColumnarToRow [codegen id : 16] +Input [6]: [ws_sold_date_sk#70, ws_bill_customer_sk#71, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75] + +(77) Filter [codegen id : 16] +Input [6]: [ws_sold_date_sk#70, ws_bill_customer_sk#71, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75] +Condition : (isnotnull(ws_bill_customer_sk#71) AND isnotnull(ws_sold_date_sk#70)) + +(78) BroadcastExchange +Input [6]: 
[ws_sold_date_sk#70, ws_bill_customer_sk#71, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#76] + +(79) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ws_bill_customer_sk#71] +Join condition: None + +(80) Project [codegen id : 18] +Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#70, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75] +Input [14]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#70, ws_bill_customer_sk#71, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75] + +(81) ReusedExchange [Reuses operator id: 13] +Output [2]: [d_date_sk#16, d_year#17] + +(82) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [ws_sold_date_sk#70] +Right keys [1]: [d_date_sk#16] +Join condition: None + +(83) Project [codegen id : 18] +Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75, d_year#17] +Input [14]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#70, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75, d_date_sk#16, d_year#17] + +(84) HashAggregate [codegen id : 18] +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, 
ws_ext_list_price#75, d_year#17] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#75 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#74 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#72 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#73 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [2]: [sum#77, isEmpty#78] +Results [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#79, isEmpty#80] + +(85) Exchange +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#79, isEmpty#80] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, 5), true, [id=#81] + +(86) HashAggregate [codegen id : 19] +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#79, isEmpty#80] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#75 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#74 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) 
- promote_precision(cast(ws_ext_discount_amt#72 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#73 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#75 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#74 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#72 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#73 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#82] +Results [2]: [c_customer_id#2 AS customer_id#83, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#75 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#74 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#72 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#73 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#82 AS year_total#84] + +(87) Filter [codegen id : 19] +Input [2]: [customer_id#83, year_total#84] +Condition : (isnotnull(year_total#84) AND (year_total#84 > 0.000000)) + +(88) Project [codegen id : 19] +Output [2]: [customer_id#83 AS customer_id#85, year_total#84 AS year_total#86] +Input [2]: [customer_id#83, year_total#84] + +(89) BroadcastExchange +Input [2]: [customer_id#85, year_total#86] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#87] + +(90) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [customer_id#25] +Right keys [1]: 
[customer_id#85] +Join condition: None + +(91) Project [codegen id : 24] +Output [11]: [customer_id#25, customer_id#34, customer_first_name#35, customer_last_name#36, customer_preferred_cust_flag#37, customer_birth_country#38, customer_login#39, customer_email_address#40, year_total#59, year_total#68, year_total#86] +Input [12]: [customer_id#25, customer_id#34, customer_first_name#35, customer_last_name#36, customer_preferred_cust_flag#37, customer_birth_country#38, customer_login#39, customer_email_address#40, year_total#59, year_total#68, customer_id#85, year_total#86] + +(92) Scan parquet default.customer +Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(93) ColumnarToRow [codegen id : 22] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] + +(94) Filter [codegen id : 22] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(95) ReusedExchange [Reuses operator id: 78] +Output [6]: [ws_sold_date_sk#70, ws_bill_customer_sk#71, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75] + +(96) BroadcastHashJoin [codegen id : 22] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ws_bill_customer_sk#71] +Join condition: None + +(97) Project [codegen id : 22] +Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, 
ws_sold_date_sk#70, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75] +Input [14]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#70, ws_bill_customer_sk#71, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75] + +(98) ReusedExchange [Reuses operator id: 29] +Output [2]: [d_date_sk#16, d_year#17] + +(99) BroadcastHashJoin [codegen id : 22] +Left keys [1]: [ws_sold_date_sk#70] +Right keys [1]: [d_date_sk#16] +Join condition: None + +(100) Project [codegen id : 22] +Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75, d_year#17] +Input [14]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#70, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75, d_date_sk#16, d_year#17] + +(101) HashAggregate [codegen id : 22] +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75, d_year#17] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#75 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#74 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#72 as 
decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#73 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [2]: [sum#88, isEmpty#89] +Results [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#90, isEmpty#91] + +(102) Exchange +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#90, isEmpty#91] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, 5), true, [id=#92] + +(103) HashAggregate [codegen id : 23] +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#90, isEmpty#91] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#75 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#74 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#72 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#73 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#75 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#74 as decimal(8,2)))), DecimalType(8,2), true) 
as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#72 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#73 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#93] +Results [2]: [c_customer_id#2 AS customer_id#94, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#75 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#74 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#72 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#73 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#93 AS year_total#95] + +(104) BroadcastExchange +Input [2]: [customer_id#94, year_total#95] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#96] + +(105) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [customer_id#25] +Right keys [1]: [customer_id#94] +Join condition: (CASE WHEN (year_total#59 > 0.000000) THEN CheckOverflow((promote_precision(year_total#68) / promote_precision(year_total#59)), DecimalType(38,14), true) ELSE null END > CASE WHEN (year_total#86 > 0.000000) THEN CheckOverflow((promote_precision(year_total#95) / promote_precision(year_total#86)), DecimalType(38,14), true) ELSE null END) + +(106) Project [codegen id : 24] +Output [7]: [customer_id#34, customer_first_name#35, customer_last_name#36, customer_preferred_cust_flag#37, customer_birth_country#38, customer_login#39, customer_email_address#40] +Input [13]: [customer_id#25, customer_id#34, customer_first_name#35, customer_last_name#36, customer_preferred_cust_flag#37, customer_birth_country#38, customer_login#39, customer_email_address#40, year_total#59, year_total#68, year_total#86, customer_id#94, year_total#95] + 
+(107) TakeOrderedAndProject +Input [7]: [customer_id#34, customer_first_name#35, customer_last_name#36, customer_preferred_cust_flag#37, customer_birth_country#38, customer_login#39, customer_email_address#40] +Arguments: 100, [customer_id#34 ASC NULLS FIRST, customer_first_name#35 ASC NULLS FIRST, customer_last_name#36 ASC NULLS FIRST, customer_preferred_cust_flag#37 ASC NULLS FIRST, customer_birth_country#38 ASC NULLS FIRST, customer_login#39 ASC NULLS FIRST, customer_email_address#40 ASC NULLS FIRST], [customer_id#34, customer_first_name#35, customer_last_name#36, customer_preferred_cust_flag#37, customer_birth_country#38, customer_login#39, customer_email_address#40] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/simplified.txt new file mode 100644 index 0000000000000..3d45f95afa9f0 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/simplified.txt @@ -0,0 +1,158 @@ +TakeOrderedAndProject [customer_birth_country,customer_email_address,customer_first_name,customer_id,customer_last_name,customer_login,customer_preferred_cust_flag] + WholeStageCodegen (24) + Project [customer_birth_country,customer_email_address,customer_first_name,customer_id,customer_last_name,customer_login,customer_preferred_cust_flag] + BroadcastHashJoin [customer_id,customer_id,year_total,year_total,year_total,year_total] + Project [customer_birth_country,customer_email_address,customer_first_name,customer_id,customer_id,customer_last_name,customer_login,customer_preferred_cust_flag,year_total,year_total,year_total] + BroadcastHashJoin [customer_id,customer_id] + Project [customer_birth_country,customer_email_address,customer_first_name,customer_id,customer_id,customer_last_name,customer_login,customer_preferred_cust_flag,year_total,year_total] + BroadcastHashJoin 
[customer_id,customer_id,year_total,year_total,year_total,year_total] + Project [customer_birth_country,customer_email_address,customer_first_name,customer_id,customer_id,customer_last_name,customer_login,customer_preferred_cust_flag,year_total,year_total,year_total] + BroadcastHashJoin [customer_id,customer_id] + BroadcastHashJoin [customer_id,customer_id] + Filter [year_total] + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,isEmpty,sum] [customer_id,isEmpty,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true)),year_total] + InputAdapter + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #1 + WholeStageCodegen (3) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost] [isEmpty,isEmpty,sum,sum] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter 
[c_customer_id,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (7) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,isEmpty,sum] [customer_birth_country,customer_email_address,customer_first_name,customer_id,customer_last_name,customer_login,customer_preferred_cust_flag,isEmpty,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true)),year_total] + InputAdapter + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #5 + WholeStageCodegen (6) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost] [isEmpty,isEmpty,sum,sum] + Project 
[c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_customer_id,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] + InputAdapter + ReusedExchange [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_sold_date_sk] #2 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (11) + Project [customer_id,year_total] + Filter [year_total] + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,isEmpty,sum] [customer_id,isEmpty,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true)),year_total] + InputAdapter + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #8 + 
WholeStageCodegen (10) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,d_year] [isEmpty,isEmpty,sum,sum] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,cs_sold_date_sk] + BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] + Filter [c_customer_id,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (8) + Filter [cs_bill_customer_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #3 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (15) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,isEmpty,sum] [customer_id,isEmpty,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + 
promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true)),year_total] + InputAdapter + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #11 + WholeStageCodegen (14) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,d_year] [isEmpty,isEmpty,sum,sum] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,cs_sold_date_sk] + BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] + Filter [c_customer_id,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] + InputAdapter + ReusedExchange [cs_bill_customer_sk,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,cs_sold_date_sk] #9 + InputAdapter + ReusedExchange [d_date_sk,d_year] #6 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (19) + Project [customer_id,year_total] + Filter [year_total] + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,isEmpty,sum] [customer_id,isEmpty,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as 
decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true)),year_total] + InputAdapter + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #13 + WholeStageCodegen (18) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost] [isEmpty,isEmpty,sum,sum] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Filter [c_customer_id,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (16) + Filter [ws_bill_customer_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #3 + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (23) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,isEmpty,sum] 
[customer_id,isEmpty,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true)),year_total] + InputAdapter + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #16 + WholeStageCodegen (22) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost] [isEmpty,isEmpty,sum,sum] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Filter [c_customer_id,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] + InputAdapter + ReusedExchange [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost,ws_sold_date_sk] #14 + InputAdapter + ReusedExchange [d_date_sk,d_year] #6 diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.sf100/explain.txt new file mode 100644 index 0000000000000..a23b64f179db5 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.sf100/explain.txt @@ -0,0 +1,198 @@ +== Physical Plan == +TakeOrderedAndProject (35) ++- * HashAggregate (34) + +- Exchange (33) + +- * HashAggregate (32) + +- * Project (31) + +- * BroadcastHashJoin Inner BuildRight (30) + :- * Project (25) + : +- * BroadcastHashJoin Inner BuildRight (24) + : :- * Project (19) + : : +- * BroadcastHashJoin Inner BuildRight (18) + : : :- * Project (12) + : : : +- SortMergeJoin LeftOuter (11) + : : : :- * Sort (5) + : : : : +- Exchange (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.catalog_sales (1) + : : : +- * Sort (10) + : : : +- Exchange (9) + : : : +- * Filter (8) + : : : +- * ColumnarToRow (7) + : : : +- Scan parquet default.catalog_returns (6) + : : +- BroadcastExchange (17) + : : +- * Project (16) + : : +- * Filter (15) + : : +- * ColumnarToRow (14) + : : +- Scan parquet default.item (13) + : +- BroadcastExchange (23) + : +- * Filter (22) + : +- * ColumnarToRow (21) + : +- Scan parquet default.date_dim (20) + +- BroadcastExchange (29) + +- * Filter (28) + +- * ColumnarToRow (27) + +- Scan parquet default.warehouse (26) + + +(1) Scan parquet default.catalog_sales +Output [5]: [cs_sold_date_sk#1, cs_warehouse_sk#2, cs_item_sk#3, cs_order_number#4, cs_sales_price#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [5]: [cs_sold_date_sk#1, cs_warehouse_sk#2, 
cs_item_sk#3, cs_order_number#4, cs_sales_price#5] + +(3) Filter [codegen id : 1] +Input [5]: [cs_sold_date_sk#1, cs_warehouse_sk#2, cs_item_sk#3, cs_order_number#4, cs_sales_price#5] +Condition : ((isnotnull(cs_warehouse_sk#2) AND isnotnull(cs_item_sk#3)) AND isnotnull(cs_sold_date_sk#1)) + +(4) Exchange +Input [5]: [cs_sold_date_sk#1, cs_warehouse_sk#2, cs_item_sk#3, cs_order_number#4, cs_sales_price#5] +Arguments: hashpartitioning(cs_order_number#4, cs_item_sk#3, 5), true, [id=#6] + +(5) Sort [codegen id : 2] +Input [5]: [cs_sold_date_sk#1, cs_warehouse_sk#2, cs_item_sk#3, cs_order_number#4, cs_sales_price#5] +Arguments: [cs_order_number#4 ASC NULLS FIRST, cs_item_sk#3 ASC NULLS FIRST], false, 0 + +(6) Scan parquet default.catalog_returns +Output [3]: [cr_item_sk#7, cr_order_number#8, cr_refunded_cash#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_returns] +PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 3] +Input [3]: [cr_item_sk#7, cr_order_number#8, cr_refunded_cash#9] + +(8) Filter [codegen id : 3] +Input [3]: [cr_item_sk#7, cr_order_number#8, cr_refunded_cash#9] +Condition : (isnotnull(cr_order_number#8) AND isnotnull(cr_item_sk#7)) + +(9) Exchange +Input [3]: [cr_item_sk#7, cr_order_number#8, cr_refunded_cash#9] +Arguments: hashpartitioning(cr_order_number#8, cr_item_sk#7, 5), true, [id=#10] + +(10) Sort [codegen id : 4] +Input [3]: [cr_item_sk#7, cr_order_number#8, cr_refunded_cash#9] +Arguments: [cr_order_number#8 ASC NULLS FIRST, cr_item_sk#7 ASC NULLS FIRST], false, 0 + +(11) SortMergeJoin +Left keys [2]: [cs_order_number#4, cs_item_sk#3] +Right keys [2]: [cr_order_number#8, cr_item_sk#7] +Join condition: None + +(12) Project [codegen id : 8] +Output [5]: [cs_sold_date_sk#1, cs_warehouse_sk#2, cs_item_sk#3, cs_sales_price#5, cr_refunded_cash#9] +Input 
[8]: [cs_sold_date_sk#1, cs_warehouse_sk#2, cs_item_sk#3, cs_order_number#4, cs_sales_price#5, cr_item_sk#7, cr_order_number#8, cr_refunded_cash#9] + +(13) Scan parquet default.item +Output [3]: [i_item_sk#11, i_item_id#12, i_current_price#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,0.99), LessThanOrEqual(i_current_price,1.49), IsNotNull(i_item_sk)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 5] +Input [3]: [i_item_sk#11, i_item_id#12, i_current_price#13] + +(15) Filter [codegen id : 5] +Input [3]: [i_item_sk#11, i_item_id#12, i_current_price#13] +Condition : (((isnotnull(i_current_price#13) AND (i_current_price#13 >= 0.99)) AND (i_current_price#13 <= 1.49)) AND isnotnull(i_item_sk#11)) + +(16) Project [codegen id : 5] +Output [2]: [i_item_sk#11, i_item_id#12] +Input [3]: [i_item_sk#11, i_item_id#12, i_current_price#13] + +(17) BroadcastExchange +Input [2]: [i_item_sk#11, i_item_id#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] + +(18) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [cs_item_sk#3] +Right keys [1]: [i_item_sk#11] +Join condition: None + +(19) Project [codegen id : 8] +Output [5]: [cs_sold_date_sk#1, cs_warehouse_sk#2, cs_sales_price#5, cr_refunded_cash#9, i_item_id#12] +Input [7]: [cs_sold_date_sk#1, cs_warehouse_sk#2, cs_item_sk#3, cs_sales_price#5, cr_refunded_cash#9, i_item_sk#11, i_item_id#12] + +(20) Scan parquet default.date_dim +Output [2]: [d_date_sk#15, d_date#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-10), LessThanOrEqual(d_date,2000-04-10), 
IsNotNull(d_date_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [2]: [d_date_sk#15, d_date#16] + +(22) Filter [codegen id : 6] +Input [2]: [d_date_sk#15, d_date#16] +Condition : (((isnotnull(d_date#16) AND (d_date#16 >= 10997)) AND (d_date#16 <= 11057)) AND isnotnull(d_date_sk#15)) + +(23) BroadcastExchange +Input [2]: [d_date_sk#15, d_date#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] + +(24) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#15] +Join condition: None + +(25) Project [codegen id : 8] +Output [5]: [cs_warehouse_sk#2, cs_sales_price#5, cr_refunded_cash#9, i_item_id#12, d_date#16] +Input [7]: [cs_sold_date_sk#1, cs_warehouse_sk#2, cs_sales_price#5, cr_refunded_cash#9, i_item_id#12, d_date_sk#15, d_date#16] + +(26) Scan parquet default.warehouse +Output [2]: [w_warehouse_sk#18, w_state#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 7] +Input [2]: [w_warehouse_sk#18, w_state#19] + +(28) Filter [codegen id : 7] +Input [2]: [w_warehouse_sk#18, w_state#19] +Condition : isnotnull(w_warehouse_sk#18) + +(29) BroadcastExchange +Input [2]: [w_warehouse_sk#18, w_state#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#20] + +(30) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [cs_warehouse_sk#2] +Right keys [1]: [w_warehouse_sk#18] +Join condition: None + +(31) Project [codegen id : 8] +Output [5]: [cs_sales_price#5, cr_refunded_cash#9, w_state#19, i_item_id#12, d_date#16] +Input [7]: [cs_warehouse_sk#2, cs_sales_price#5, cr_refunded_cash#9, i_item_id#12, d_date#16, w_warehouse_sk#18, w_state#19] + +(32) HashAggregate [codegen id : 8] +Input [5]: 
[cs_sales_price#5, cr_refunded_cash#9, w_state#19, i_item_id#12, d_date#16] +Keys [2]: [w_state#19, i_item_id#12] +Functions [2]: [partial_sum(CASE WHEN (d_date#16 < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#5 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#9 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_date#16 >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#5 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#9 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)] +Aggregate Attributes [4]: [sum#21, isEmpty#22, sum#23, isEmpty#24] +Results [6]: [w_state#19, i_item_id#12, sum#25, isEmpty#26, sum#27, isEmpty#28] + +(33) Exchange +Input [6]: [w_state#19, i_item_id#12, sum#25, isEmpty#26, sum#27, isEmpty#28] +Arguments: hashpartitioning(w_state#19, i_item_id#12, 5), true, [id=#29] + +(34) HashAggregate [codegen id : 9] +Input [6]: [w_state#19, i_item_id#12, sum#25, isEmpty#26, sum#27, isEmpty#28] +Keys [2]: [w_state#19, i_item_id#12] +Functions [2]: [sum(CASE WHEN (d_date#16 < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#5 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#9 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END), sum(CASE WHEN (d_date#16 >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#5 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#9 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)] +Aggregate Attributes [2]: [sum(CASE WHEN (d_date#16 < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#5 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#9 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)#30, sum(CASE WHEN (d_date#16 >= 11027) 
THEN CheckOverflow((promote_precision(cast(cs_sales_price#5 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#9 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)#31] +Results [4]: [w_state#19, i_item_id#12, sum(CASE WHEN (d_date#16 < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#5 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#9 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)#30 AS sales_before#32, sum(CASE WHEN (d_date#16 >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#5 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#9 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)#31 AS sales_after#33] + +(35) TakeOrderedAndProject +Input [4]: [w_state#19, i_item_id#12, sales_before#32, sales_after#33] +Arguments: 100, [w_state#19 ASC NULLS FIRST, i_item_id#12 ASC NULLS FIRST], [w_state#19, i_item_id#12, sales_before#32, sales_after#33] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.sf100/simplified.txt new file mode 100644 index 0000000000000..243a3b5957787 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.sf100/simplified.txt @@ -0,0 +1,56 @@ +TakeOrderedAndProject [i_item_id,sales_after,sales_before,w_state] + WholeStageCodegen (9) + HashAggregate [i_item_id,isEmpty,isEmpty,sum,sum,w_state] [isEmpty,isEmpty,sales_after,sales_before,sum,sum,sum(CASE WHEN (d_date < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END),sum(CASE WHEN (d_date >= 11027) THEN 
CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)] + InputAdapter + Exchange [i_item_id,w_state] #1 + WholeStageCodegen (8) + HashAggregate [cr_refunded_cash,cs_sales_price,d_date,i_item_id,w_state] [isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum] + Project [cr_refunded_cash,cs_sales_price,d_date,i_item_id,w_state] + BroadcastHashJoin [cs_warehouse_sk,w_warehouse_sk] + Project [cr_refunded_cash,cs_sales_price,cs_warehouse_sk,d_date,i_item_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cr_refunded_cash,cs_sales_price,cs_sold_date_sk,cs_warehouse_sk,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cr_refunded_cash,cs_item_sk,cs_sales_price,cs_sold_date_sk,cs_warehouse_sk] + InputAdapter + SortMergeJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + WholeStageCodegen (2) + Sort [cs_item_sk,cs_order_number] + InputAdapter + Exchange [cs_item_sk,cs_order_number] #2 + WholeStageCodegen (1) + Filter [cs_item_sk,cs_sold_date_sk,cs_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_item_sk,cs_order_number,cs_sales_price,cs_sold_date_sk,cs_warehouse_sk] + WholeStageCodegen (4) + Sort [cr_item_sk,cr_order_number] + InputAdapter + Exchange [cr_item_sk,cr_order_number] #3 + WholeStageCodegen (3) + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_refunded_cash] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (5) + Project [i_item_id,i_item_sk] + Filter [i_current_price,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_current_price,i_item_id,i_item_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (6) + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] 
+ InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.warehouse [w_state,w_warehouse_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/explain.txt new file mode 100644 index 0000000000000..cf2f3f895781e --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/explain.txt @@ -0,0 +1,183 @@ +== Physical Plan == +TakeOrderedAndProject (32) ++- * HashAggregate (31) + +- Exchange (30) + +- * HashAggregate (29) + +- * Project (28) + +- * BroadcastHashJoin Inner BuildRight (27) + :- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Project (9) + : : : +- * BroadcastHashJoin LeftOuter BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.catalog_sales (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.catalog_returns (4) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet default.warehouse (10) + : +- BroadcastExchange (20) + : +- * Project (19) + : +- * Filter (18) + : +- * ColumnarToRow (17) + : +- Scan parquet default.item (16) + +- BroadcastExchange (26) + +- * Filter (25) + +- * ColumnarToRow (24) + +- Scan parquet default.date_dim (23) + + +(1) Scan parquet default.catalog_sales +Output [5]: [cs_sold_date_sk#1, cs_warehouse_sk#2, cs_item_sk#3, cs_order_number#4, cs_sales_price#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] 
+ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [5]: [cs_sold_date_sk#1, cs_warehouse_sk#2, cs_item_sk#3, cs_order_number#4, cs_sales_price#5] + +(3) Filter [codegen id : 5] +Input [5]: [cs_sold_date_sk#1, cs_warehouse_sk#2, cs_item_sk#3, cs_order_number#4, cs_sales_price#5] +Condition : ((isnotnull(cs_warehouse_sk#2) AND isnotnull(cs_item_sk#3)) AND isnotnull(cs_sold_date_sk#1)) + +(4) Scan parquet default.catalog_returns +Output [3]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_returns] +PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] + +(6) Filter [codegen id : 1] +Input [3]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] +Condition : (isnotnull(cr_order_number#7) AND isnotnull(cr_item_sk#6)) + +(7) BroadcastExchange +Input [3]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, false] as bigint), 32) | (cast(input[0, int, false] as bigint) & 4294967295))),false), [id=#9] + +(8) BroadcastHashJoin [codegen id : 5] +Left keys [2]: [cs_order_number#4, cs_item_sk#3] +Right keys [2]: [cr_order_number#7, cr_item_sk#6] +Join condition: None + +(9) Project [codegen id : 5] +Output [5]: [cs_sold_date_sk#1, cs_warehouse_sk#2, cs_item_sk#3, cs_sales_price#5, cr_refunded_cash#8] +Input [8]: [cs_sold_date_sk#1, cs_warehouse_sk#2, cs_item_sk#3, cs_order_number#4, cs_sales_price#5, cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] + +(10) Scan parquet default.warehouse +Output [2]: [w_warehouse_sk#10, w_state#11] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [w_warehouse_sk#10, w_state#11] + +(12) Filter [codegen id : 2] +Input [2]: [w_warehouse_sk#10, w_state#11] +Condition : isnotnull(w_warehouse_sk#10) + +(13) BroadcastExchange +Input [2]: [w_warehouse_sk#10, w_state#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] + +(14) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_warehouse_sk#2] +Right keys [1]: [w_warehouse_sk#10] +Join condition: None + +(15) Project [codegen id : 5] +Output [5]: [cs_sold_date_sk#1, cs_item_sk#3, cs_sales_price#5, cr_refunded_cash#8, w_state#11] +Input [7]: [cs_sold_date_sk#1, cs_warehouse_sk#2, cs_item_sk#3, cs_sales_price#5, cr_refunded_cash#8, w_warehouse_sk#10, w_state#11] + +(16) Scan parquet default.item +Output [3]: [i_item_sk#13, i_item_id#14, i_current_price#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,0.99), LessThanOrEqual(i_current_price,1.49), IsNotNull(i_item_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [i_item_sk#13, i_item_id#14, i_current_price#15] + +(18) Filter [codegen id : 3] +Input [3]: [i_item_sk#13, i_item_id#14, i_current_price#15] +Condition : (((isnotnull(i_current_price#15) AND (i_current_price#15 >= 0.99)) AND (i_current_price#15 <= 1.49)) AND isnotnull(i_item_sk#13)) + +(19) Project [codegen id : 3] +Output [2]: [i_item_sk#13, i_item_id#14] +Input [3]: [i_item_sk#13, i_item_id#14, i_current_price#15] + +(20) BroadcastExchange +Input [2]: [i_item_sk#13, i_item_id#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as 
bigint)),false), [id=#16] + +(21) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_item_sk#3] +Right keys [1]: [i_item_sk#13] +Join condition: None + +(22) Project [codegen id : 5] +Output [5]: [cs_sold_date_sk#1, cs_sales_price#5, cr_refunded_cash#8, w_state#11, i_item_id#14] +Input [7]: [cs_sold_date_sk#1, cs_item_sk#3, cs_sales_price#5, cr_refunded_cash#8, w_state#11, i_item_sk#13, i_item_id#14] + +(23) Scan parquet default.date_dim +Output [2]: [d_date_sk#17, d_date#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-10), LessThanOrEqual(d_date,2000-04-10), IsNotNull(d_date_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [2]: [d_date_sk#17, d_date#18] + +(25) Filter [codegen id : 4] +Input [2]: [d_date_sk#17, d_date#18] +Condition : (((isnotnull(d_date#18) AND (d_date#18 >= 10997)) AND (d_date#18 <= 11057)) AND isnotnull(d_date_sk#17)) + +(26) BroadcastExchange +Input [2]: [d_date_sk#17, d_date#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] + +(27) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#17] +Join condition: None + +(28) Project [codegen id : 5] +Output [5]: [cs_sales_price#5, cr_refunded_cash#8, w_state#11, i_item_id#14, d_date#18] +Input [7]: [cs_sold_date_sk#1, cs_sales_price#5, cr_refunded_cash#8, w_state#11, i_item_id#14, d_date_sk#17, d_date#18] + +(29) HashAggregate [codegen id : 5] +Input [5]: [cs_sales_price#5, cr_refunded_cash#8, w_state#11, i_item_id#14, d_date#18] +Keys [2]: [w_state#11, i_item_id#14] +Functions [2]: [partial_sum(CASE WHEN (d_date#18 < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#5 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00) as 
decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_date#18 >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#5 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)] +Aggregate Attributes [4]: [sum#20, isEmpty#21, sum#22, isEmpty#23] +Results [6]: [w_state#11, i_item_id#14, sum#24, isEmpty#25, sum#26, isEmpty#27] + +(30) Exchange +Input [6]: [w_state#11, i_item_id#14, sum#24, isEmpty#25, sum#26, isEmpty#27] +Arguments: hashpartitioning(w_state#11, i_item_id#14, 5), true, [id=#28] + +(31) HashAggregate [codegen id : 6] +Input [6]: [w_state#11, i_item_id#14, sum#24, isEmpty#25, sum#26, isEmpty#27] +Keys [2]: [w_state#11, i_item_id#14] +Functions [2]: [sum(CASE WHEN (d_date#18 < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#5 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END), sum(CASE WHEN (d_date#18 >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#5 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)] +Aggregate Attributes [2]: [sum(CASE WHEN (d_date#18 < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#5 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)#29, sum(CASE WHEN (d_date#18 >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#5 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)#30] +Results [4]: [w_state#11, i_item_id#14, sum(CASE WHEN (d_date#18 < 11027) THEN 
CheckOverflow((promote_precision(cast(cs_sales_price#5 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)#29 AS sales_before#31, sum(CASE WHEN (d_date#18 >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#5 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)#30 AS sales_after#32] + +(32) TakeOrderedAndProject +Input [4]: [w_state#11, i_item_id#14, sales_before#31, sales_after#32] +Arguments: 100, [w_state#11 ASC NULLS FIRST, i_item_id#14 ASC NULLS FIRST], [w_state#11, i_item_id#14, sales_before#31, sales_after#32] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/simplified.txt new file mode 100644 index 0000000000000..067e778838b6d --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/simplified.txt @@ -0,0 +1,48 @@ +TakeOrderedAndProject [i_item_id,sales_after,sales_before,w_state] + WholeStageCodegen (6) + HashAggregate [i_item_id,isEmpty,isEmpty,sum,sum,w_state] [isEmpty,isEmpty,sales_after,sales_before,sum,sum,sum(CASE WHEN (d_date < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END),sum(CASE WHEN (d_date >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)] + InputAdapter + Exchange [i_item_id,w_state] #1 + WholeStageCodegen (5) + HashAggregate [cr_refunded_cash,cs_sales_price,d_date,i_item_id,w_state] 
[isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum] + Project [cr_refunded_cash,cs_sales_price,d_date,i_item_id,w_state] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cr_refunded_cash,cs_sales_price,cs_sold_date_sk,i_item_id,w_state] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cr_refunded_cash,cs_item_sk,cs_sales_price,cs_sold_date_sk,w_state] + BroadcastHashJoin [cs_warehouse_sk,w_warehouse_sk] + Project [cr_refunded_cash,cs_item_sk,cs_sales_price,cs_sold_date_sk,cs_warehouse_sk] + BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + Filter [cs_item_sk,cs_sold_date_sk,cs_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_item_sk,cs_order_number,cs_sales_price,cs_sold_date_sk,cs_warehouse_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_refunded_cash] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.warehouse [w_state,w_warehouse_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [i_item_id,i_item_sk] + Filter [i_current_price,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_current_price,i_item_id,i_item_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.sf100/explain.txt new file mode 100644 index 0000000000000..7581089a6014c --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.sf100/explain.txt @@ -0,0 +1,120 @@ +== Physical Plan == +TakeOrderedAndProject 
(20) ++- * HashAggregate (19) + +- Exchange (18) + +- * HashAggregate (17) + +- * Project (16) + +- * BroadcastHashJoin Inner BuildRight (15) + :- * Project (4) + : +- * Filter (3) + : +- * ColumnarToRow (2) + : +- Scan parquet default.item (1) + +- BroadcastExchange (14) + +- * Project (13) + +- * Filter (12) + +- * HashAggregate (11) + +- Exchange (10) + +- * HashAggregate (9) + +- * Project (8) + +- * Filter (7) + +- * ColumnarToRow (6) + +- Scan parquet default.item (5) + + +(1) Scan parquet default.item +Output [3]: [i_manufact_id#1, i_manufact#2, i_product_name#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_manufact_id), GreaterThanOrEqual(i_manufact_id,738), LessThanOrEqual(i_manufact_id,778), IsNotNull(i_manufact)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [i_manufact_id#1, i_manufact#2, i_product_name#3] + +(3) Filter [codegen id : 3] +Input [3]: [i_manufact_id#1, i_manufact#2, i_product_name#3] +Condition : (((isnotnull(i_manufact_id#1) AND (i_manufact_id#1 >= 738)) AND (i_manufact_id#1 <= 778)) AND isnotnull(i_manufact#2)) + +(4) Project [codegen id : 3] +Output [2]: [i_manufact#2, i_product_name#3] +Input [3]: [i_manufact_id#1, i_manufact#2, i_product_name#3] + +(5) Scan parquet default.item +Output [5]: [i_category#4, i_manufact#2, i_size#5, i_color#6, i_units#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [Or(Or(And(EqualTo(i_category,Women),Or(And(And(Or(EqualTo(i_color,powder),EqualTo(i_color,khaki)),Or(EqualTo(i_units,Ounce),EqualTo(i_units,Oz))),Or(EqualTo(i_size,medium),EqualTo(i_size,extra 
large))),And(And(Or(EqualTo(i_color,brown),EqualTo(i_color,honeydew)),Or(EqualTo(i_units,Bunch),EqualTo(i_units,Ton))),Or(EqualTo(i_size,N/A),EqualTo(i_size,small))))),And(EqualTo(i_category,Men),Or(And(And(Or(EqualTo(i_color,floral),EqualTo(i_color,deep)),Or(EqualTo(i_units,N/A),EqualTo(i_units,Dozen))),Or(EqualTo(i_size,petite),EqualTo(i_size,large))),And(And(Or(EqualTo(i_color,light),EqualTo(i_color,cornflower)),Or(EqualTo(i_units,Box),EqualTo(i_units,Pound))),Or(EqualTo(i_size,medium),EqualTo(i_size,extra large)))))),Or(And(EqualTo(i_category,Women),Or(And(And(Or(EqualTo(i_color,midnight),EqualTo(i_color,snow)),Or(EqualTo(i_units,Pallet),EqualTo(i_units,Gross))),Or(EqualTo(i_size,medium),EqualTo(i_size,extra large))),And(And(Or(EqualTo(i_color,cyan),EqualTo(i_color,papaya)),Or(EqualTo(i_units,Cup),EqualTo(i_units,Dram))),Or(EqualTo(i_size,N/A),EqualTo(i_size,small))))),And(EqualTo(i_category,Men),Or(And(And(Or(EqualTo(i_color,orange),EqualTo(i_color,frosted)),Or(EqualTo(i_units,Each),EqualTo(i_units,Tbl))),Or(EqualTo(i_size,petite),EqualTo(i_size,large))),And(And(Or(EqualTo(i_color,forest),EqualTo(i_color,ghost)),Or(EqualTo(i_units,Lb),EqualTo(i_units,Bundle))),Or(EqualTo(i_size,medium),EqualTo(i_size,extra large))))))), IsNotNull(i_manufact)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [5]: [i_category#4, i_manufact#2, i_size#5, i_color#6, i_units#7] + +(7) Filter [codegen id : 1] +Input [5]: [i_category#4, i_manufact#2, i_size#5, i_color#6, i_units#7] +Condition : (((((i_category#4 = Women) AND (((((i_color#6 = powder) OR (i_color#6 = khaki)) AND ((i_units#7 = Ounce) OR (i_units#7 = Oz))) AND ((i_size#5 = medium) OR (i_size#5 = extra large))) OR ((((i_color#6 = brown) OR (i_color#6 = honeydew)) AND ((i_units#7 = Bunch) OR (i_units#7 = Ton))) AND ((i_size#5 = N/A) OR (i_size#5 = small))))) OR ((i_category#4 = Men) AND (((((i_color#6 = floral) OR (i_color#6 = deep)) AND ((i_units#7 = N/A) OR (i_units#7 = Dozen))) AND ((i_size#5 = petite) OR 
(i_size#5 = large))) OR ((((i_color#6 = light) OR (i_color#6 = cornflower)) AND ((i_units#7 = Box) OR (i_units#7 = Pound))) AND ((i_size#5 = medium) OR (i_size#5 = extra large)))))) OR (((i_category#4 = Women) AND (((((i_color#6 = midnight) OR (i_color#6 = snow)) AND ((i_units#7 = Pallet) OR (i_units#7 = Gross))) AND ((i_size#5 = medium) OR (i_size#5 = extra large))) OR ((((i_color#6 = cyan) OR (i_color#6 = papaya)) AND ((i_units#7 = Cup) OR (i_units#7 = Dram))) AND ((i_size#5 = N/A) OR (i_size#5 = small))))) OR ((i_category#4 = Men) AND (((((i_color#6 = orange) OR (i_color#6 = frosted)) AND ((i_units#7 = Each) OR (i_units#7 = Tbl))) AND ((i_size#5 = petite) OR (i_size#5 = large))) OR ((((i_color#6 = forest) OR (i_color#6 = ghost)) AND ((i_units#7 = Lb) OR (i_units#7 = Bundle))) AND ((i_size#5 = medium) OR (i_size#5 = extra large))))))) AND isnotnull(i_manufact#2)) + +(8) Project [codegen id : 1] +Output [1]: [i_manufact#2] +Input [5]: [i_category#4, i_manufact#2, i_size#5, i_color#6, i_units#7] + +(9) HashAggregate [codegen id : 1] +Input [1]: [i_manufact#2] +Keys [1]: [i_manufact#2] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#8] +Results [2]: [i_manufact#2, count#9] + +(10) Exchange +Input [2]: [i_manufact#2, count#9] +Arguments: hashpartitioning(i_manufact#2, 5), true, [id=#10] + +(11) HashAggregate [codegen id : 2] +Input [2]: [i_manufact#2, count#9] +Keys [1]: [i_manufact#2] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#11] +Results [3]: [count(1)#11 AS item_cnt#12, i_manufact#2 AS i_manufact#2#13, true AS alwaysTrue#14] + +(12) Filter [codegen id : 2] +Input [3]: [item_cnt#12, i_manufact#2#13, alwaysTrue#14] +Condition : (if (isnull(alwaysTrue#14)) 0 else item_cnt#12 > 0) + +(13) Project [codegen id : 2] +Output [1]: [i_manufact#2#13] +Input [3]: [item_cnt#12, i_manufact#2#13, alwaysTrue#14] + +(14) BroadcastExchange +Input [1]: [i_manufact#2#13] +Arguments: HashedRelationBroadcastMode(List(input[0, string, 
true]),false), [id=#15] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [i_manufact#2] +Right keys [1]: [i_manufact#2#13] +Join condition: None + +(16) Project [codegen id : 3] +Output [1]: [i_product_name#3] +Input [3]: [i_manufact#2, i_product_name#3, i_manufact#2#13] + +(17) HashAggregate [codegen id : 3] +Input [1]: [i_product_name#3] +Keys [1]: [i_product_name#3] +Functions: [] +Aggregate Attributes: [] +Results [1]: [i_product_name#3] + +(18) Exchange +Input [1]: [i_product_name#3] +Arguments: hashpartitioning(i_product_name#3, 5), true, [id=#16] + +(19) HashAggregate [codegen id : 4] +Input [1]: [i_product_name#3] +Keys [1]: [i_product_name#3] +Functions: [] +Aggregate Attributes: [] +Results [1]: [i_product_name#3] + +(20) TakeOrderedAndProject +Input [1]: [i_product_name#3] +Arguments: 100, [i_product_name#3 ASC NULLS FIRST], [i_product_name#3] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.sf100/simplified.txt new file mode 100644 index 0000000000000..c5e1d6cb8dce8 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.sf100/simplified.txt @@ -0,0 +1,29 @@ +TakeOrderedAndProject [i_product_name] + WholeStageCodegen (4) + HashAggregate [i_product_name] + InputAdapter + Exchange [i_product_name] #1 + WholeStageCodegen (3) + HashAggregate [i_product_name] + Project [i_product_name] + BroadcastHashJoin [i_manufact,i_manufact] + Project [i_manufact,i_product_name] + Filter [i_manufact,i_manufact_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_manufact,i_manufact_id,i_product_name] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (2) + Project [i_manufact] + Filter [alwaysTrue,item_cnt] + HashAggregate [count,i_manufact] [alwaysTrue,count,count(1),i_manufact,item_cnt] + InputAdapter + Exchange [i_manufact] #3 + WholeStageCodegen (1) + HashAggregate 
[i_manufact] [count,count] + Project [i_manufact] + Filter [i_category,i_color,i_manufact,i_size,i_units] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_color,i_manufact,i_size,i_units] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/explain.txt new file mode 100644 index 0000000000000..9357ba4edb9d5 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/explain.txt @@ -0,0 +1,120 @@ +== Physical Plan == +TakeOrderedAndProject (20) ++- * HashAggregate (19) + +- Exchange (18) + +- * HashAggregate (17) + +- * Project (16) + +- * BroadcastHashJoin Inner BuildRight (15) + :- * Project (4) + : +- * Filter (3) + : +- * ColumnarToRow (2) + : +- Scan parquet default.item (1) + +- BroadcastExchange (14) + +- * Project (13) + +- * Filter (12) + +- * HashAggregate (11) + +- Exchange (10) + +- * HashAggregate (9) + +- * Project (8) + +- * Filter (7) + +- * ColumnarToRow (6) + +- Scan parquet default.item (5) + + +(1) Scan parquet default.item +Output [3]: [i_manufact_id#1, i_manufact#2, i_product_name#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_manufact_id), GreaterThanOrEqual(i_manufact_id,738), LessThanOrEqual(i_manufact_id,778), IsNotNull(i_manufact)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [i_manufact_id#1, i_manufact#2, i_product_name#3] + +(3) Filter [codegen id : 3] +Input [3]: [i_manufact_id#1, i_manufact#2, i_product_name#3] +Condition : (((isnotnull(i_manufact_id#1) AND (i_manufact_id#1 >= 738)) AND (i_manufact_id#1 <= 778)) AND isnotnull(i_manufact#2)) + +(4) Project [codegen id : 3] +Output [2]: [i_manufact#2, i_product_name#3] +Input [3]: [i_manufact_id#1, i_manufact#2, i_product_name#3] + +(5) Scan 
parquet default.item +Output [5]: [i_category#4, i_manufact#2, i_size#5, i_color#6, i_units#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [Or(Or(And(EqualTo(i_category,Women),Or(And(And(Or(EqualTo(i_color,powder),EqualTo(i_color,khaki)),Or(EqualTo(i_units,Ounce),EqualTo(i_units,Oz))),Or(EqualTo(i_size,medium),EqualTo(i_size,extra large))),And(And(Or(EqualTo(i_color,brown),EqualTo(i_color,honeydew)),Or(EqualTo(i_units,Bunch),EqualTo(i_units,Ton))),Or(EqualTo(i_size,N/A),EqualTo(i_size,small))))),And(EqualTo(i_category,Men),Or(And(And(Or(EqualTo(i_color,floral),EqualTo(i_color,deep)),Or(EqualTo(i_units,N/A),EqualTo(i_units,Dozen))),Or(EqualTo(i_size,petite),EqualTo(i_size,large))),And(And(Or(EqualTo(i_color,light),EqualTo(i_color,cornflower)),Or(EqualTo(i_units,Box),EqualTo(i_units,Pound))),Or(EqualTo(i_size,medium),EqualTo(i_size,extra large)))))),Or(And(EqualTo(i_category,Women),Or(And(And(Or(EqualTo(i_color,midnight),EqualTo(i_color,snow)),Or(EqualTo(i_units,Pallet),EqualTo(i_units,Gross))),Or(EqualTo(i_size,medium),EqualTo(i_size,extra large))),And(And(Or(EqualTo(i_color,cyan),EqualTo(i_color,papaya)),Or(EqualTo(i_units,Cup),EqualTo(i_units,Dram))),Or(EqualTo(i_size,N/A),EqualTo(i_size,small))))),And(EqualTo(i_category,Men),Or(And(And(Or(EqualTo(i_color,orange),EqualTo(i_color,frosted)),Or(EqualTo(i_units,Each),EqualTo(i_units,Tbl))),Or(EqualTo(i_size,petite),EqualTo(i_size,large))),And(And(Or(EqualTo(i_color,forest),EqualTo(i_color,ghost)),Or(EqualTo(i_units,Lb),EqualTo(i_units,Bundle))),Or(EqualTo(i_size,medium),EqualTo(i_size,extra large))))))), IsNotNull(i_manufact)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [5]: [i_category#4, i_manufact#2, i_size#5, i_color#6, i_units#7] + +(7) Filter [codegen id : 1] +Input [5]: [i_category#4, i_manufact#2, i_size#5, i_color#6, i_units#7] +Condition : 
(((((i_category#4 = Women) AND (((((i_color#6 = powder) OR (i_color#6 = khaki)) AND ((i_units#7 = Ounce) OR (i_units#7 = Oz))) AND ((i_size#5 = medium) OR (i_size#5 = extra large))) OR ((((i_color#6 = brown) OR (i_color#6 = honeydew)) AND ((i_units#7 = Bunch) OR (i_units#7 = Ton))) AND ((i_size#5 = N/A) OR (i_size#5 = small))))) OR ((i_category#4 = Men) AND (((((i_color#6 = floral) OR (i_color#6 = deep)) AND ((i_units#7 = N/A) OR (i_units#7 = Dozen))) AND ((i_size#5 = petite) OR (i_size#5 = large))) OR ((((i_color#6 = light) OR (i_color#6 = cornflower)) AND ((i_units#7 = Box) OR (i_units#7 = Pound))) AND ((i_size#5 = medium) OR (i_size#5 = extra large)))))) OR (((i_category#4 = Women) AND (((((i_color#6 = midnight) OR (i_color#6 = snow)) AND ((i_units#7 = Pallet) OR (i_units#7 = Gross))) AND ((i_size#5 = medium) OR (i_size#5 = extra large))) OR ((((i_color#6 = cyan) OR (i_color#6 = papaya)) AND ((i_units#7 = Cup) OR (i_units#7 = Dram))) AND ((i_size#5 = N/A) OR (i_size#5 = small))))) OR ((i_category#4 = Men) AND (((((i_color#6 = orange) OR (i_color#6 = frosted)) AND ((i_units#7 = Each) OR (i_units#7 = Tbl))) AND ((i_size#5 = petite) OR (i_size#5 = large))) OR ((((i_color#6 = forest) OR (i_color#6 = ghost)) AND ((i_units#7 = Lb) OR (i_units#7 = Bundle))) AND ((i_size#5 = medium) OR (i_size#5 = extra large))))))) AND isnotnull(i_manufact#2)) + +(8) Project [codegen id : 1] +Output [1]: [i_manufact#2] +Input [5]: [i_category#4, i_manufact#2, i_size#5, i_color#6, i_units#7] + +(9) HashAggregate [codegen id : 1] +Input [1]: [i_manufact#2] +Keys [1]: [i_manufact#2] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#8] +Results [2]: [i_manufact#2, count#9] + +(10) Exchange +Input [2]: [i_manufact#2, count#9] +Arguments: hashpartitioning(i_manufact#2, 5), true, [id=#10] + +(11) HashAggregate [codegen id : 2] +Input [2]: [i_manufact#2, count#9] +Keys [1]: [i_manufact#2] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#11] +Results [3]: 
[count(1)#11 AS item_cnt#12, i_manufact#2 AS i_manufact#2#13, true AS alwaysTrue#14] + +(12) Filter [codegen id : 2] +Input [3]: [item_cnt#12, i_manufact#2#13, alwaysTrue#14] +Condition : (if (isnull(alwaysTrue#14)) 0 else item_cnt#12 > 0) + +(13) Project [codegen id : 2] +Output [1]: [i_manufact#2#13] +Input [3]: [item_cnt#12, i_manufact#2#13, alwaysTrue#14] + +(14) BroadcastExchange +Input [1]: [i_manufact#2#13] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#15] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [i_manufact#2] +Right keys [1]: [i_manufact#2#13] +Join condition: None + +(16) Project [codegen id : 3] +Output [1]: [i_product_name#3] +Input [3]: [i_manufact#2, i_product_name#3, i_manufact#2#13] + +(17) HashAggregate [codegen id : 3] +Input [1]: [i_product_name#3] +Keys [1]: [i_product_name#3] +Functions: [] +Aggregate Attributes: [] +Results [1]: [i_product_name#3] + +(18) Exchange +Input [1]: [i_product_name#3] +Arguments: hashpartitioning(i_product_name#3, 5), true, [id=#16] + +(19) HashAggregate [codegen id : 4] +Input [1]: [i_product_name#3] +Keys [1]: [i_product_name#3] +Functions: [] +Aggregate Attributes: [] +Results [1]: [i_product_name#3] + +(20) TakeOrderedAndProject +Input [1]: [i_product_name#3] +Arguments: 100, [i_product_name#3 ASC NULLS FIRST], [i_product_name#3] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/simplified.txt new file mode 100644 index 0000000000000..c5e1d6cb8dce8 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/simplified.txt @@ -0,0 +1,29 @@ +TakeOrderedAndProject [i_product_name] + WholeStageCodegen (4) + HashAggregate [i_product_name] + InputAdapter + Exchange [i_product_name] #1 + WholeStageCodegen (3) + HashAggregate [i_product_name] + Project [i_product_name] + BroadcastHashJoin 
[i_manufact,i_manufact] + Project [i_manufact,i_product_name] + Filter [i_manufact,i_manufact_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_manufact,i_manufact_id,i_product_name] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (2) + Project [i_manufact] + Filter [alwaysTrue,item_cnt] + HashAggregate [count,i_manufact] [alwaysTrue,count,count(1),i_manufact,item_cnt] + InputAdapter + Exchange [i_manufact] #3 + WholeStageCodegen (1) + HashAggregate [i_manufact] [count,count] + Project [i_manufact] + Filter [i_category,i_color,i_manufact,i_size,i_units] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_color,i_manufact,i_size,i_units] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42.sf100/explain.txt new file mode 100644 index 0000000000000..5f7bfb6280592 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42.sf100/explain.txt @@ -0,0 +1,122 @@ +== Physical Plan == +TakeOrderedAndProject (21) ++- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Project (17) + +- * BroadcastHashJoin Inner BuildRight (16) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.store_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.item (4) + +- BroadcastExchange (15) + +- * Project (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet default.date_dim (11) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: 
[IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] + +(3) Filter [codegen id : 3] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_item_sk#2)) + +(4) Scan parquet default.item +Output [4]: [i_item_sk#4, i_category_id#5, i_category#6, i_manager_id#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [i_item_sk#4, i_category_id#5, i_category#6, i_manager_id#7] + +(6) Filter [codegen id : 1] +Input [4]: [i_item_sk#4, i_category_id#5, i_category#6, i_manager_id#7] +Condition : ((isnotnull(i_manager_id#7) AND (i_manager_id#7 = 1)) AND isnotnull(i_item_sk#4)) + +(7) Project [codegen id : 1] +Output [3]: [i_item_sk#4, i_category_id#5, i_category#6] +Input [4]: [i_item_sk#4, i_category_id#5, i_category#6, i_manager_id#7] + +(8) BroadcastExchange +Input [3]: [i_item_sk#4, i_category_id#5, i_category#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#4] +Join condition: None + +(10) Project [codegen id : 3] +Output [4]: [ss_sold_date_sk#1, ss_ext_sales_price#3, i_category_id#5, i_category#6] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3, i_item_sk#4, i_category_id#5, i_category#6] + +(11) Scan parquet default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_moy#11] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] + +(13) Filter [codegen id : 2] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Condition : ((((isnotnull(d_moy#11) AND isnotnull(d_year#10)) AND (d_moy#11 = 11)) AND (d_year#10 = 2000)) AND isnotnull(d_date_sk#9)) + +(14) Project [codegen id : 2] +Output [2]: [d_date_sk#9, d_year#10] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] + +(15) BroadcastExchange +Input [2]: [d_date_sk#9, d_year#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(17) Project [codegen id : 3] +Output [4]: [d_year#10, ss_ext_sales_price#3, i_category_id#5, i_category#6] +Input [6]: [ss_sold_date_sk#1, ss_ext_sales_price#3, i_category_id#5, i_category#6, d_date_sk#9, d_year#10] + +(18) HashAggregate [codegen id : 3] +Input [4]: [d_year#10, ss_ext_sales_price#3, i_category_id#5, i_category#6] +Keys [3]: [d_year#10, i_category_id#5, i_category#6] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#13] +Results [4]: [d_year#10, i_category_id#5, i_category#6, sum#14] + +(19) Exchange +Input [4]: [d_year#10, i_category_id#5, i_category#6, sum#14] +Arguments: hashpartitioning(d_year#10, i_category_id#5, i_category#6, 5), true, [id=#15] + +(20) HashAggregate [codegen id : 4] +Input [4]: [d_year#10, i_category_id#5, i_category#6, sum#14] +Keys [3]: [d_year#10, i_category_id#5, i_category#6] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#16] 
+Results [4]: [d_year#10, i_category_id#5, i_category#6, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#16,17,2) AS sum(ss_ext_sales_price)#17] + +(21) TakeOrderedAndProject +Input [4]: [d_year#10, i_category_id#5, i_category#6, sum(ss_ext_sales_price)#17] +Arguments: 100, [sum(ss_ext_sales_price)#17 DESC NULLS LAST, d_year#10 ASC NULLS FIRST, i_category_id#5 ASC NULLS FIRST, i_category#6 ASC NULLS FIRST], [d_year#10, i_category_id#5, i_category#6, sum(ss_ext_sales_price)#17] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42.sf100/simplified.txt new file mode 100644 index 0000000000000..83050b9af3411 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42.sf100/simplified.txt @@ -0,0 +1,31 @@ +TakeOrderedAndProject [d_year,i_category,i_category_id,sum(ss_ext_sales_price)] + WholeStageCodegen (4) + HashAggregate [d_year,i_category,i_category_id,sum] [sum,sum(UnscaledValue(ss_ext_sales_price)),sum(ss_ext_sales_price)] + InputAdapter + Exchange [d_year,i_category,i_category_id] #1 + WholeStageCodegen (3) + HashAggregate [d_year,i_category,i_category_id,ss_ext_sales_price] [sum,sum] + Project [d_year,i_category,i_category_id,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_category,i_category_id,ss_ext_sales_price,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [i_category,i_category_id,i_item_sk] + Filter [i_item_sk,i_manager_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_category_id,i_item_sk,i_manager_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [d_date_sk,d_year] + 
Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/explain.txt new file mode 100644 index 0000000000000..92ac42b19d74f --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/explain.txt @@ -0,0 +1,122 @@ +== Physical Plan == +TakeOrderedAndProject (21) ++- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Project (17) + +- * BroadcastHashJoin Inner BuildRight (16) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Project (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.date_dim (1) + : +- BroadcastExchange (8) + : +- * Filter (7) + : +- * ColumnarToRow (6) + : +- Scan parquet default.store_sales (5) + +- BroadcastExchange (15) + +- * Project (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet default.item (11) + + +(1) Scan parquet default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(3) Filter [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 2000)) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 3] +Output [2]: [d_date_sk#1, d_year#2] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(5) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#4, ss_item_sk#5, 
ss_ext_sales_price#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] + +(7) Filter [codegen id : 1] +Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] +Condition : (isnotnull(ss_sold_date_sk#4) AND isnotnull(ss_item_sk#5)) + +(8) BroadcastExchange +Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#4] +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [d_year#2, ss_item_sk#5, ss_ext_sales_price#6] +Input [5]: [d_date_sk#1, d_year#2, ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] + +(11) Scan parquet default.item +Output [4]: [i_item_sk#8, i_category_id#9, i_category#10, i_manager_id#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [4]: [i_item_sk#8, i_category_id#9, i_category#10, i_manager_id#11] + +(13) Filter [codegen id : 2] +Input [4]: [i_item_sk#8, i_category_id#9, i_category#10, i_manager_id#11] +Condition : ((isnotnull(i_manager_id#11) AND (i_manager_id#11 = 1)) AND isnotnull(i_item_sk#8)) + +(14) Project [codegen id : 2] +Output [3]: [i_item_sk#8, i_category_id#9, i_category#10] +Input [4]: [i_item_sk#8, i_category_id#9, i_category#10, i_manager_id#11] + +(15) BroadcastExchange +Input [3]: [i_item_sk#8, i_category_id#9, 
i_category#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#5] +Right keys [1]: [i_item_sk#8] +Join condition: None + +(17) Project [codegen id : 3] +Output [4]: [d_year#2, ss_ext_sales_price#6, i_category_id#9, i_category#10] +Input [6]: [d_year#2, ss_item_sk#5, ss_ext_sales_price#6, i_item_sk#8, i_category_id#9, i_category#10] + +(18) HashAggregate [codegen id : 3] +Input [4]: [d_year#2, ss_ext_sales_price#6, i_category_id#9, i_category#10] +Keys [3]: [d_year#2, i_category_id#9, i_category#10] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#6))] +Aggregate Attributes [1]: [sum#13] +Results [4]: [d_year#2, i_category_id#9, i_category#10, sum#14] + +(19) Exchange +Input [4]: [d_year#2, i_category_id#9, i_category#10, sum#14] +Arguments: hashpartitioning(d_year#2, i_category_id#9, i_category#10, 5), true, [id=#15] + +(20) HashAggregate [codegen id : 4] +Input [4]: [d_year#2, i_category_id#9, i_category#10, sum#14] +Keys [3]: [d_year#2, i_category_id#9, i_category#10] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#6))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#6))#16] +Results [4]: [d_year#2, i_category_id#9, i_category#10, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#16,17,2) AS sum(ss_ext_sales_price)#17] + +(21) TakeOrderedAndProject +Input [4]: [d_year#2, i_category_id#9, i_category#10, sum(ss_ext_sales_price)#17] +Arguments: 100, [sum(ss_ext_sales_price)#17 DESC NULLS LAST, d_year#2 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST, i_category#10 ASC NULLS FIRST], [d_year#2, i_category_id#9, i_category#10, sum(ss_ext_sales_price)#17] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/simplified.txt new file mode 100644 index 0000000000000..c8fc6e5c63919 --- 
/dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/simplified.txt @@ -0,0 +1,31 @@ +TakeOrderedAndProject [d_year,i_category,i_category_id,sum(ss_ext_sales_price)] + WholeStageCodegen (4) + HashAggregate [d_year,i_category,i_category_id,sum] [sum,sum(UnscaledValue(ss_ext_sales_price)),sum(ss_ext_sales_price)] + InputAdapter + Exchange [d_year,i_category,i_category_id] #1 + WholeStageCodegen (3) + HashAggregate [d_year,i_category,i_category_id,ss_ext_sales_price] [sum,sum] + Project [d_year,i_category,i_category_id,ss_ext_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [d_year,ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [d_date_sk,d_year] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [i_category,i_category_id,i_item_sk] + Filter [i_item_sk,i_manager_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_category_id,i_item_sk,i_manager_id] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43.sf100/explain.txt new file mode 100644 index 0000000000000..e83fdf2d7a348 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43.sf100/explain.txt @@ -0,0 +1,122 @@ +== Physical Plan == +TakeOrderedAndProject (21) ++- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Project (17) + +- * BroadcastHashJoin Inner BuildRight (16) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildLeft (9) + : :- BroadcastExchange (5) 
+ : : +- * Project (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.date_dim (1) + : +- * Filter (8) + : +- * ColumnarToRow (7) + : +- Scan parquet default.store_sales (6) + +- BroadcastExchange (15) + +- * Project (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet default.store (11) + + +(1) Scan parquet default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_day_name#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#1, d_year#2, d_day_name#3] + +(3) Filter [codegen id : 1] +Input [3]: [d_date_sk#1, d_year#2, d_day_name#3] +Condition : ((isnotnull(d_year#2) AND (d_year#2 = 2000)) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 1] +Output [2]: [d_date_sk#1, d_day_name#3] +Input [3]: [d_date_sk#1, d_year#2, d_day_name#3] + +(5) BroadcastExchange +Input [2]: [d_date_sk#1, d_day_name#3] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#4] + +(6) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#5, ss_store_sk#6, ss_sales_price#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(7) ColumnarToRow +Input [3]: [ss_sold_date_sk#5, ss_store_sk#6, ss_sales_price#7] + +(8) Filter +Input [3]: [ss_sold_date_sk#5, ss_store_sk#6, ss_sales_price#7] +Condition : (isnotnull(ss_sold_date_sk#5) AND isnotnull(ss_store_sk#6)) + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#5] +Join condition: None + +(10) 
Project [codegen id : 3] +Output [3]: [d_day_name#3, ss_store_sk#6, ss_sales_price#7] +Input [5]: [d_date_sk#1, d_day_name#3, ss_sold_date_sk#5, ss_store_sk#6, ss_sales_price#7] + +(11) Scan parquet default.store +Output [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_gmt_offset), EqualTo(s_gmt_offset,-5.00), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] + +(13) Filter [codegen id : 2] +Input [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] +Condition : ((isnotnull(s_gmt_offset#11) AND (s_gmt_offset#11 = -5.00)) AND isnotnull(s_store_sk#8)) + +(14) Project [codegen id : 2] +Output [3]: [s_store_sk#8, s_store_id#9, s_store_name#10] +Input [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] + +(15) BroadcastExchange +Input [3]: [s_store_sk#8, s_store_id#9, s_store_name#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_store_sk#6] +Right keys [1]: [s_store_sk#8] +Join condition: None + +(17) Project [codegen id : 3] +Output [4]: [d_day_name#3, ss_sales_price#7, s_store_id#9, s_store_name#10] +Input [6]: [d_day_name#3, ss_store_sk#6, ss_sales_price#7, s_store_sk#8, s_store_id#9, s_store_name#10] + +(18) HashAggregate [codegen id : 3] +Input [4]: [d_day_name#3, ss_sales_price#7, s_store_id#9, s_store_name#10] +Keys [2]: [s_store_name#10, s_store_id#9] +Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday) THEN ss_sales_price#7 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday) THEN ss_sales_price#7 ELSE null END)), partial_sum(UnscaledValue(CASE 
WHEN (d_day_name#3 = Tuesday) THEN ss_sales_price#7 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#7 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday) THEN ss_sales_price#7 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday) THEN ss_sales_price#7 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday) THEN ss_sales_price#7 ELSE null END))] +Aggregate Attributes [7]: [sum#13, sum#14, sum#15, sum#16, sum#17, sum#18, sum#19] +Results [9]: [s_store_name#10, s_store_id#9, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] + +(19) Exchange +Input [9]: [s_store_name#10, s_store_id#9, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] +Arguments: hashpartitioning(s_store_name#10, s_store_id#9, 5), true, [id=#27] + +(20) HashAggregate [codegen id : 4] +Input [9]: [s_store_name#10, s_store_id#9, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] +Keys [2]: [s_store_name#10, s_store_id#9] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday) THEN ss_sales_price#7 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday) THEN ss_sales_price#7 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday) THEN ss_sales_price#7 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#7 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday) THEN ss_sales_price#7 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday) THEN ss_sales_price#7 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday) THEN ss_sales_price#7 ELSE null END))] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday) THEN ss_sales_price#7 ELSE null END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday) THEN ss_sales_price#7 ELSE null END))#29, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday) THEN ss_sales_price#7 ELSE null 
END))#30, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#7 ELSE null END))#31, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday) THEN ss_sales_price#7 ELSE null END))#32, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday) THEN ss_sales_price#7 ELSE null END))#33, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday) THEN ss_sales_price#7 ELSE null END))#34] +Results [9]: [s_store_name#10, s_store_id#9, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday) THEN ss_sales_price#7 ELSE null END))#28,17,2) AS sun_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday) THEN ss_sales_price#7 ELSE null END))#29,17,2) AS mon_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday) THEN ss_sales_price#7 ELSE null END))#30,17,2) AS tue_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#7 ELSE null END))#31,17,2) AS wed_sales#38, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday) THEN ss_sales_price#7 ELSE null END))#32,17,2) AS thu_sales#39, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday) THEN ss_sales_price#7 ELSE null END))#33,17,2) AS fri_sales#40, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday) THEN ss_sales_price#7 ELSE null END))#34,17,2) AS sat_sales#41] + +(21) TakeOrderedAndProject +Input [9]: [s_store_name#10, s_store_id#9, sun_sales#35, mon_sales#36, tue_sales#37, wed_sales#38, thu_sales#39, fri_sales#40, sat_sales#41] +Arguments: 100, [s_store_name#10 ASC NULLS FIRST, s_store_id#9 ASC NULLS FIRST, sun_sales#35 ASC NULLS FIRST, mon_sales#36 ASC NULLS FIRST, tue_sales#37 ASC NULLS FIRST, wed_sales#38 ASC NULLS FIRST, thu_sales#39 ASC NULLS FIRST, fri_sales#40 ASC NULLS FIRST, sat_sales#41 ASC NULLS FIRST], [s_store_name#10, s_store_id#9, sun_sales#35, mon_sales#36, tue_sales#37, wed_sales#38, thu_sales#39, fri_sales#40, sat_sales#41] + diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43.sf100/simplified.txt new file mode 100644 index 0000000000000..2ed0f8f445720 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43.sf100/simplified.txt @@ -0,0 +1,31 @@ +TakeOrderedAndProject [fri_sales,mon_sales,s_store_id,s_store_name,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] + WholeStageCodegen (4) + HashAggregate [s_store_id,s_store_name,sum,sum,sum,sum,sum,sum,sum] [fri_sales,mon_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sun_sales,thu_sales,tue_sales,wed_sales] + InputAdapter + Exchange [s_store_id,s_store_name] #1 + WholeStageCodegen (3) + HashAggregate [d_day_name,s_store_id,s_store_name,ss_sales_price] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [d_day_name,s_store_id,s_store_name,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_day_name,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk,d_day_name] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_day_name,d_year] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + 
Scan parquet default.store_sales [ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [s_store_id,s_store_name,s_store_sk] + Filter [s_gmt_offset,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_gmt_offset,s_store_id,s_store_name,s_store_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/explain.txt new file mode 100644 index 0000000000000..760cc9a4a0ac0 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/explain.txt @@ -0,0 +1,122 @@ +== Physical Plan == +TakeOrderedAndProject (21) ++- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Project (17) + +- * BroadcastHashJoin Inner BuildRight (16) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Project (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.date_dim (1) + : +- BroadcastExchange (8) + : +- * Filter (7) + : +- * ColumnarToRow (6) + : +- Scan parquet default.store_sales (5) + +- BroadcastExchange (15) + +- * Project (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet default.store (11) + + +(1) Scan parquet default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_day_name#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_day_name#3] + +(3) Filter [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_day_name#3] +Condition : ((isnotnull(d_year#2) AND (d_year#2 = 2000)) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 3] +Output [2]: [d_date_sk#1, d_day_name#3] 
+Input [3]: [d_date_sk#1, d_year#2, d_day_name#3] + +(5) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#4, ss_store_sk#5, ss_sales_price#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [3]: [ss_sold_date_sk#4, ss_store_sk#5, ss_sales_price#6] + +(7) Filter [codegen id : 1] +Input [3]: [ss_sold_date_sk#4, ss_store_sk#5, ss_sales_price#6] +Condition : (isnotnull(ss_sold_date_sk#4) AND isnotnull(ss_store_sk#5)) + +(8) BroadcastExchange +Input [3]: [ss_sold_date_sk#4, ss_store_sk#5, ss_sales_price#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#4] +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [d_day_name#3, ss_store_sk#5, ss_sales_price#6] +Input [5]: [d_date_sk#1, d_day_name#3, ss_sold_date_sk#4, ss_store_sk#5, ss_sales_price#6] + +(11) Scan parquet default.store +Output [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_gmt_offset), EqualTo(s_gmt_offset,-5.00), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] + +(13) Filter [codegen id : 2] +Input [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] +Condition : ((isnotnull(s_gmt_offset#11) AND (s_gmt_offset#11 = -5.00)) AND isnotnull(s_store_sk#8)) + +(14) Project [codegen id : 2] +Output [3]: [s_store_sk#8, s_store_id#9, s_store_name#10] 
+Input [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] + +(15) BroadcastExchange +Input [3]: [s_store_sk#8, s_store_id#9, s_store_name#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_store_sk#5] +Right keys [1]: [s_store_sk#8] +Join condition: None + +(17) Project [codegen id : 3] +Output [4]: [d_day_name#3, ss_sales_price#6, s_store_id#9, s_store_name#10] +Input [6]: [d_day_name#3, ss_store_sk#5, ss_sales_price#6, s_store_sk#8, s_store_id#9, s_store_name#10] + +(18) HashAggregate [codegen id : 3] +Input [4]: [d_day_name#3, ss_sales_price#6, s_store_id#9, s_store_name#10] +Keys [2]: [s_store_name#10, s_store_id#9] +Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday) THEN ss_sales_price#6 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday) THEN ss_sales_price#6 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday) THEN ss_sales_price#6 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#6 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday) THEN ss_sales_price#6 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday) THEN ss_sales_price#6 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday) THEN ss_sales_price#6 ELSE null END))] +Aggregate Attributes [7]: [sum#13, sum#14, sum#15, sum#16, sum#17, sum#18, sum#19] +Results [9]: [s_store_name#10, s_store_id#9, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] + +(19) Exchange +Input [9]: [s_store_name#10, s_store_id#9, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] +Arguments: hashpartitioning(s_store_name#10, s_store_id#9, 5), true, [id=#27] + +(20) HashAggregate [codegen id : 4] +Input [9]: [s_store_name#10, s_store_id#9, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] 
+Keys [2]: [s_store_name#10, s_store_id#9] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday) THEN ss_sales_price#6 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday) THEN ss_sales_price#6 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday) THEN ss_sales_price#6 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#6 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday) THEN ss_sales_price#6 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday) THEN ss_sales_price#6 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday) THEN ss_sales_price#6 ELSE null END))] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday) THEN ss_sales_price#6 ELSE null END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday) THEN ss_sales_price#6 ELSE null END))#29, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday) THEN ss_sales_price#6 ELSE null END))#30, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#6 ELSE null END))#31, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday) THEN ss_sales_price#6 ELSE null END))#32, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday) THEN ss_sales_price#6 ELSE null END))#33, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday) THEN ss_sales_price#6 ELSE null END))#34] +Results [9]: [s_store_name#10, s_store_id#9, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday) THEN ss_sales_price#6 ELSE null END))#28,17,2) AS sun_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday) THEN ss_sales_price#6 ELSE null END))#29,17,2) AS mon_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday) THEN ss_sales_price#6 ELSE null END))#30,17,2) AS tue_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#6 ELSE null END))#31,17,2) AS wed_sales#38, MakeDecimal(sum(UnscaledValue(CASE 
WHEN (d_day_name#3 = Thursday) THEN ss_sales_price#6 ELSE null END))#32,17,2) AS thu_sales#39, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday) THEN ss_sales_price#6 ELSE null END))#33,17,2) AS fri_sales#40, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday) THEN ss_sales_price#6 ELSE null END))#34,17,2) AS sat_sales#41] + +(21) TakeOrderedAndProject +Input [9]: [s_store_name#10, s_store_id#9, sun_sales#35, mon_sales#36, tue_sales#37, wed_sales#38, thu_sales#39, fri_sales#40, sat_sales#41] +Arguments: 100, [s_store_name#10 ASC NULLS FIRST, s_store_id#9 ASC NULLS FIRST, sun_sales#35 ASC NULLS FIRST, mon_sales#36 ASC NULLS FIRST, tue_sales#37 ASC NULLS FIRST, wed_sales#38 ASC NULLS FIRST, thu_sales#39 ASC NULLS FIRST, fri_sales#40 ASC NULLS FIRST, sat_sales#41 ASC NULLS FIRST], [s_store_name#10, s_store_id#9, sun_sales#35, mon_sales#36, tue_sales#37, wed_sales#38, thu_sales#39, fri_sales#40, sat_sales#41] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/simplified.txt new file mode 100644 index 0000000000000..bf8ac2624db70 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/simplified.txt @@ -0,0 +1,31 @@ +TakeOrderedAndProject [fri_sales,mon_sales,s_store_id,s_store_name,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] + WholeStageCodegen (4) + HashAggregate [s_store_id,s_store_name,sum,sum,sum,sum,sum,sum,sum] [fri_sales,mon_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN 
ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sun_sales,thu_sales,tue_sales,wed_sales] + InputAdapter + Exchange [s_store_id,s_store_name] #1 + WholeStageCodegen (3) + HashAggregate [d_day_name,s_store_id,s_store_name,ss_sales_price] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [d_day_name,s_store_id,s_store_name,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_day_name,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [d_date_sk,d_day_name] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_day_name,d_year] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [s_store_id,s_store_name,s_store_sk] + Filter [s_gmt_offset,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_gmt_offset,s_store_id,s_store_name,s_store_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44.sf100/explain.txt new file mode 100644 index 0000000000000..685946db4c185 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44.sf100/explain.txt @@ -0,0 +1,248 @@ +== Physical Plan == +TakeOrderedAndProject (36) ++- * Project (35) + +- * BroadcastHashJoin Inner BuildRight (34) + :- * Project (32) + : +- * BroadcastHashJoin Inner BuildRight (31) + : :- * Project (26) + : : +- * BroadcastHashJoin Inner BuildRight (25) + : : :- * Project (14) + : : : +- * Filter (13) + : : : +- Window (12) + : : : +- 
* Sort (11) + : : : +- Exchange (10) + : : : +- * Project (9) + : : : +- * Filter (8) + : : : +- * HashAggregate (7) + : : : +- Exchange (6) + : : : +- * HashAggregate (5) + : : : +- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.store_sales (1) + : : +- BroadcastExchange (24) + : : +- * Project (23) + : : +- * Filter (22) + : : +- Window (21) + : : +- * Sort (20) + : : +- Exchange (19) + : : +- * Project (18) + : : +- * Filter (17) + : : +- * HashAggregate (16) + : : +- ReusedExchange (15) + : +- BroadcastExchange (30) + : +- * Filter (29) + : +- * ColumnarToRow (28) + : +- Scan parquet default.item (27) + +- ReusedExchange (33) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_item_sk#1, ss_store_sk#2, ss_net_profit#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [3]: [ss_item_sk#1, ss_store_sk#2, ss_net_profit#3] + +(3) Filter [codegen id : 1] +Input [3]: [ss_item_sk#1, ss_store_sk#2, ss_net_profit#3] +Condition : (isnotnull(ss_store_sk#2) AND (ss_store_sk#2 = 4)) + +(4) Project [codegen id : 1] +Output [2]: [ss_item_sk#1, ss_net_profit#3] +Input [3]: [ss_item_sk#1, ss_store_sk#2, ss_net_profit#3] + +(5) HashAggregate [codegen id : 1] +Input [2]: [ss_item_sk#1, ss_net_profit#3] +Keys [1]: [ss_item_sk#1] +Functions [1]: [partial_avg(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [2]: [sum#4, count#5] +Results [3]: [ss_item_sk#1, sum#6, count#7] + +(6) Exchange +Input [3]: [ss_item_sk#1, sum#6, count#7] +Arguments: hashpartitioning(ss_item_sk#1, 5), true, [id=#8] + +(7) HashAggregate [codegen id : 2] +Input [3]: [ss_item_sk#1, sum#6, count#7] +Keys [1]: [ss_item_sk#1] +Functions [1]: [avg(UnscaledValue(ss_net_profit#3))] +Aggregate 
Attributes [1]: [avg(UnscaledValue(ss_net_profit#3))#9] +Results [3]: [ss_item_sk#1 AS item_sk#10, cast((avg(UnscaledValue(ss_net_profit#3))#9 / 100.0) as decimal(11,6)) AS rank_col#11, cast((avg(UnscaledValue(ss_net_profit#3))#9 / 100.0) as decimal(11,6)) AS avg(ss_net_profit#3)#12] + +(8) Filter [codegen id : 2] +Input [3]: [item_sk#10, rank_col#11, avg(ss_net_profit#3)#12] +Condition : (isnotnull(avg(ss_net_profit#3)#12) AND (cast(avg(ss_net_profit#3)#12 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(Subquery scalar-subquery#13, [id=#14])), DecimalType(13,7), true))) + +(9) Project [codegen id : 2] +Output [2]: [item_sk#10, rank_col#11] +Input [3]: [item_sk#10, rank_col#11, avg(ss_net_profit#3)#12] + +(10) Exchange +Input [2]: [item_sk#10, rank_col#11] +Arguments: SinglePartition, true, [id=#15] + +(11) Sort [codegen id : 3] +Input [2]: [item_sk#10, rank_col#11] +Arguments: [rank_col#11 ASC NULLS FIRST], false, 0 + +(12) Window +Input [2]: [item_sk#10, rank_col#11] +Arguments: [rank(rank_col#11) windowspecdefinition(rank_col#11 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rnk#16], [rank_col#11 ASC NULLS FIRST] + +(13) Filter [codegen id : 10] +Input [3]: [item_sk#10, rank_col#11, rnk#16] +Condition : ((isnotnull(rnk#16) AND (rnk#16 < 11)) AND isnotnull(item_sk#10)) + +(14) Project [codegen id : 10] +Output [2]: [item_sk#10, rnk#16] +Input [3]: [item_sk#10, rank_col#11, rnk#16] + +(15) ReusedExchange [Reuses operator id: 6] +Output [3]: [ss_item_sk#1, sum#17, count#18] + +(16) HashAggregate [codegen id : 5] +Input [3]: [ss_item_sk#1, sum#17, count#18] +Keys [1]: [ss_item_sk#1] +Functions [1]: [avg(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_profit#3))#19] +Results [3]: [ss_item_sk#1 AS item_sk#20, cast((avg(UnscaledValue(ss_net_profit#3))#19 / 100.0) as decimal(11,6)) AS rank_col#21, cast((avg(UnscaledValue(ss_net_profit#3))#19 / 100.0) as decimal(11,6)) AS 
avg(ss_net_profit#3)#22] + +(17) Filter [codegen id : 5] +Input [3]: [item_sk#20, rank_col#21, avg(ss_net_profit#3)#22] +Condition : (isnotnull(avg(ss_net_profit#3)#22) AND (cast(avg(ss_net_profit#3)#22 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(ReusedSubquery Subquery scalar-subquery#13, [id=#14])), DecimalType(13,7), true))) + +(18) Project [codegen id : 5] +Output [2]: [item_sk#20, rank_col#21] +Input [3]: [item_sk#20, rank_col#21, avg(ss_net_profit#3)#22] + +(19) Exchange +Input [2]: [item_sk#20, rank_col#21] +Arguments: SinglePartition, true, [id=#23] + +(20) Sort [codegen id : 6] +Input [2]: [item_sk#20, rank_col#21] +Arguments: [rank_col#21 DESC NULLS LAST], false, 0 + +(21) Window +Input [2]: [item_sk#20, rank_col#21] +Arguments: [rank(rank_col#21) windowspecdefinition(rank_col#21 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rnk#24], [rank_col#21 DESC NULLS LAST] + +(22) Filter [codegen id : 7] +Input [3]: [item_sk#20, rank_col#21, rnk#24] +Condition : ((isnotnull(rnk#24) AND (rnk#24 < 11)) AND isnotnull(item_sk#20)) + +(23) Project [codegen id : 7] +Output [2]: [item_sk#20, rnk#24] +Input [3]: [item_sk#20, rank_col#21, rnk#24] + +(24) BroadcastExchange +Input [2]: [item_sk#20, rnk#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#25] + +(25) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [rnk#16] +Right keys [1]: [rnk#24] +Join condition: None + +(26) Project [codegen id : 10] +Output [3]: [item_sk#10, rnk#16, item_sk#20] +Input [4]: [item_sk#10, rnk#16, item_sk#20, rnk#24] + +(27) Scan parquet default.item +Output [2]: [i_item_sk#26, i_product_name#27] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 8] +Input [2]: 
[i_item_sk#26, i_product_name#27] + +(29) Filter [codegen id : 8] +Input [2]: [i_item_sk#26, i_product_name#27] +Condition : isnotnull(i_item_sk#26) + +(30) BroadcastExchange +Input [2]: [i_item_sk#26, i_product_name#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#28] + +(31) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [item_sk#10] +Right keys [1]: [i_item_sk#26] +Join condition: None + +(32) Project [codegen id : 10] +Output [3]: [rnk#16, item_sk#20, i_product_name#27] +Input [5]: [item_sk#10, rnk#16, item_sk#20, i_item_sk#26, i_product_name#27] + +(33) ReusedExchange [Reuses operator id: 30] +Output [2]: [i_item_sk#29, i_product_name#30] + +(34) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [item_sk#20] +Right keys [1]: [i_item_sk#29] +Join condition: None + +(35) Project [codegen id : 10] +Output [3]: [rnk#16, i_product_name#27 AS best_performing#31, i_product_name#30 AS worst_performing#32] +Input [5]: [rnk#16, item_sk#20, i_product_name#27, i_item_sk#29, i_product_name#30] + +(36) TakeOrderedAndProject +Input [3]: [rnk#16, best_performing#31, worst_performing#32] +Arguments: 100, [rnk#16 ASC NULLS FIRST], [rnk#16, best_performing#31, worst_performing#32] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 8 Hosting Expression = Subquery scalar-subquery#13, [id=#14] +* HashAggregate (43) ++- Exchange (42) + +- * HashAggregate (41) + +- * Project (40) + +- * Filter (39) + +- * ColumnarToRow (38) + +- Scan parquet default.store_sales (37) + + +(37) Scan parquet default.store_sales +Output [3]: [ss_addr_sk#33, ss_store_sk#2, ss_net_profit#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4), IsNull(ss_addr_sk)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 1] +Input [3]: [ss_addr_sk#33, 
ss_store_sk#2, ss_net_profit#3] + +(39) Filter [codegen id : 1] +Input [3]: [ss_addr_sk#33, ss_store_sk#2, ss_net_profit#3] +Condition : ((isnotnull(ss_store_sk#2) AND (ss_store_sk#2 = 4)) AND isnull(ss_addr_sk#33)) + +(40) Project [codegen id : 1] +Output [2]: [ss_store_sk#2, ss_net_profit#3] +Input [3]: [ss_addr_sk#33, ss_store_sk#2, ss_net_profit#3] + +(41) HashAggregate [codegen id : 1] +Input [2]: [ss_store_sk#2, ss_net_profit#3] +Keys [1]: [ss_store_sk#2] +Functions [1]: [partial_avg(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [2]: [sum#34, count#35] +Results [3]: [ss_store_sk#2, sum#36, count#37] + +(42) Exchange +Input [3]: [ss_store_sk#2, sum#36, count#37] +Arguments: hashpartitioning(ss_store_sk#2, 5), true, [id=#38] + +(43) HashAggregate [codegen id : 2] +Input [3]: [ss_store_sk#2, sum#36, count#37] +Keys [1]: [ss_store_sk#2] +Functions [1]: [avg(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_profit#3))#39] +Results [1]: [cast((avg(UnscaledValue(ss_net_profit#3))#39 / 100.0) as decimal(11,6)) AS rank_col#40] + +Subquery:2 Hosting operator id = 17 Hosting Expression = ReusedSubquery Subquery scalar-subquery#13, [id=#14] + + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44.sf100/simplified.txt new file mode 100644 index 0000000000000..96e07ea3d8a33 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44.sf100/simplified.txt @@ -0,0 +1,68 @@ +TakeOrderedAndProject [best_performing,rnk,worst_performing] + WholeStageCodegen (10) + Project [i_product_name,i_product_name,rnk] + BroadcastHashJoin [i_item_sk,item_sk] + Project [i_product_name,item_sk,rnk] + BroadcastHashJoin [i_item_sk,item_sk] + Project [item_sk,item_sk,rnk] + BroadcastHashJoin [rnk,rnk] + Project [item_sk,rnk] + Filter [item_sk,rnk] + InputAdapter + Window [rank_col] + 
WholeStageCodegen (3) + Sort [rank_col] + InputAdapter + Exchange #1 + WholeStageCodegen (2) + Project [item_sk,rank_col] + Filter [avg(ss_net_profit)] + Subquery #1 + WholeStageCodegen (2) + HashAggregate [count,ss_store_sk,sum] [avg(UnscaledValue(ss_net_profit)),count,rank_col,sum] + InputAdapter + Exchange [ss_store_sk] #3 + WholeStageCodegen (1) + HashAggregate [ss_net_profit,ss_store_sk] [count,count,sum,sum] + Project [ss_net_profit,ss_store_sk] + Filter [ss_addr_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_net_profit,ss_store_sk] + HashAggregate [count,ss_item_sk,sum] [avg(UnscaledValue(ss_net_profit)),avg(ss_net_profit),count,item_sk,rank_col,sum] + InputAdapter + Exchange [ss_item_sk] #2 + WholeStageCodegen (1) + HashAggregate [ss_item_sk,ss_net_profit] [count,count,sum,sum] + Project [ss_item_sk,ss_net_profit] + Filter [ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_net_profit,ss_store_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (7) + Project [item_sk,rnk] + Filter [item_sk,rnk] + InputAdapter + Window [rank_col] + WholeStageCodegen (6) + Sort [rank_col] + InputAdapter + Exchange #5 + WholeStageCodegen (5) + Project [item_sk,rank_col] + Filter [avg(ss_net_profit)] + ReusedSubquery [rank_col] #1 + HashAggregate [count,ss_item_sk,sum] [avg(UnscaledValue(ss_net_profit)),avg(ss_net_profit),count,item_sk,rank_col,sum] + InputAdapter + ReusedExchange [count,ss_item_sk,sum] #2 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (8) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_product_name] + InputAdapter + ReusedExchange [i_item_sk,i_product_name] #6 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/explain.txt new file mode 100644 index 0000000000000..5a3b2c4dd7843 --- 
/dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/explain.txt @@ -0,0 +1,248 @@ +== Physical Plan == +TakeOrderedAndProject (36) ++- * Project (35) + +- * BroadcastHashJoin Inner BuildRight (34) + :- * Project (32) + : +- * BroadcastHashJoin Inner BuildRight (31) + : :- * Project (26) + : : +- * BroadcastHashJoin Inner BuildRight (25) + : : :- * Project (14) + : : : +- * Filter (13) + : : : +- Window (12) + : : : +- * Sort (11) + : : : +- Exchange (10) + : : : +- * Project (9) + : : : +- * Filter (8) + : : : +- * HashAggregate (7) + : : : +- Exchange (6) + : : : +- * HashAggregate (5) + : : : +- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.store_sales (1) + : : +- BroadcastExchange (24) + : : +- * Project (23) + : : +- * Filter (22) + : : +- Window (21) + : : +- * Sort (20) + : : +- Exchange (19) + : : +- * Project (18) + : : +- * Filter (17) + : : +- * HashAggregate (16) + : : +- ReusedExchange (15) + : +- BroadcastExchange (30) + : +- * Filter (29) + : +- * ColumnarToRow (28) + : +- Scan parquet default.item (27) + +- ReusedExchange (33) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_item_sk#1, ss_store_sk#2, ss_net_profit#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [3]: [ss_item_sk#1, ss_store_sk#2, ss_net_profit#3] + +(3) Filter [codegen id : 1] +Input [3]: [ss_item_sk#1, ss_store_sk#2, ss_net_profit#3] +Condition : (isnotnull(ss_store_sk#2) AND (ss_store_sk#2 = 4)) + +(4) Project [codegen id : 1] +Output [2]: [ss_item_sk#1, ss_net_profit#3] +Input [3]: [ss_item_sk#1, ss_store_sk#2, ss_net_profit#3] + +(5) HashAggregate [codegen id : 1] +Input [2]: [ss_item_sk#1, ss_net_profit#3] +Keys [1]: 
[ss_item_sk#1] +Functions [1]: [partial_avg(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [2]: [sum#4, count#5] +Results [3]: [ss_item_sk#1, sum#6, count#7] + +(6) Exchange +Input [3]: [ss_item_sk#1, sum#6, count#7] +Arguments: hashpartitioning(ss_item_sk#1, 5), true, [id=#8] + +(7) HashAggregate [codegen id : 2] +Input [3]: [ss_item_sk#1, sum#6, count#7] +Keys [1]: [ss_item_sk#1] +Functions [1]: [avg(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_profit#3))#9] +Results [3]: [ss_item_sk#1 AS item_sk#10, cast((avg(UnscaledValue(ss_net_profit#3))#9 / 100.0) as decimal(11,6)) AS rank_col#11, cast((avg(UnscaledValue(ss_net_profit#3))#9 / 100.0) as decimal(11,6)) AS avg(ss_net_profit#3)#12] + +(8) Filter [codegen id : 2] +Input [3]: [item_sk#10, rank_col#11, avg(ss_net_profit#3)#12] +Condition : (isnotnull(avg(ss_net_profit#3)#12) AND (cast(avg(ss_net_profit#3)#12 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(Subquery scalar-subquery#13, [id=#14])), DecimalType(13,7), true))) + +(9) Project [codegen id : 2] +Output [2]: [item_sk#10, rank_col#11] +Input [3]: [item_sk#10, rank_col#11, avg(ss_net_profit#3)#12] + +(10) Exchange +Input [2]: [item_sk#10, rank_col#11] +Arguments: SinglePartition, true, [id=#15] + +(11) Sort [codegen id : 3] +Input [2]: [item_sk#10, rank_col#11] +Arguments: [rank_col#11 ASC NULLS FIRST], false, 0 + +(12) Window +Input [2]: [item_sk#10, rank_col#11] +Arguments: [rank(rank_col#11) windowspecdefinition(rank_col#11 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rnk#16], [rank_col#11 ASC NULLS FIRST] + +(13) Filter [codegen id : 10] +Input [3]: [item_sk#10, rank_col#11, rnk#16] +Condition : ((isnotnull(rnk#16) AND (rnk#16 < 11)) AND isnotnull(item_sk#10)) + +(14) Project [codegen id : 10] +Output [2]: [item_sk#10, rnk#16] +Input [3]: [item_sk#10, rank_col#11, rnk#16] + +(15) ReusedExchange [Reuses operator id: 6] +Output [3]: [ss_item_sk#1, 
sum#17, count#18] + +(16) HashAggregate [codegen id : 5] +Input [3]: [ss_item_sk#1, sum#17, count#18] +Keys [1]: [ss_item_sk#1] +Functions [1]: [avg(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_profit#3))#19] +Results [3]: [ss_item_sk#1 AS item_sk#20, cast((avg(UnscaledValue(ss_net_profit#3))#19 / 100.0) as decimal(11,6)) AS rank_col#21, cast((avg(UnscaledValue(ss_net_profit#3))#19 / 100.0) as decimal(11,6)) AS avg(ss_net_profit#3)#22] + +(17) Filter [codegen id : 5] +Input [3]: [item_sk#20, rank_col#21, avg(ss_net_profit#3)#22] +Condition : (isnotnull(avg(ss_net_profit#3)#22) AND (cast(avg(ss_net_profit#3)#22 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(ReusedSubquery Subquery scalar-subquery#13, [id=#14])), DecimalType(13,7), true))) + +(18) Project [codegen id : 5] +Output [2]: [item_sk#20, rank_col#21] +Input [3]: [item_sk#20, rank_col#21, avg(ss_net_profit#3)#22] + +(19) Exchange +Input [2]: [item_sk#20, rank_col#21] +Arguments: SinglePartition, true, [id=#23] + +(20) Sort [codegen id : 6] +Input [2]: [item_sk#20, rank_col#21] +Arguments: [rank_col#21 DESC NULLS LAST], false, 0 + +(21) Window +Input [2]: [item_sk#20, rank_col#21] +Arguments: [rank(rank_col#21) windowspecdefinition(rank_col#21 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rnk#24], [rank_col#21 DESC NULLS LAST] + +(22) Filter [codegen id : 7] +Input [3]: [item_sk#20, rank_col#21, rnk#24] +Condition : ((isnotnull(rnk#24) AND (rnk#24 < 11)) AND isnotnull(item_sk#20)) + +(23) Project [codegen id : 7] +Output [2]: [item_sk#20, rnk#24] +Input [3]: [item_sk#20, rank_col#21, rnk#24] + +(24) BroadcastExchange +Input [2]: [item_sk#20, rnk#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#25] + +(25) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [rnk#16] +Right keys [1]: [rnk#24] +Join condition: None + +(26) Project [codegen id : 10] +Output [3]: 
[item_sk#10, rnk#16, item_sk#20] +Input [4]: [item_sk#10, rnk#16, item_sk#20, rnk#24] + +(27) Scan parquet default.item +Output [2]: [i_item_sk#26, i_product_name#27] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 8] +Input [2]: [i_item_sk#26, i_product_name#27] + +(29) Filter [codegen id : 8] +Input [2]: [i_item_sk#26, i_product_name#27] +Condition : isnotnull(i_item_sk#26) + +(30) BroadcastExchange +Input [2]: [i_item_sk#26, i_product_name#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#28] + +(31) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [item_sk#10] +Right keys [1]: [i_item_sk#26] +Join condition: None + +(32) Project [codegen id : 10] +Output [3]: [rnk#16, item_sk#20, i_product_name#27] +Input [5]: [item_sk#10, rnk#16, item_sk#20, i_item_sk#26, i_product_name#27] + +(33) ReusedExchange [Reuses operator id: 30] +Output [2]: [i_item_sk#29, i_product_name#30] + +(34) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [item_sk#20] +Right keys [1]: [i_item_sk#29] +Join condition: None + +(35) Project [codegen id : 10] +Output [3]: [rnk#16, i_product_name#27 AS best_performing#31, i_product_name#30 AS worst_performing#32] +Input [5]: [rnk#16, item_sk#20, i_product_name#27, i_item_sk#29, i_product_name#30] + +(36) TakeOrderedAndProject +Input [3]: [rnk#16, best_performing#31, worst_performing#32] +Arguments: 100, [rnk#16 ASC NULLS FIRST], [rnk#16, best_performing#31, worst_performing#32] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 8 Hosting Expression = Subquery scalar-subquery#13, [id=#14] +* HashAggregate (43) ++- Exchange (42) + +- * HashAggregate (41) + +- * Project (40) + +- * Filter (39) + +- * ColumnarToRow (38) + +- Scan parquet default.store_sales (37) + + +(37) Scan 
parquet default.store_sales +Output [3]: [ss_addr_sk#33, ss_store_sk#2, ss_net_profit#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4), IsNull(ss_addr_sk)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 1] +Input [3]: [ss_addr_sk#33, ss_store_sk#2, ss_net_profit#3] + +(39) Filter [codegen id : 1] +Input [3]: [ss_addr_sk#33, ss_store_sk#2, ss_net_profit#3] +Condition : ((isnotnull(ss_store_sk#2) AND (ss_store_sk#2 = 4)) AND isnull(ss_addr_sk#33)) + +(40) Project [codegen id : 1] +Output [2]: [ss_store_sk#2, ss_net_profit#3] +Input [3]: [ss_addr_sk#33, ss_store_sk#2, ss_net_profit#3] + +(41) HashAggregate [codegen id : 1] +Input [2]: [ss_store_sk#2, ss_net_profit#3] +Keys [1]: [ss_store_sk#2] +Functions [1]: [partial_avg(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [2]: [sum#34, count#35] +Results [3]: [ss_store_sk#2, sum#36, count#37] + +(42) Exchange +Input [3]: [ss_store_sk#2, sum#36, count#37] +Arguments: hashpartitioning(ss_store_sk#2, 5), true, [id=#38] + +(43) HashAggregate [codegen id : 2] +Input [3]: [ss_store_sk#2, sum#36, count#37] +Keys [1]: [ss_store_sk#2] +Functions [1]: [avg(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_profit#3))#39] +Results [1]: [cast((avg(UnscaledValue(ss_net_profit#3))#39 / 100.0) as decimal(11,6)) AS rank_col#40] + +Subquery:2 Hosting operator id = 17 Hosting Expression = ReusedSubquery Subquery scalar-subquery#13, [id=#14] + + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/simplified.txt new file mode 100644 index 0000000000000..96e07ea3d8a33 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/simplified.txt @@ -0,0 
+1,68 @@ +TakeOrderedAndProject [best_performing,rnk,worst_performing] + WholeStageCodegen (10) + Project [i_product_name,i_product_name,rnk] + BroadcastHashJoin [i_item_sk,item_sk] + Project [i_product_name,item_sk,rnk] + BroadcastHashJoin [i_item_sk,item_sk] + Project [item_sk,item_sk,rnk] + BroadcastHashJoin [rnk,rnk] + Project [item_sk,rnk] + Filter [item_sk,rnk] + InputAdapter + Window [rank_col] + WholeStageCodegen (3) + Sort [rank_col] + InputAdapter + Exchange #1 + WholeStageCodegen (2) + Project [item_sk,rank_col] + Filter [avg(ss_net_profit)] + Subquery #1 + WholeStageCodegen (2) + HashAggregate [count,ss_store_sk,sum] [avg(UnscaledValue(ss_net_profit)),count,rank_col,sum] + InputAdapter + Exchange [ss_store_sk] #3 + WholeStageCodegen (1) + HashAggregate [ss_net_profit,ss_store_sk] [count,count,sum,sum] + Project [ss_net_profit,ss_store_sk] + Filter [ss_addr_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_net_profit,ss_store_sk] + HashAggregate [count,ss_item_sk,sum] [avg(UnscaledValue(ss_net_profit)),avg(ss_net_profit),count,item_sk,rank_col,sum] + InputAdapter + Exchange [ss_item_sk] #2 + WholeStageCodegen (1) + HashAggregate [ss_item_sk,ss_net_profit] [count,count,sum,sum] + Project [ss_item_sk,ss_net_profit] + Filter [ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_net_profit,ss_store_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (7) + Project [item_sk,rnk] + Filter [item_sk,rnk] + InputAdapter + Window [rank_col] + WholeStageCodegen (6) + Sort [rank_col] + InputAdapter + Exchange #5 + WholeStageCodegen (5) + Project [item_sk,rank_col] + Filter [avg(ss_net_profit)] + ReusedSubquery [rank_col] #1 + HashAggregate [count,ss_item_sk,sum] [avg(UnscaledValue(ss_net_profit)),avg(ss_net_profit),count,item_sk,rank_col,sum] + InputAdapter + ReusedExchange [count,ss_item_sk,sum] #2 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (8) + Filter 
[i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_product_name] + InputAdapter + ReusedExchange [i_item_sk,i_product_name] #6 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.sf100/explain.txt new file mode 100644 index 0000000000000..ab8f08566f79e --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.sf100/explain.txt @@ -0,0 +1,256 @@ +== Physical Plan == +TakeOrderedAndProject (46) ++- * HashAggregate (45) + +- Exchange (44) + +- * HashAggregate (43) + +- * Project (42) + +- * Filter (41) + +- * BroadcastHashJoin ExistenceJoin(exists#1) BuildRight (40) + :- * Project (34) + : +- * SortMergeJoin Inner (33) + : :- * Sort (18) + : : +- Exchange (17) + : : +- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.web_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.item (11) + : +- * Sort (32) + : +- Exchange (31) + : +- * Project (30) + : +- * SortMergeJoin Inner (29) + : :- * Sort (23) + : : +- Exchange (22) + : : +- * Filter (21) + : : +- * ColumnarToRow (20) + : : +- Scan parquet default.customer_address (19) + : +- * Sort (28) + : +- Exchange (27) + : +- * Filter (26) + : +- * ColumnarToRow (25) + : +- Scan parquet default.customer (24) + +- BroadcastExchange (39) + +- * Project (38) + +- * Filter (37) + +- * ColumnarToRow (36) + +- Scan parquet default.item (35) + + +(1) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#2, ws_item_sk#3, 
ws_bill_customer_sk#4, ws_sales_price#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [4]: [ws_sold_date_sk#2, ws_item_sk#3, ws_bill_customer_sk#4, ws_sales_price#5] + +(3) Filter [codegen id : 3] +Input [4]: [ws_sold_date_sk#2, ws_item_sk#3, ws_bill_customer_sk#4, ws_sales_price#5] +Condition : ((isnotnull(ws_bill_customer_sk#4) AND isnotnull(ws_sold_date_sk#2)) AND isnotnull(ws_item_sk#3)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#6, d_year#7, d_qoy#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_qoy#8] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_qoy#8] +Condition : ((((isnotnull(d_qoy#8) AND isnotnull(d_year#7)) AND (d_qoy#8 = 2)) AND (d_year#7 = 2001)) AND isnotnull(d_date_sk#6)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [3]: [d_date_sk#6, d_year#7, d_qoy#8] + +(8) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_sold_date_sk#2] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [ws_item_sk#3, ws_bill_customer_sk#4, ws_sales_price#5] +Input [5]: [ws_sold_date_sk#2, ws_item_sk#3, ws_bill_customer_sk#4, ws_sales_price#5, d_date_sk#6] + +(11) Scan parquet default.item 
+Output [2]: [i_item_sk#10, i_item_id#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [i_item_sk#10, i_item_id#11] + +(13) Filter [codegen id : 2] +Input [2]: [i_item_sk#10, i_item_id#11] +Condition : isnotnull(i_item_sk#10) + +(14) BroadcastExchange +Input [2]: [i_item_sk#10, i_item_id#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_item_sk#3] +Right keys [1]: [i_item_sk#10] +Join condition: None + +(16) Project [codegen id : 3] +Output [3]: [ws_bill_customer_sk#4, ws_sales_price#5, i_item_id#11] +Input [5]: [ws_item_sk#3, ws_bill_customer_sk#4, ws_sales_price#5, i_item_sk#10, i_item_id#11] + +(17) Exchange +Input [3]: [ws_bill_customer_sk#4, ws_sales_price#5, i_item_id#11] +Arguments: hashpartitioning(ws_bill_customer_sk#4, 5), true, [id=#13] + +(18) Sort [codegen id : 4] +Input [3]: [ws_bill_customer_sk#4, ws_sales_price#5, i_item_id#11] +Arguments: [ws_bill_customer_sk#4 ASC NULLS FIRST], false, 0 + +(19) Scan parquet default.customer_address +Output [3]: [ca_address_sk#14, ca_city#15, ca_zip#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 5] +Input [3]: [ca_address_sk#14, ca_city#15, ca_zip#16] + +(21) Filter [codegen id : 5] +Input [3]: [ca_address_sk#14, ca_city#15, ca_zip#16] +Condition : isnotnull(ca_address_sk#14) + +(22) Exchange +Input [3]: [ca_address_sk#14, ca_city#15, ca_zip#16] +Arguments: hashpartitioning(ca_address_sk#14, 5), true, [id=#17] + +(23) Sort [codegen 
id : 6] +Input [3]: [ca_address_sk#14, ca_city#15, ca_zip#16] +Arguments: [ca_address_sk#14 ASC NULLS FIRST], false, 0 + +(24) Scan parquet default.customer +Output [2]: [c_customer_sk#18, c_current_addr_sk#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 7] +Input [2]: [c_customer_sk#18, c_current_addr_sk#19] + +(26) Filter [codegen id : 7] +Input [2]: [c_customer_sk#18, c_current_addr_sk#19] +Condition : (isnotnull(c_customer_sk#18) AND isnotnull(c_current_addr_sk#19)) + +(27) Exchange +Input [2]: [c_customer_sk#18, c_current_addr_sk#19] +Arguments: hashpartitioning(c_current_addr_sk#19, 5), true, [id=#20] + +(28) Sort [codegen id : 8] +Input [2]: [c_customer_sk#18, c_current_addr_sk#19] +Arguments: [c_current_addr_sk#19 ASC NULLS FIRST], false, 0 + +(29) SortMergeJoin [codegen id : 9] +Left keys [1]: [ca_address_sk#14] +Right keys [1]: [c_current_addr_sk#19] +Join condition: None + +(30) Project [codegen id : 9] +Output [3]: [ca_city#15, ca_zip#16, c_customer_sk#18] +Input [5]: [ca_address_sk#14, ca_city#15, ca_zip#16, c_customer_sk#18, c_current_addr_sk#19] + +(31) Exchange +Input [3]: [ca_city#15, ca_zip#16, c_customer_sk#18] +Arguments: hashpartitioning(c_customer_sk#18, 5), true, [id=#21] + +(32) Sort [codegen id : 10] +Input [3]: [ca_city#15, ca_zip#16, c_customer_sk#18] +Arguments: [c_customer_sk#18 ASC NULLS FIRST], false, 0 + +(33) SortMergeJoin [codegen id : 12] +Left keys [1]: [ws_bill_customer_sk#4] +Right keys [1]: [c_customer_sk#18] +Join condition: None + +(34) Project [codegen id : 12] +Output [4]: [ws_sales_price#5, ca_city#15, ca_zip#16, i_item_id#11] +Input [6]: [ws_bill_customer_sk#4, ws_sales_price#5, i_item_id#11, ca_city#15, ca_zip#16, c_customer_sk#18] + +(35) Scan parquet 
default.item +Output [2]: [i_item_sk#10, i_item_id#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [In(i_item_sk, [2,3,5,7,11,13,17,19,23,29])] +ReadSchema: struct + +(36) ColumnarToRow [codegen id : 11] +Input [2]: [i_item_sk#10, i_item_id#11] + +(37) Filter [codegen id : 11] +Input [2]: [i_item_sk#10, i_item_id#11] +Condition : i_item_sk#10 IN (2,3,5,7,11,13,17,19,23,29) + +(38) Project [codegen id : 11] +Output [1]: [i_item_id#11 AS i_item_id#11#22] +Input [2]: [i_item_sk#10, i_item_id#11] + +(39) BroadcastExchange +Input [1]: [i_item_id#11#22] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#23] + +(40) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [i_item_id#11] +Right keys [1]: [i_item_id#11#22] +Join condition: None + +(41) Filter [codegen id : 12] +Input [5]: [ws_sales_price#5, ca_city#15, ca_zip#16, i_item_id#11, exists#1] +Condition : (substr(ca_zip#16, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) OR exists#1) + +(42) Project [codegen id : 12] +Output [3]: [ws_sales_price#5, ca_city#15, ca_zip#16] +Input [5]: [ws_sales_price#5, ca_city#15, ca_zip#16, i_item_id#11, exists#1] + +(43) HashAggregate [codegen id : 12] +Input [3]: [ws_sales_price#5, ca_city#15, ca_zip#16] +Keys [2]: [ca_zip#16, ca_city#15] +Functions [1]: [partial_sum(UnscaledValue(ws_sales_price#5))] +Aggregate Attributes [1]: [sum#24] +Results [3]: [ca_zip#16, ca_city#15, sum#25] + +(44) Exchange +Input [3]: [ca_zip#16, ca_city#15, sum#25] +Arguments: hashpartitioning(ca_zip#16, ca_city#15, 5), true, [id=#26] + +(45) HashAggregate [codegen id : 13] +Input [3]: [ca_zip#16, ca_city#15, sum#25] +Keys [2]: [ca_zip#16, ca_city#15] +Functions [1]: [sum(UnscaledValue(ws_sales_price#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_sales_price#5))#27] +Results [3]: [ca_zip#16, ca_city#15, 
MakeDecimal(sum(UnscaledValue(ws_sales_price#5))#27,17,2) AS sum(ws_sales_price)#28] + +(46) TakeOrderedAndProject +Input [3]: [ca_zip#16, ca_city#15, sum(ws_sales_price)#28] +Arguments: 100, [ca_zip#16 ASC NULLS FIRST, ca_city#15 ASC NULLS FIRST], [ca_zip#16, ca_city#15, sum(ws_sales_price)#28] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.sf100/simplified.txt new file mode 100644 index 0000000000000..675ed20f170db --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.sf100/simplified.txt @@ -0,0 +1,77 @@ +TakeOrderedAndProject [ca_city,ca_zip,sum(ws_sales_price)] + WholeStageCodegen (13) + HashAggregate [ca_city,ca_zip,sum] [sum,sum(UnscaledValue(ws_sales_price)),sum(ws_sales_price)] + InputAdapter + Exchange [ca_city,ca_zip] #1 + WholeStageCodegen (12) + HashAggregate [ca_city,ca_zip,ws_sales_price] [sum,sum] + Project [ca_city,ca_zip,ws_sales_price] + Filter [ca_zip,exists] + BroadcastHashJoin [i_item_id,i_item_id] + Project [ca_city,ca_zip,i_item_id,ws_sales_price] + SortMergeJoin [c_customer_sk,ws_bill_customer_sk] + InputAdapter + WholeStageCodegen (4) + Sort [ws_bill_customer_sk] + InputAdapter + Exchange [ws_bill_customer_sk] #2 + WholeStageCodegen (3) + Project [i_item_id,ws_bill_customer_sk,ws_sales_price] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_bill_customer_sk,ws_item_sk,ws_sales_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_bill_customer_sk,ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_item_sk,ws_sales_price,ws_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_qoy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] + InputAdapter + BroadcastExchange #4 + 
WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_id,i_item_sk] + InputAdapter + WholeStageCodegen (10) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #5 + WholeStageCodegen (9) + Project [c_customer_sk,ca_city,ca_zip] + SortMergeJoin [c_current_addr_sk,ca_address_sk] + InputAdapter + WholeStageCodegen (6) + Sort [ca_address_sk] + InputAdapter + Exchange [ca_address_sk] #6 + WholeStageCodegen (5) + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_city,ca_zip] + InputAdapter + WholeStageCodegen (8) + Sort [c_current_addr_sk] + InputAdapter + Exchange [c_current_addr_sk] #7 + WholeStageCodegen (7) + Filter [c_current_addr_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_customer_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (11) + Project [i_item_id] + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_id,i_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/explain.txt new file mode 100644 index 0000000000000..91a98423d049e --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/explain.txt @@ -0,0 +1,226 @@ +== Physical Plan == +TakeOrderedAndProject (40) ++- * HashAggregate (39) + +- Exchange (38) + +- * HashAggregate (37) + +- * Project (36) + +- * Filter (35) + +- * BroadcastHashJoin ExistenceJoin(exists#1) BuildRight (34) + :- * Project (28) + : +- * BroadcastHashJoin Inner BuildRight (27) + : :- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * 
ColumnarToRow (2) + : : : : : +- Scan parquet default.web_sales (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.customer (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.customer_address (10) + : : +- BroadcastExchange (20) + : : +- * Project (19) + : : +- * Filter (18) + : : +- * ColumnarToRow (17) + : : +- Scan parquet default.date_dim (16) + : +- BroadcastExchange (26) + : +- * Filter (25) + : +- * ColumnarToRow (24) + : +- Scan parquet default.item (23) + +- BroadcastExchange (33) + +- * Project (32) + +- * Filter (31) + +- * ColumnarToRow (30) + +- Scan parquet default.item (29) + + +(1) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#2, ws_item_sk#3, ws_bill_customer_sk#4, ws_sales_price#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 6] +Input [4]: [ws_sold_date_sk#2, ws_item_sk#3, ws_bill_customer_sk#4, ws_sales_price#5] + +(3) Filter [codegen id : 6] +Input [4]: [ws_sold_date_sk#2, ws_item_sk#3, ws_bill_customer_sk#4, ws_sales_price#5] +Condition : ((isnotnull(ws_bill_customer_sk#4) AND isnotnull(ws_sold_date_sk#2)) AND isnotnull(ws_item_sk#3)) + +(4) Scan parquet default.customer +Output [2]: [c_customer_sk#6, c_current_addr_sk#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [c_customer_sk#6, c_current_addr_sk#7] + +(6) Filter [codegen id : 1] +Input 
[2]: [c_customer_sk#6, c_current_addr_sk#7] +Condition : (isnotnull(c_customer_sk#6) AND isnotnull(c_current_addr_sk#7)) + +(7) BroadcastExchange +Input [2]: [c_customer_sk#6, c_current_addr_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#8] + +(8) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_bill_customer_sk#4] +Right keys [1]: [c_customer_sk#6] +Join condition: None + +(9) Project [codegen id : 6] +Output [4]: [ws_sold_date_sk#2, ws_item_sk#3, ws_sales_price#5, c_current_addr_sk#7] +Input [6]: [ws_sold_date_sk#2, ws_item_sk#3, ws_bill_customer_sk#4, ws_sales_price#5, c_customer_sk#6, c_current_addr_sk#7] + +(10) Scan parquet default.customer_address +Output [3]: [ca_address_sk#9, ca_city#10, ca_zip#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [ca_address_sk#9, ca_city#10, ca_zip#11] + +(12) Filter [codegen id : 2] +Input [3]: [ca_address_sk#9, ca_city#10, ca_zip#11] +Condition : isnotnull(ca_address_sk#9) + +(13) BroadcastExchange +Input [3]: [ca_address_sk#9, ca_city#10, ca_zip#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] + +(14) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [c_current_addr_sk#7] +Right keys [1]: [ca_address_sk#9] +Join condition: None + +(15) Project [codegen id : 6] +Output [5]: [ws_sold_date_sk#2, ws_item_sk#3, ws_sales_price#5, ca_city#10, ca_zip#11] +Input [7]: [ws_sold_date_sk#2, ws_item_sk#3, ws_sales_price#5, c_current_addr_sk#7, ca_address_sk#9, ca_city#10, ca_zip#11] + +(16) Scan parquet default.date_dim +Output [3]: [d_date_sk#13, d_year#14, d_qoy#15] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#13, d_year#14, d_qoy#15] + +(18) Filter [codegen id : 3] +Input [3]: [d_date_sk#13, d_year#14, d_qoy#15] +Condition : ((((isnotnull(d_qoy#15) AND isnotnull(d_year#14)) AND (d_qoy#15 = 2)) AND (d_year#14 = 2001)) AND isnotnull(d_date_sk#13)) + +(19) Project [codegen id : 3] +Output [1]: [d_date_sk#13] +Input [3]: [d_date_sk#13, d_year#14, d_qoy#15] + +(20) BroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(21) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#2] +Right keys [1]: [d_date_sk#13] +Join condition: None + +(22) Project [codegen id : 6] +Output [4]: [ws_item_sk#3, ws_sales_price#5, ca_city#10, ca_zip#11] +Input [6]: [ws_sold_date_sk#2, ws_item_sk#3, ws_sales_price#5, ca_city#10, ca_zip#11, d_date_sk#13] + +(23) Scan parquet default.item +Output [2]: [i_item_sk#17, i_item_id#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#17, i_item_id#18] + +(25) Filter [codegen id : 4] +Input [2]: [i_item_sk#17, i_item_id#18] +Condition : isnotnull(i_item_sk#17) + +(26) BroadcastExchange +Input [2]: [i_item_sk#17, i_item_id#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] + +(27) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_item_sk#3] +Right keys [1]: [i_item_sk#17] +Join condition: None + +(28) Project [codegen id : 6] +Output [4]: 
[ws_sales_price#5, ca_city#10, ca_zip#11, i_item_id#18] +Input [6]: [ws_item_sk#3, ws_sales_price#5, ca_city#10, ca_zip#11, i_item_sk#17, i_item_id#18] + +(29) Scan parquet default.item +Output [2]: [i_item_sk#17, i_item_id#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [In(i_item_sk, [2,3,5,7,11,13,17,19,23,29])] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 5] +Input [2]: [i_item_sk#17, i_item_id#18] + +(31) Filter [codegen id : 5] +Input [2]: [i_item_sk#17, i_item_id#18] +Condition : i_item_sk#17 IN (2,3,5,7,11,13,17,19,23,29) + +(32) Project [codegen id : 5] +Output [1]: [i_item_id#18 AS i_item_id#18#20] +Input [2]: [i_item_sk#17, i_item_id#18] + +(33) BroadcastExchange +Input [1]: [i_item_id#18#20] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#21] + +(34) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [i_item_id#18] +Right keys [1]: [i_item_id#18#20] +Join condition: None + +(35) Filter [codegen id : 6] +Input [5]: [ws_sales_price#5, ca_city#10, ca_zip#11, i_item_id#18, exists#1] +Condition : (substr(ca_zip#11, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) OR exists#1) + +(36) Project [codegen id : 6] +Output [3]: [ws_sales_price#5, ca_city#10, ca_zip#11] +Input [5]: [ws_sales_price#5, ca_city#10, ca_zip#11, i_item_id#18, exists#1] + +(37) HashAggregate [codegen id : 6] +Input [3]: [ws_sales_price#5, ca_city#10, ca_zip#11] +Keys [2]: [ca_zip#11, ca_city#10] +Functions [1]: [partial_sum(UnscaledValue(ws_sales_price#5))] +Aggregate Attributes [1]: [sum#22] +Results [3]: [ca_zip#11, ca_city#10, sum#23] + +(38) Exchange +Input [3]: [ca_zip#11, ca_city#10, sum#23] +Arguments: hashpartitioning(ca_zip#11, ca_city#10, 5), true, [id=#24] + +(39) HashAggregate [codegen id : 7] +Input [3]: [ca_zip#11, ca_city#10, sum#23] +Keys [2]: [ca_zip#11, ca_city#10] +Functions 
[1]: [sum(UnscaledValue(ws_sales_price#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_sales_price#5))#25] +Results [3]: [ca_zip#11, ca_city#10, MakeDecimal(sum(UnscaledValue(ws_sales_price#5))#25,17,2) AS sum(ws_sales_price)#26] + +(40) TakeOrderedAndProject +Input [3]: [ca_zip#11, ca_city#10, sum(ws_sales_price)#26] +Arguments: 100, [ca_zip#11 ASC NULLS FIRST, ca_city#10 ASC NULLS FIRST], [ca_zip#11, ca_city#10, sum(ws_sales_price)#26] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/simplified.txt new file mode 100644 index 0000000000000..9b005f76afcdb --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/simplified.txt @@ -0,0 +1,59 @@ +TakeOrderedAndProject [ca_city,ca_zip,sum(ws_sales_price)] + WholeStageCodegen (7) + HashAggregate [ca_city,ca_zip,sum] [sum,sum(UnscaledValue(ws_sales_price)),sum(ws_sales_price)] + InputAdapter + Exchange [ca_city,ca_zip] #1 + WholeStageCodegen (6) + HashAggregate [ca_city,ca_zip,ws_sales_price] [sum,sum] + Project [ca_city,ca_zip,ws_sales_price] + Filter [ca_zip,exists] + BroadcastHashJoin [i_item_id,i_item_id] + Project [ca_city,ca_zip,i_item_id,ws_sales_price] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ca_city,ca_zip,ws_item_sk,ws_sales_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ca_city,ca_zip,ws_item_sk,ws_sales_price,ws_sold_date_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_addr_sk,ws_item_sk,ws_sales_price,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Filter [ws_bill_customer_sk,ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_item_sk,ws_sales_price,ws_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [c_current_addr_sk,c_customer_sk] + ColumnarToRow + 
InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_customer_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_city,ca_zip] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date_sk,d_qoy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_id,i_item_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Project [i_item_id] + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_id,i_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46.sf100/explain.txt new file mode 100644 index 0000000000000..a5120c1fe1c27 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46.sf100/explain.txt @@ -0,0 +1,281 @@ +== Physical Plan == +TakeOrderedAndProject (51) ++- * Project (50) + +- * SortMergeJoin Inner (49) + :- * Sort (14) + : +- Exchange (13) + : +- * Project (12) + : +- * SortMergeJoin Inner (11) + : :- * Sort (5) + : : +- Exchange (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.customer (1) + : +- * Sort (10) + : +- Exchange (9) + : +- * Filter (8) + : +- * ColumnarToRow (7) + : +- Scan parquet default.customer_address (6) + +- * Sort (48) + +- Exchange (47) + +- * HashAggregate (46) + +- * HashAggregate (45) + +- * Project (44) + +- * SortMergeJoin Inner (43) + :- * Sort (40) + : +- Exchange (39) + : +- * Project (38) + : +- * BroadcastHashJoin Inner BuildRight (37) + : :- * Project (31) + : : +- * BroadcastHashJoin Inner BuildRight (30) + : : :- 
* Project (24) + : : : +- * BroadcastHashJoin Inner BuildRight (23) + : : : :- * Filter (17) + : : : : +- * ColumnarToRow (16) + : : : : +- Scan parquet default.store_sales (15) + : : : +- BroadcastExchange (22) + : : : +- * Project (21) + : : : +- * Filter (20) + : : : +- * ColumnarToRow (19) + : : : +- Scan parquet default.date_dim (18) + : : +- BroadcastExchange (29) + : : +- * Project (28) + : : +- * Filter (27) + : : +- * ColumnarToRow (26) + : : +- Scan parquet default.store (25) + : +- BroadcastExchange (36) + : +- * Project (35) + : +- * Filter (34) + : +- * ColumnarToRow (33) + : +- Scan parquet default.household_demographics (32) + +- * Sort (42) + +- ReusedExchange (41) + + +(1) Scan parquet default.customer +Output [4]: [c_customer_sk#1, c_current_addr_sk#2, c_first_name#3, c_last_name#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [4]: [c_customer_sk#1, c_current_addr_sk#2, c_first_name#3, c_last_name#4] + +(3) Filter [codegen id : 1] +Input [4]: [c_customer_sk#1, c_current_addr_sk#2, c_first_name#3, c_last_name#4] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_current_addr_sk#2)) + +(4) Exchange +Input [4]: [c_customer_sk#1, c_current_addr_sk#2, c_first_name#3, c_last_name#4] +Arguments: hashpartitioning(c_current_addr_sk#2, 5), true, [id=#5] + +(5) Sort [codegen id : 2] +Input [4]: [c_customer_sk#1, c_current_addr_sk#2, c_first_name#3, c_last_name#4] +Arguments: [c_current_addr_sk#2 ASC NULLS FIRST], false, 0 + +(6) Scan parquet default.customer_address +Output [2]: [ca_address_sk#6, ca_city#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_address] 
+PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 3] +Input [2]: [ca_address_sk#6, ca_city#7] + +(8) Filter [codegen id : 3] +Input [2]: [ca_address_sk#6, ca_city#7] +Condition : (isnotnull(ca_address_sk#6) AND isnotnull(ca_city#7)) + +(9) Exchange +Input [2]: [ca_address_sk#6, ca_city#7] +Arguments: hashpartitioning(ca_address_sk#6, 5), true, [id=#8] + +(10) Sort [codegen id : 4] +Input [2]: [ca_address_sk#6, ca_city#7] +Arguments: [ca_address_sk#6 ASC NULLS FIRST], false, 0 + +(11) SortMergeJoin [codegen id : 5] +Left keys [1]: [c_current_addr_sk#2] +Right keys [1]: [ca_address_sk#6] +Join condition: None + +(12) Project [codegen id : 5] +Output [4]: [c_customer_sk#1, c_first_name#3, c_last_name#4, ca_city#7] +Input [6]: [c_customer_sk#1, c_current_addr_sk#2, c_first_name#3, c_last_name#4, ca_address_sk#6, ca_city#7] + +(13) Exchange +Input [4]: [c_customer_sk#1, c_first_name#3, c_last_name#4, ca_city#7] +Arguments: hashpartitioning(c_customer_sk#1, 5), true, [id=#9] + +(14) Sort [codegen id : 6] +Input [4]: [c_customer_sk#1, c_first_name#3, c_last_name#4, ca_city#7] +Arguments: [c_customer_sk#1 ASC NULLS FIRST], false, 0 + +(15) Scan parquet default.store_sales +Output [8]: [ss_sold_date_sk#10, ss_customer_sk#11, ss_hdemo_sk#12, ss_addr_sk#13, ss_store_sk#14, ss_ticket_number#15, ss_coupon_amt#16, ss_net_profit#17] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 10] +Input [8]: [ss_sold_date_sk#10, ss_customer_sk#11, ss_hdemo_sk#12, ss_addr_sk#13, ss_store_sk#14, ss_ticket_number#15, ss_coupon_amt#16, ss_net_profit#17] + +(17) Filter [codegen id : 10] +Input [8]: 
[ss_sold_date_sk#10, ss_customer_sk#11, ss_hdemo_sk#12, ss_addr_sk#13, ss_store_sk#14, ss_ticket_number#15, ss_coupon_amt#16, ss_net_profit#17] +Condition : ((((isnotnull(ss_sold_date_sk#10) AND isnotnull(ss_store_sk#14)) AND isnotnull(ss_hdemo_sk#12)) AND isnotnull(ss_addr_sk#13)) AND isnotnull(ss_customer_sk#11)) + +(18) Scan parquet default.date_dim +Output [3]: [d_date_sk#18, d_year#19, d_dow#20] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [In(d_dow, [6,0]), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 7] +Input [3]: [d_date_sk#18, d_year#19, d_dow#20] + +(20) Filter [codegen id : 7] +Input [3]: [d_date_sk#18, d_year#19, d_dow#20] +Condition : ((d_dow#20 IN (6,0) AND d_year#19 IN (1999,2000,2001)) AND isnotnull(d_date_sk#18)) + +(21) Project [codegen id : 7] +Output [1]: [d_date_sk#18] +Input [3]: [d_date_sk#18, d_year#19, d_dow#20] + +(22) BroadcastExchange +Input [1]: [d_date_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] + +(23) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ss_sold_date_sk#10] +Right keys [1]: [d_date_sk#18] +Join condition: None + +(24) Project [codegen id : 10] +Output [7]: [ss_customer_sk#11, ss_hdemo_sk#12, ss_addr_sk#13, ss_store_sk#14, ss_ticket_number#15, ss_coupon_amt#16, ss_net_profit#17] +Input [9]: [ss_sold_date_sk#10, ss_customer_sk#11, ss_hdemo_sk#12, ss_addr_sk#13, ss_store_sk#14, ss_ticket_number#15, ss_coupon_amt#16, ss_net_profit#17, d_date_sk#18] + +(25) Scan parquet default.store +Output [2]: [s_store_sk#22, s_city#23] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: [In(s_city, [Fairview,Midway]), 
IsNotNull(s_store_sk)] +ReadSchema: struct + +(26) ColumnarToRow [codegen id : 8] +Input [2]: [s_store_sk#22, s_city#23] + +(27) Filter [codegen id : 8] +Input [2]: [s_store_sk#22, s_city#23] +Condition : (s_city#23 IN (Fairview,Midway) AND isnotnull(s_store_sk#22)) + +(28) Project [codegen id : 8] +Output [1]: [s_store_sk#22] +Input [2]: [s_store_sk#22, s_city#23] + +(29) BroadcastExchange +Input [1]: [s_store_sk#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#24] + +(30) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ss_store_sk#14] +Right keys [1]: [s_store_sk#22] +Join condition: None + +(31) Project [codegen id : 10] +Output [6]: [ss_customer_sk#11, ss_hdemo_sk#12, ss_addr_sk#13, ss_ticket_number#15, ss_coupon_amt#16, ss_net_profit#17] +Input [8]: [ss_customer_sk#11, ss_hdemo_sk#12, ss_addr_sk#13, ss_store_sk#14, ss_ticket_number#15, ss_coupon_amt#16, ss_net_profit#17, s_store_sk#22] + +(32) Scan parquet default.household_demographics +Output [3]: [hd_demo_sk#25, hd_dep_count#26, hd_vehicle_count#27] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/household_demographics] +PushedFilters: [Or(EqualTo(hd_dep_count,4),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(33) ColumnarToRow [codegen id : 9] +Input [3]: [hd_demo_sk#25, hd_dep_count#26, hd_vehicle_count#27] + +(34) Filter [codegen id : 9] +Input [3]: [hd_demo_sk#25, hd_dep_count#26, hd_vehicle_count#27] +Condition : (((hd_dep_count#26 = 4) OR (hd_vehicle_count#27 = 3)) AND isnotnull(hd_demo_sk#25)) + +(35) Project [codegen id : 9] +Output [1]: [hd_demo_sk#25] +Input [3]: [hd_demo_sk#25, hd_dep_count#26, hd_vehicle_count#27] + +(36) BroadcastExchange +Input [1]: [hd_demo_sk#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#28] + +(37) BroadcastHashJoin 
[codegen id : 10] +Left keys [1]: [ss_hdemo_sk#12] +Right keys [1]: [hd_demo_sk#25] +Join condition: None + +(38) Project [codegen id : 10] +Output [5]: [ss_customer_sk#11, ss_addr_sk#13, ss_ticket_number#15, ss_coupon_amt#16, ss_net_profit#17] +Input [7]: [ss_customer_sk#11, ss_hdemo_sk#12, ss_addr_sk#13, ss_ticket_number#15, ss_coupon_amt#16, ss_net_profit#17, hd_demo_sk#25] + +(39) Exchange +Input [5]: [ss_customer_sk#11, ss_addr_sk#13, ss_ticket_number#15, ss_coupon_amt#16, ss_net_profit#17] +Arguments: hashpartitioning(ss_addr_sk#13, 5), true, [id=#29] + +(40) Sort [codegen id : 11] +Input [5]: [ss_customer_sk#11, ss_addr_sk#13, ss_ticket_number#15, ss_coupon_amt#16, ss_net_profit#17] +Arguments: [ss_addr_sk#13 ASC NULLS FIRST], false, 0 + +(41) ReusedExchange [Reuses operator id: 9] +Output [2]: [ca_address_sk#6, ca_city#7] + +(42) Sort [codegen id : 13] +Input [2]: [ca_address_sk#6, ca_city#7] +Arguments: [ca_address_sk#6 ASC NULLS FIRST], false, 0 + +(43) SortMergeJoin [codegen id : 14] +Left keys [1]: [ss_addr_sk#13] +Right keys [1]: [ca_address_sk#6] +Join condition: None + +(44) Project [codegen id : 14] +Output [6]: [ss_customer_sk#11, ss_addr_sk#13, ss_ticket_number#15, ss_coupon_amt#16, ss_net_profit#17, ca_city#7] +Input [7]: [ss_customer_sk#11, ss_addr_sk#13, ss_ticket_number#15, ss_coupon_amt#16, ss_net_profit#17, ca_address_sk#6, ca_city#7] + +(45) HashAggregate [codegen id : 14] +Input [6]: [ss_customer_sk#11, ss_addr_sk#13, ss_ticket_number#15, ss_coupon_amt#16, ss_net_profit#17, ca_city#7] +Keys [4]: [ss_ticket_number#15, ss_customer_sk#11, ss_addr_sk#13, ca_city#7] +Functions [2]: [partial_sum(UnscaledValue(ss_coupon_amt#16)), partial_sum(UnscaledValue(ss_net_profit#17))] +Aggregate Attributes [2]: [sum#30, sum#31] +Results [6]: [ss_ticket_number#15, ss_customer_sk#11, ss_addr_sk#13, ca_city#7, sum#32, sum#33] + +(46) HashAggregate [codegen id : 14] +Input [6]: [ss_ticket_number#15, ss_customer_sk#11, ss_addr_sk#13, ca_city#7, sum#32, sum#33] 
+Keys [4]: [ss_ticket_number#15, ss_customer_sk#11, ss_addr_sk#13, ca_city#7] +Functions [2]: [sum(UnscaledValue(ss_coupon_amt#16)), sum(UnscaledValue(ss_net_profit#17))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_coupon_amt#16))#34, sum(UnscaledValue(ss_net_profit#17))#35] +Results [5]: [ss_ticket_number#15, ss_customer_sk#11, ca_city#7 AS bought_city#36, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#16))#34,17,2) AS amt#37, MakeDecimal(sum(UnscaledValue(ss_net_profit#17))#35,17,2) AS profit#38] + +(47) Exchange +Input [5]: [ss_ticket_number#15, ss_customer_sk#11, bought_city#36, amt#37, profit#38] +Arguments: hashpartitioning(ss_customer_sk#11, 5), true, [id=#39] + +(48) Sort [codegen id : 15] +Input [5]: [ss_ticket_number#15, ss_customer_sk#11, bought_city#36, amt#37, profit#38] +Arguments: [ss_customer_sk#11 ASC NULLS FIRST], false, 0 + +(49) SortMergeJoin [codegen id : 16] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#11] +Join condition: NOT (ca_city#7 = bought_city#36) + +(50) Project [codegen id : 16] +Output [7]: [c_last_name#4, c_first_name#3, ca_city#7, bought_city#36, ss_ticket_number#15, amt#37, profit#38] +Input [9]: [c_customer_sk#1, c_first_name#3, c_last_name#4, ca_city#7, ss_ticket_number#15, ss_customer_sk#11, bought_city#36, amt#37, profit#38] + +(51) TakeOrderedAndProject +Input [7]: [c_last_name#4, c_first_name#3, ca_city#7, bought_city#36, ss_ticket_number#15, amt#37, profit#38] +Arguments: 100, [c_last_name#4 ASC NULLS FIRST, c_first_name#3 ASC NULLS FIRST, ca_city#7 ASC NULLS FIRST, bought_city#36 ASC NULLS FIRST, ss_ticket_number#15 ASC NULLS FIRST], [c_last_name#4, c_first_name#3, ca_city#7, bought_city#36, ss_ticket_number#15, amt#37, profit#38] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46.sf100/simplified.txt new file mode 100644 index 0000000000000..db6b41286a64e --- 
/dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46.sf100/simplified.txt @@ -0,0 +1,87 @@ +TakeOrderedAndProject [amt,bought_city,c_first_name,c_last_name,ca_city,profit,ss_ticket_number] + WholeStageCodegen (16) + Project [amt,bought_city,c_first_name,c_last_name,ca_city,profit,ss_ticket_number] + SortMergeJoin [bought_city,c_customer_sk,ca_city,ss_customer_sk] + InputAdapter + WholeStageCodegen (6) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #1 + WholeStageCodegen (5) + Project [c_customer_sk,c_first_name,c_last_name,ca_city] + SortMergeJoin [c_current_addr_sk,ca_address_sk] + InputAdapter + WholeStageCodegen (2) + Sort [c_current_addr_sk] + InputAdapter + Exchange [c_current_addr_sk] #2 + WholeStageCodegen (1) + Filter [c_current_addr_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_customer_sk,c_first_name,c_last_name] + InputAdapter + WholeStageCodegen (4) + Sort [ca_address_sk] + InputAdapter + Exchange [ca_address_sk] #3 + WholeStageCodegen (3) + Filter [ca_address_sk,ca_city] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_city] + InputAdapter + WholeStageCodegen (15) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #4 + WholeStageCodegen (14) + HashAggregate [ca_city,ss_addr_sk,ss_customer_sk,ss_ticket_number,sum,sum] [amt,bought_city,profit,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit))] + HashAggregate [ca_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number] [sum,sum,sum,sum] + Project [ca_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number] + SortMergeJoin [ca_address_sk,ss_addr_sk] + InputAdapter + WholeStageCodegen (11) + Sort [ss_addr_sk] + InputAdapter + Exchange [ss_addr_sk] #5 + WholeStageCodegen (10) + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number] + BroadcastHashJoin 
[hd_demo_sk,ss_hdemo_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_ticket_number] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_addr_sk,ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Project [d_date_sk] + Filter [d_date_sk,d_dow,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_dow,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (8) + Project [s_store_sk] + Filter [s_city,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_city,s_store_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (9) + Project [hd_demo_sk] + Filter [hd_demo_sk,hd_dep_count,hd_vehicle_count] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + WholeStageCodegen (13) + Sort [ca_address_sk] + InputAdapter + ReusedExchange [ca_address_sk,ca_city] #3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/explain.txt new file mode 100644 index 0000000000000..b2c3231d872e6 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/explain.txt @@ -0,0 +1,241 @@ +== Physical Plan == +TakeOrderedAndProject (43) ++- * Project (42) + +- * BroadcastHashJoin Inner BuildRight (41) + :- * Project (39) + : +- * BroadcastHashJoin Inner BuildRight (38) + : :- * HashAggregate (33) + : : +- Exchange (32) + : : +- * HashAggregate (31) + : : +- * 
Project (30) + : : +- * BroadcastHashJoin Inner BuildRight (29) + : : :- * Project (24) + : : : +- * BroadcastHashJoin Inner BuildRight (23) + : : : :- * Project (17) + : : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : : :- * Project (10) + : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : :- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet default.store_sales (1) + : : : : : +- BroadcastExchange (8) + : : : : : +- * Project (7) + : : : : : +- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.date_dim (4) + : : : : +- BroadcastExchange (15) + : : : : +- * Project (14) + : : : : +- * Filter (13) + : : : : +- * ColumnarToRow (12) + : : : : +- Scan parquet default.store (11) + : : : +- BroadcastExchange (22) + : : : +- * Project (21) + : : : +- * Filter (20) + : : : +- * ColumnarToRow (19) + : : : +- Scan parquet default.household_demographics (18) + : : +- BroadcastExchange (28) + : : +- * Filter (27) + : : +- * ColumnarToRow (26) + : : +- Scan parquet default.customer_address (25) + : +- BroadcastExchange (37) + : +- * Filter (36) + : +- * ColumnarToRow (35) + : +- Scan parquet default.customer (34) + +- ReusedExchange (40) + + +(1) Scan parquet default.store_sales +Output [8]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [8]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] + +(3) Filter [codegen id : 5] +Input [8]: 
[ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Condition : ((((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#5)) AND isnotnull(ss_hdemo_sk#3)) AND isnotnull(ss_addr_sk#4)) AND isnotnull(ss_customer_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_dow#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [In(d_dow, [6,0]), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] +Condition : ((d_dow#11 IN (6,0) AND d_year#10 IN (1999,2000,2001)) AND isnotnull(d_date_sk#9)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#9] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] + +(8) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(10) Project [codegen id : 5] +Output [7]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Input [9]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, d_date_sk#9] + +(11) Scan parquet default.store +Output [2]: [s_store_sk#13, s_city#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [In(s_city, [Fairview,Midway]), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 
2] +Input [2]: [s_store_sk#13, s_city#14] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#13, s_city#14] +Condition : (s_city#14 IN (Fairview,Midway) AND isnotnull(s_store_sk#13)) + +(14) Project [codegen id : 2] +Output [1]: [s_store_sk#13] +Input [2]: [s_store_sk#13, s_city#14] + +(15) BroadcastExchange +Input [1]: [s_store_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_store_sk#5] +Right keys [1]: [s_store_sk#13] +Join condition: None + +(17) Project [codegen id : 5] +Output [6]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Input [8]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, s_store_sk#13] + +(18) Scan parquet default.household_demographics +Output [3]: [hd_demo_sk#16, hd_dep_count#17, hd_vehicle_count#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/household_demographics] +PushedFilters: [Or(EqualTo(hd_dep_count,4),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [3]: [hd_demo_sk#16, hd_dep_count#17, hd_vehicle_count#18] + +(20) Filter [codegen id : 3] +Input [3]: [hd_demo_sk#16, hd_dep_count#17, hd_vehicle_count#18] +Condition : (((hd_dep_count#17 = 4) OR (hd_vehicle_count#18 = 3)) AND isnotnull(hd_demo_sk#16)) + +(21) Project [codegen id : 3] +Output [1]: [hd_demo_sk#16] +Input [3]: [hd_demo_sk#16, hd_dep_count#17, hd_vehicle_count#18] + +(22) BroadcastExchange +Input [1]: [hd_demo_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#19] + +(23) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_hdemo_sk#3] +Right keys [1]: [hd_demo_sk#16] +Join condition: None + +(24) 
Project [codegen id : 5] +Output [5]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Input [7]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, hd_demo_sk#16] + +(25) Scan parquet default.customer_address +Output [2]: [ca_address_sk#20, ca_city#21] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)] +ReadSchema: struct + +(26) ColumnarToRow [codegen id : 4] +Input [2]: [ca_address_sk#20, ca_city#21] + +(27) Filter [codegen id : 4] +Input [2]: [ca_address_sk#20, ca_city#21] +Condition : (isnotnull(ca_address_sk#20) AND isnotnull(ca_city#21)) + +(28) BroadcastExchange +Input [2]: [ca_address_sk#20, ca_city#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] + +(29) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_addr_sk#4] +Right keys [1]: [ca_address_sk#20] +Join condition: None + +(30) Project [codegen id : 5] +Output [6]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, ca_city#21] +Input [7]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, ca_address_sk#20, ca_city#21] + +(31) HashAggregate [codegen id : 5] +Input [6]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, ca_city#21] +Keys [4]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#21] +Functions [2]: [partial_sum(UnscaledValue(ss_coupon_amt#7)), partial_sum(UnscaledValue(ss_net_profit#8))] +Aggregate Attributes [2]: [sum#23, sum#24] +Results [6]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#21, sum#25, sum#26] + +(32) Exchange +Input [6]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#21, sum#25, sum#26] +Arguments: 
hashpartitioning(ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#21, 5), true, [id=#27] + +(33) HashAggregate [codegen id : 8] +Input [6]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#21, sum#25, sum#26] +Keys [4]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#21] +Functions [2]: [sum(UnscaledValue(ss_coupon_amt#7)), sum(UnscaledValue(ss_net_profit#8))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_coupon_amt#7))#28, sum(UnscaledValue(ss_net_profit#8))#29] +Results [5]: [ss_ticket_number#6, ss_customer_sk#2, ca_city#21 AS bought_city#30, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#7))#28,17,2) AS amt#31, MakeDecimal(sum(UnscaledValue(ss_net_profit#8))#29,17,2) AS profit#32] + +(34) Scan parquet default.customer +Output [4]: [c_customer_sk#33, c_current_addr_sk#34, c_first_name#35, c_last_name#36] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 6] +Input [4]: [c_customer_sk#33, c_current_addr_sk#34, c_first_name#35, c_last_name#36] + +(36) Filter [codegen id : 6] +Input [4]: [c_customer_sk#33, c_current_addr_sk#34, c_first_name#35, c_last_name#36] +Condition : (isnotnull(c_customer_sk#33) AND isnotnull(c_current_addr_sk#34)) + +(37) BroadcastExchange +Input [4]: [c_customer_sk#33, c_current_addr_sk#34, c_first_name#35, c_last_name#36] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#37] + +(38) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#33] +Join condition: None + +(39) Project [codegen id : 8] +Output [7]: [ss_ticket_number#6, bought_city#30, amt#31, profit#32, c_current_addr_sk#34, c_first_name#35, c_last_name#36] +Input [9]: [ss_ticket_number#6, ss_customer_sk#2, 
bought_city#30, amt#31, profit#32, c_customer_sk#33, c_current_addr_sk#34, c_first_name#35, c_last_name#36] + +(40) ReusedExchange [Reuses operator id: 28] +Output [2]: [ca_address_sk#20, ca_city#21] + +(41) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [c_current_addr_sk#34] +Right keys [1]: [ca_address_sk#20] +Join condition: NOT (ca_city#21 = bought_city#30) + +(42) Project [codegen id : 8] +Output [7]: [c_last_name#36, c_first_name#35, ca_city#21, bought_city#30, ss_ticket_number#6, amt#31, profit#32] +Input [9]: [ss_ticket_number#6, bought_city#30, amt#31, profit#32, c_current_addr_sk#34, c_first_name#35, c_last_name#36, ca_address_sk#20, ca_city#21] + +(43) TakeOrderedAndProject +Input [7]: [c_last_name#36, c_first_name#35, ca_city#21, bought_city#30, ss_ticket_number#6, amt#31, profit#32] +Arguments: 100, [c_last_name#36 ASC NULLS FIRST, c_first_name#35 ASC NULLS FIRST, ca_city#21 ASC NULLS FIRST, bought_city#30 ASC NULLS FIRST, ss_ticket_number#6 ASC NULLS FIRST], [c_last_name#36, c_first_name#35, ca_city#21, bought_city#30, ss_ticket_number#6, amt#31, profit#32] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/simplified.txt new file mode 100644 index 0000000000000..edf9894f202ea --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/simplified.txt @@ -0,0 +1,63 @@ +TakeOrderedAndProject [amt,bought_city,c_first_name,c_last_name,ca_city,profit,ss_ticket_number] + WholeStageCodegen (8) + Project [amt,bought_city,c_first_name,c_last_name,ca_city,profit,ss_ticket_number] + BroadcastHashJoin [bought_city,c_current_addr_sk,ca_address_sk,ca_city] + Project [amt,bought_city,c_current_addr_sk,c_first_name,c_last_name,profit,ss_ticket_number] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + HashAggregate [ca_city,ss_addr_sk,ss_customer_sk,ss_ticket_number,sum,sum] 
[amt,bought_city,profit,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit))] + InputAdapter + Exchange [ca_city,ss_addr_sk,ss_customer_sk,ss_ticket_number] #1 + WholeStageCodegen (5) + HashAggregate [ca_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number] [sum,sum,sum,sum] + Project [ca_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_ticket_number] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_addr_sk,ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_dow,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_dow,d_year] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [s_store_sk] + Filter [s_city,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_city,s_store_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [hd_demo_sk] + Filter [hd_demo_sk,hd_dep_count,hd_vehicle_count] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [ca_address_sk,ca_city] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_city] + 
InputAdapter + BroadcastExchange #6 + WholeStageCodegen (6) + Filter [c_current_addr_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_customer_sk,c_first_name,c_last_name] + InputAdapter + ReusedExchange [ca_address_sk,ca_city] #5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.sf100/explain.txt new file mode 100644 index 0000000000000..563e94e6a0950 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.sf100/explain.txt @@ -0,0 +1,313 @@ +== Physical Plan == +TakeOrderedAndProject (58) ++- * Project (57) + +- * SortMergeJoin Inner (56) + :- * Project (48) + : +- * SortMergeJoin Inner (47) + : :- * Sort (37) + : : +- Exchange (36) + : : +- * Filter (35) + : : +- Window (34) + : : +- * Sort (33) + : : +- Exchange (32) + : : +- * Project (31) + : : +- Window (30) + : : +- * Sort (29) + : : +- Exchange (28) + : : +- * HashAggregate (27) + : : +- Exchange (26) + : : +- * HashAggregate (25) + : : +- * Project (24) + : : +- * SortMergeJoin Inner (23) + : : :- * Sort (17) + : : : +- Exchange (16) + : : : +- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.date_dim (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.store (10) + : : +- * Sort (22) + : : +- Exchange (21) + : : +- * Filter (20) + : : +- * ColumnarToRow (19) + : : +- Scan parquet default.item (18) + : +- * Sort (46) + : +- Exchange (45) + : +- * Project (44) + : +- * Filter (43) + : 
+- Window (42) + : +- * Sort (41) + : +- Exchange (40) + : +- * HashAggregate (39) + : +- ReusedExchange (38) + +- * Sort (55) + +- Exchange (54) + +- * Project (53) + +- * Filter (52) + +- Window (51) + +- * Sort (50) + +- ReusedExchange (49) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_sales_price#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_sales_price#4] + +(3) Filter [codegen id : 3] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_sales_price#4] +Condition : ((isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#3)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#5, d_year#6, d_moy#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Condition : ((((d_year#6 = 1999) OR ((d_year#6 = 1998) AND (d_moy#7 = 12))) OR ((d_year#6 = 2000) AND (d_moy#7 = 1))) AND isnotnull(d_date_sk#5)) + +(7) BroadcastExchange +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#8] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: 
[d_date_sk#5] +Join condition: None + +(9) Project [codegen id : 3] +Output [5]: [ss_item_sk#2, ss_store_sk#3, ss_sales_price#4, d_year#6, d_moy#7] +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_sales_price#4, d_date_sk#5, d_year#6, d_moy#7] + +(10) Scan parquet default.store +Output [3]: [s_store_sk#9, s_store_name#10, s_company_name#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_company_name)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [s_store_sk#9, s_store_name#10, s_company_name#11] + +(12) Filter [codegen id : 2] +Input [3]: [s_store_sk#9, s_store_name#10, s_company_name#11] +Condition : ((isnotnull(s_store_sk#9) AND isnotnull(s_store_name#10)) AND isnotnull(s_company_name#11)) + +(13) BroadcastExchange +Input [3]: [s_store_sk#9, s_store_name#10, s_company_name#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#9] +Join condition: None + +(15) Project [codegen id : 3] +Output [6]: [ss_item_sk#2, ss_sales_price#4, d_year#6, d_moy#7, s_store_name#10, s_company_name#11] +Input [8]: [ss_item_sk#2, ss_store_sk#3, ss_sales_price#4, d_year#6, d_moy#7, s_store_sk#9, s_store_name#10, s_company_name#11] + +(16) Exchange +Input [6]: [ss_item_sk#2, ss_sales_price#4, d_year#6, d_moy#7, s_store_name#10, s_company_name#11] +Arguments: hashpartitioning(ss_item_sk#2, 5), true, [id=#13] + +(17) Sort [codegen id : 4] +Input [6]: [ss_item_sk#2, ss_sales_price#4, d_year#6, d_moy#7, s_store_name#10, s_company_name#11] +Arguments: [ss_item_sk#2 ASC NULLS FIRST], false, 0 + +(18) Scan parquet default.item +Output [3]: [i_item_sk#14, i_brand#15, i_category#16] +Batched: true 
+Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category), IsNotNull(i_brand)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 5] +Input [3]: [i_item_sk#14, i_brand#15, i_category#16] + +(20) Filter [codegen id : 5] +Input [3]: [i_item_sk#14, i_brand#15, i_category#16] +Condition : ((isnotnull(i_item_sk#14) AND isnotnull(i_category#16)) AND isnotnull(i_brand#15)) + +(21) Exchange +Input [3]: [i_item_sk#14, i_brand#15, i_category#16] +Arguments: hashpartitioning(i_item_sk#14, 5), true, [id=#17] + +(22) Sort [codegen id : 6] +Input [3]: [i_item_sk#14, i_brand#15, i_category#16] +Arguments: [i_item_sk#14 ASC NULLS FIRST], false, 0 + +(23) SortMergeJoin [codegen id : 7] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#14] +Join condition: None + +(24) Project [codegen id : 7] +Output [7]: [i_brand#15, i_category#16, ss_sales_price#4, d_year#6, d_moy#7, s_store_name#10, s_company_name#11] +Input [9]: [ss_item_sk#2, ss_sales_price#4, d_year#6, d_moy#7, s_store_name#10, s_company_name#11, i_item_sk#14, i_brand#15, i_category#16] + +(25) HashAggregate [codegen id : 7] +Input [7]: [i_brand#15, i_category#16, ss_sales_price#4, d_year#6, d_moy#7, s_store_name#10, s_company_name#11] +Keys [6]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#4))] +Aggregate Attributes [1]: [sum#18] +Results [7]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, sum#19] + +(26) Exchange +Input [7]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, sum#19] +Arguments: hashpartitioning(i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, 5), true, [id=#20] + +(27) HashAggregate [codegen id : 8] +Input [7]: [i_category#16, i_brand#15, 
s_store_name#10, s_company_name#11, d_year#6, d_moy#7, sum#19] +Keys [6]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7] +Functions [1]: [sum(UnscaledValue(ss_sales_price#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#4))#21] +Results [8]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, MakeDecimal(sum(UnscaledValue(ss_sales_price#4))#21,17,2) AS sum_sales#22, MakeDecimal(sum(UnscaledValue(ss_sales_price#4))#21,17,2) AS _w0#23] + +(28) Exchange +Input [8]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, sum_sales#22, _w0#23] +Arguments: hashpartitioning(i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, 5), true, [id=#24] + +(29) Sort [codegen id : 9] +Input [8]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, sum_sales#22, _w0#23] +Arguments: [i_category#16 ASC NULLS FIRST, i_brand#15 ASC NULLS FIRST, s_store_name#10 ASC NULLS FIRST, s_company_name#11 ASC NULLS FIRST, d_year#6 ASC NULLS FIRST], false, 0 + +(30) Window +Input [8]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, sum_sales#22, _w0#23] +Arguments: [avg(_w0#23) windowspecdefinition(i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#25], [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6] + +(31) Project [codegen id : 10] +Output [8]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, sum_sales#22, avg_monthly_sales#25] +Input [9]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, sum_sales#22, _w0#23, avg_monthly_sales#25] + +(32) Exchange +Input [8]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, sum_sales#22, avg_monthly_sales#25] +Arguments: 
hashpartitioning(i_category#16, i_brand#15, s_store_name#10, s_company_name#11, 5), true, [id=#26] + +(33) Sort [codegen id : 11] +Input [8]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, sum_sales#22, avg_monthly_sales#25] +Arguments: [i_category#16 ASC NULLS FIRST, i_brand#15 ASC NULLS FIRST, s_store_name#10 ASC NULLS FIRST, s_company_name#11 ASC NULLS FIRST, d_year#6 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST], false, 0 + +(34) Window +Input [8]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, sum_sales#22, avg_monthly_sales#25] +Arguments: [rank(d_year#6, d_moy#7) windowspecdefinition(i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#27], [i_category#16, i_brand#15, s_store_name#10, s_company_name#11], [d_year#6 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST] + +(35) Filter [codegen id : 12] +Input [9]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, sum_sales#22, avg_monthly_sales#25, rn#27] +Condition : (((((isnotnull(avg_monthly_sales#25) AND isnotnull(d_year#6)) AND (d_year#6 = 1999)) AND (avg_monthly_sales#25 > 0.000000)) AND (CASE WHEN (avg_monthly_sales#25 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#22 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6), true))) / promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(38,16), true) ELSE null END > 0.1000000000000000)) AND isnotnull(rn#27)) + +(36) Exchange +Input [9]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, sum_sales#22, avg_monthly_sales#25, rn#27] +Arguments: hashpartitioning(i_category#16, i_brand#15, s_store_name#10, s_company_name#11, rn#27, 5), true, [id=#28] + +(37) Sort [codegen id : 13] 
+Input [9]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, sum_sales#22, avg_monthly_sales#25, rn#27] +Arguments: [i_category#16 ASC NULLS FIRST, i_brand#15 ASC NULLS FIRST, s_store_name#10 ASC NULLS FIRST, s_company_name#11 ASC NULLS FIRST, rn#27 ASC NULLS FIRST], false, 0 + +(38) ReusedExchange [Reuses operator id: 26] +Output [7]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum#35] + +(39) HashAggregate [codegen id : 21] +Input [7]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum#35] +Keys [6]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34] +Functions [1]: [sum(UnscaledValue(ss_sales_price#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#4))#36] +Results [7]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, MakeDecimal(sum(UnscaledValue(ss_sales_price#4))#36,17,2) AS sum_sales#37] + +(40) Exchange +Input [7]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum_sales#37] +Arguments: hashpartitioning(i_category#29, i_brand#30, s_store_name#31, s_company_name#32, 5), true, [id=#38] + +(41) Sort [codegen id : 22] +Input [7]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum_sales#37] +Arguments: [i_category#29 ASC NULLS FIRST, i_brand#30 ASC NULLS FIRST, s_store_name#31 ASC NULLS FIRST, s_company_name#32 ASC NULLS FIRST, d_year#33 ASC NULLS FIRST, d_moy#34 ASC NULLS FIRST], false, 0 + +(42) Window +Input [7]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum_sales#37] +Arguments: [rank(d_year#33, d_moy#34) windowspecdefinition(i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33 ASC NULLS FIRST, d_moy#34 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#39], [i_category#29, i_brand#30, 
s_store_name#31, s_company_name#32], [d_year#33 ASC NULLS FIRST, d_moy#34 ASC NULLS FIRST] + +(43) Filter [codegen id : 23] +Input [8]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum_sales#37, rn#39] +Condition : isnotnull(rn#39) + +(44) Project [codegen id : 23] +Output [6]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, sum_sales#37, rn#39] +Input [8]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum_sales#37, rn#39] + +(45) Exchange +Input [6]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, sum_sales#37, rn#39] +Arguments: hashpartitioning(i_category#29, i_brand#30, s_store_name#31, s_company_name#32, (rn#39 + 1), 5), true, [id=#40] + +(46) Sort [codegen id : 24] +Input [6]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, sum_sales#37, rn#39] +Arguments: [i_category#29 ASC NULLS FIRST, i_brand#30 ASC NULLS FIRST, s_store_name#31 ASC NULLS FIRST, s_company_name#32 ASC NULLS FIRST, (rn#39 + 1) ASC NULLS FIRST], false, 0 + +(47) SortMergeJoin [codegen id : 25] +Left keys [5]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, rn#27] +Right keys [5]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, (rn#39 + 1)] +Join condition: None + +(48) Project [codegen id : 25] +Output [10]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, sum_sales#22, avg_monthly_sales#25, rn#27, sum_sales#37] +Input [15]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, sum_sales#22, avg_monthly_sales#25, rn#27, i_category#29, i_brand#30, s_store_name#31, s_company_name#32, sum_sales#37, rn#39] + +(49) ReusedExchange [Reuses operator id: 40] +Output [7]: [i_category#41, i_brand#42, s_store_name#43, s_company_name#44, d_year#45, d_moy#46, sum_sales#47] + +(50) Sort [codegen id : 34] +Input [7]: [i_category#41, i_brand#42, s_store_name#43, s_company_name#44, d_year#45, 
d_moy#46, sum_sales#47] +Arguments: [i_category#41 ASC NULLS FIRST, i_brand#42 ASC NULLS FIRST, s_store_name#43 ASC NULLS FIRST, s_company_name#44 ASC NULLS FIRST, d_year#45 ASC NULLS FIRST, d_moy#46 ASC NULLS FIRST], false, 0 + +(51) Window +Input [7]: [i_category#41, i_brand#42, s_store_name#43, s_company_name#44, d_year#45, d_moy#46, sum_sales#47] +Arguments: [rank(d_year#45, d_moy#46) windowspecdefinition(i_category#41, i_brand#42, s_store_name#43, s_company_name#44, d_year#45 ASC NULLS FIRST, d_moy#46 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#48], [i_category#41, i_brand#42, s_store_name#43, s_company_name#44], [d_year#45 ASC NULLS FIRST, d_moy#46 ASC NULLS FIRST] + +(52) Filter [codegen id : 35] +Input [8]: [i_category#41, i_brand#42, s_store_name#43, s_company_name#44, d_year#45, d_moy#46, sum_sales#47, rn#48] +Condition : isnotnull(rn#48) + +(53) Project [codegen id : 35] +Output [6]: [i_category#41, i_brand#42, s_store_name#43, s_company_name#44, sum_sales#47, rn#48] +Input [8]: [i_category#41, i_brand#42, s_store_name#43, s_company_name#44, d_year#45, d_moy#46, sum_sales#47, rn#48] + +(54) Exchange +Input [6]: [i_category#41, i_brand#42, s_store_name#43, s_company_name#44, sum_sales#47, rn#48] +Arguments: hashpartitioning(i_category#41, i_brand#42, s_store_name#43, s_company_name#44, (rn#48 - 1), 5), true, [id=#49] + +(55) Sort [codegen id : 36] +Input [6]: [i_category#41, i_brand#42, s_store_name#43, s_company_name#44, sum_sales#47, rn#48] +Arguments: [i_category#41 ASC NULLS FIRST, i_brand#42 ASC NULLS FIRST, s_store_name#43 ASC NULLS FIRST, s_company_name#44 ASC NULLS FIRST, (rn#48 - 1) ASC NULLS FIRST], false, 0 + +(56) SortMergeJoin [codegen id : 37] +Left keys [5]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, rn#27] +Right keys [5]: [i_category#41, i_brand#42, s_store_name#43, s_company_name#44, (rn#48 - 1)] +Join condition: None + +(57) Project [codegen id : 37] +Output [10]: 
[i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, avg_monthly_sales#25, sum_sales#22, sum_sales#37 AS psum#50, sum_sales#47 AS nsum#51] +Input [16]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, sum_sales#22, avg_monthly_sales#25, rn#27, sum_sales#37, i_category#41, i_brand#42, s_store_name#43, s_company_name#44, sum_sales#47, rn#48] + +(58) TakeOrderedAndProject +Input [10]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, avg_monthly_sales#25, sum_sales#22, psum#50, nsum#51] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#22 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6), true) ASC NULLS FIRST, s_store_name#10 ASC NULLS FIRST], [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, avg_monthly_sales#25, sum_sales#22, psum#50, nsum#51] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.sf100/simplified.txt new file mode 100644 index 0000000000000..8f2656dc29d18 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.sf100/simplified.txt @@ -0,0 +1,107 @@ +TakeOrderedAndProject [avg_monthly_sales,d_moy,d_year,i_brand,i_category,nsum,psum,s_company_name,s_store_name,sum_sales] + WholeStageCodegen (37) + Project [avg_monthly_sales,d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum_sales,sum_sales,sum_sales] + SortMergeJoin [i_brand,i_brand,i_category,i_category,rn,rn,s_company_name,s_company_name,s_store_name,s_store_name] + InputAdapter + WholeStageCodegen (25) + Project [avg_monthly_sales,d_moy,d_year,i_brand,i_category,rn,s_company_name,s_store_name,sum_sales,sum_sales] + SortMergeJoin 
[i_brand,i_brand,i_category,i_category,rn,rn,s_company_name,s_company_name,s_store_name,s_store_name] + InputAdapter + WholeStageCodegen (13) + Sort [i_brand,i_category,rn,s_company_name,s_store_name] + InputAdapter + Exchange [i_brand,i_category,rn,s_company_name,s_store_name] #1 + WholeStageCodegen (12) + Filter [avg_monthly_sales,d_year,rn,sum_sales] + InputAdapter + Window [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] + WholeStageCodegen (11) + Sort [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] + InputAdapter + Exchange [i_brand,i_category,s_company_name,s_store_name] #2 + WholeStageCodegen (10) + Project [avg_monthly_sales,d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum_sales] + InputAdapter + Window [_w0,d_year,i_brand,i_category,s_company_name,s_store_name] + WholeStageCodegen (9) + Sort [d_year,i_brand,i_category,s_company_name,s_store_name] + InputAdapter + Exchange [d_year,i_brand,i_category,s_company_name,s_store_name] #3 + WholeStageCodegen (8) + HashAggregate [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum] [_w0,sum,sum(UnscaledValue(ss_sales_price)),sum_sales] + InputAdapter + Exchange [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] #4 + WholeStageCodegen (7) + HashAggregate [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,ss_sales_price] [sum,sum] + Project [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,ss_sales_price] + SortMergeJoin [i_item_sk,ss_item_sk] + InputAdapter + WholeStageCodegen (4) + Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #5 + WholeStageCodegen (3) + Project [d_moy,d_year,s_company_name,s_store_name,ss_item_sk,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_moy,d_year,ss_item_sk,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales 
[ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (1) + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (2) + Filter [s_company_name,s_store_name,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_company_name,s_store_name,s_store_sk] + InputAdapter + WholeStageCodegen (6) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #8 + WholeStageCodegen (5) + Filter [i_brand,i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_category,i_item_sk] + InputAdapter + WholeStageCodegen (24) + Sort [i_brand,i_category,rn,s_company_name,s_store_name] + InputAdapter + Exchange [i_brand,i_category,rn,s_company_name,s_store_name] #9 + WholeStageCodegen (23) + Project [i_brand,i_category,rn,s_company_name,s_store_name,sum_sales] + Filter [rn] + InputAdapter + Window [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] + WholeStageCodegen (22) + Sort [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] + InputAdapter + Exchange [i_brand,i_category,s_company_name,s_store_name] #10 + WholeStageCodegen (21) + HashAggregate [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum] [sum,sum(UnscaledValue(ss_sales_price)),sum_sales] + InputAdapter + ReusedExchange [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum] #4 + InputAdapter + WholeStageCodegen (36) + Sort [i_brand,i_category,rn,s_company_name,s_store_name] + InputAdapter + Exchange [i_brand,i_category,rn,s_company_name,s_store_name] #11 + WholeStageCodegen (35) + Project [i_brand,i_category,rn,s_company_name,s_store_name,sum_sales] + Filter [rn] + InputAdapter + Window [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] + WholeStageCodegen (34) + Sort [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] + 
InputAdapter + ReusedExchange [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum_sales] #10 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt new file mode 100644 index 0000000000000..7c0a66d64972b --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt @@ -0,0 +1,278 @@ +== Physical Plan == +TakeOrderedAndProject (51) ++- * Project (50) + +- * BroadcastHashJoin Inner BuildRight (49) + :- * Project (42) + : +- * BroadcastHashJoin Inner BuildRight (41) + : :- * Filter (32) + : : +- Window (31) + : : +- * Sort (30) + : : +- Exchange (29) + : : +- * Project (28) + : : +- Window (27) + : : +- * Sort (26) + : : +- Exchange (25) + : : +- * HashAggregate (24) + : : +- Exchange (23) + : : +- * HashAggregate (22) + : : +- * Project (21) + : : +- * BroadcastHashJoin Inner BuildRight (20) + : : :- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.item (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.store_sales (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.date_dim (10) + : : +- BroadcastExchange (19) + : : +- * Filter (18) + : : +- * ColumnarToRow (17) + : : +- Scan parquet default.store (16) + : +- BroadcastExchange (40) + : +- * Project (39) + : +- * Filter (38) + : +- Window (37) + : +- * Sort (36) + : +- Exchange (35) + : +- * HashAggregate (34) + : +- ReusedExchange (33) + +- BroadcastExchange (48) + +- * Project (47) + +- * Filter (46) + +- Window (45) + +- * Sort (44) + +- ReusedExchange (43) + + +(1) Scan parquet 
default.item +Output [3]: [i_item_sk#1, i_brand#2, i_category#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category), IsNotNull(i_brand)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] + +(3) Filter [codegen id : 4] +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] +Condition : ((isnotnull(i_item_sk#1) AND isnotnull(i_category#3)) AND isnotnull(i_brand#2)) + +(4) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#4, ss_item_sk#5, ss_store_sk#6, ss_sales_price#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [ss_sold_date_sk#4, ss_item_sk#5, ss_store_sk#6, ss_sales_price#7] + +(6) Filter [codegen id : 1] +Input [4]: [ss_sold_date_sk#4, ss_item_sk#5, ss_store_sk#6, ss_sales_price#7] +Condition : ((isnotnull(ss_item_sk#5) AND isnotnull(ss_sold_date_sk#4)) AND isnotnull(ss_store_sk#6)) + +(7) BroadcastExchange +Input [4]: [ss_sold_date_sk#4, ss_item_sk#5, ss_store_sk#6, ss_sales_price#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#8] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [ss_item_sk#5] +Join condition: None + +(9) Project [codegen id : 4] +Output [5]: [i_brand#2, i_category#3, ss_sold_date_sk#4, ss_store_sk#6, ss_sales_price#7] +Input [7]: [i_item_sk#1, i_brand#2, i_category#3, ss_sold_date_sk#4, ss_item_sk#5, ss_store_sk#6, ss_sales_price#7] + +(10) Scan parquet default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_moy#11] +Batched: true 
+Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] + +(12) Filter [codegen id : 2] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Condition : ((((d_year#10 = 1999) OR ((d_year#10 = 1998) AND (d_moy#11 = 12))) OR ((d_year#10 = 2000) AND (d_moy#11 = 1))) AND isnotnull(d_date_sk#9)) + +(13) BroadcastExchange +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#4] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(15) Project [codegen id : 4] +Output [6]: [i_brand#2, i_category#3, ss_store_sk#6, ss_sales_price#7, d_year#10, d_moy#11] +Input [8]: [i_brand#2, i_category#3, ss_sold_date_sk#4, ss_store_sk#6, ss_sales_price#7, d_date_sk#9, d_year#10, d_moy#11] + +(16) Scan parquet default.store +Output [3]: [s_store_sk#13, s_store_name#14, s_company_name#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_company_name)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [s_store_sk#13, s_store_name#14, s_company_name#15] + +(18) Filter [codegen id : 3] +Input [3]: [s_store_sk#13, s_store_name#14, s_company_name#15] +Condition : ((isnotnull(s_store_sk#13) AND isnotnull(s_store_name#14)) AND isnotnull(s_company_name#15)) + +(19) BroadcastExchange +Input [3]: [s_store_sk#13, s_store_name#14, s_company_name#15] +Arguments: 
HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#16] + +(20) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#6] +Right keys [1]: [s_store_sk#13] +Join condition: None + +(21) Project [codegen id : 4] +Output [7]: [i_brand#2, i_category#3, ss_sales_price#7, d_year#10, d_moy#11, s_store_name#14, s_company_name#15] +Input [9]: [i_brand#2, i_category#3, ss_store_sk#6, ss_sales_price#7, d_year#10, d_moy#11, s_store_sk#13, s_store_name#14, s_company_name#15] + +(22) HashAggregate [codegen id : 4] +Input [7]: [i_brand#2, i_category#3, ss_sales_price#7, d_year#10, d_moy#11, s_store_name#14, s_company_name#15] +Keys [6]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [1]: [sum#17] +Results [7]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum#18] + +(23) Exchange +Input [7]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum#18] +Arguments: hashpartitioning(i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, 5), true, [id=#19] + +(24) HashAggregate [codegen id : 5] +Input [7]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum#18] +Keys [6]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11] +Functions [1]: [sum(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#7))#20] +Results [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, MakeDecimal(sum(UnscaledValue(ss_sales_price#7))#20,17,2) AS sum_sales#21, MakeDecimal(sum(UnscaledValue(ss_sales_price#7))#20,17,2) AS _w0#22] + +(25) Exchange +Input [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, _w0#22] +Arguments: hashpartitioning(i_category#3, i_brand#2, 
s_store_name#14, s_company_name#15, d_year#10, 5), true, [id=#23] + +(26) Sort [codegen id : 6] +Input [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, _w0#22] +Arguments: [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, s_store_name#14 ASC NULLS FIRST, s_company_name#15 ASC NULLS FIRST, d_year#10 ASC NULLS FIRST], false, 0 + +(27) Window +Input [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, _w0#22] +Arguments: [avg(_w0#22) windowspecdefinition(i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#24], [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10] + +(28) Project [codegen id : 7] +Output [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, avg_monthly_sales#24] +Input [9]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, _w0#22, avg_monthly_sales#24] + +(29) Exchange +Input [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, avg_monthly_sales#24] +Arguments: hashpartitioning(i_category#3, i_brand#2, s_store_name#14, s_company_name#15, 5), true, [id=#25] + +(30) Sort [codegen id : 8] +Input [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, avg_monthly_sales#24] +Arguments: [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, s_store_name#14 ASC NULLS FIRST, s_company_name#15 ASC NULLS FIRST, d_year#10 ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST], false, 0 + +(31) Window +Input [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, avg_monthly_sales#24] +Arguments: [rank(d_year#10, d_moy#11) windowspecdefinition(i_category#3, i_brand#2, s_store_name#14, 
s_company_name#15, d_year#10 ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#26], [i_category#3, i_brand#2, s_store_name#14, s_company_name#15], [d_year#10 ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST] + +(32) Filter [codegen id : 23] +Input [9]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, avg_monthly_sales#24, rn#26] +Condition : (((((isnotnull(d_year#10) AND isnotnull(avg_monthly_sales#24)) AND (d_year#10 = 1999)) AND (avg_monthly_sales#24 > 0.000000)) AND (CASE WHEN (avg_monthly_sales#24 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6), true))) / promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(38,16), true) ELSE null END > 0.1000000000000000)) AND isnotnull(rn#26)) + +(33) ReusedExchange [Reuses operator id: 23] +Output [7]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum#33] + +(34) HashAggregate [codegen id : 13] +Input [7]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum#33] +Keys [6]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32] +Functions [1]: [sum(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#7))#34] +Results [7]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, MakeDecimal(sum(UnscaledValue(ss_sales_price#7))#34,17,2) AS sum_sales#35] + +(35) Exchange +Input [7]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum_sales#35] +Arguments: hashpartitioning(i_category#27, i_brand#28, s_store_name#29, s_company_name#30, 5), true, [id=#36] + +(36) Sort [codegen id : 14] +Input [7]: [i_category#27, i_brand#28, 
s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum_sales#35] +Arguments: [i_category#27 ASC NULLS FIRST, i_brand#28 ASC NULLS FIRST, s_store_name#29 ASC NULLS FIRST, s_company_name#30 ASC NULLS FIRST, d_year#31 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST], false, 0 + +(37) Window +Input [7]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum_sales#35] +Arguments: [rank(d_year#31, d_moy#32) windowspecdefinition(i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#37], [i_category#27, i_brand#28, s_store_name#29, s_company_name#30], [d_year#31 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST] + +(38) Filter [codegen id : 15] +Input [8]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum_sales#35, rn#37] +Condition : isnotnull(rn#37) + +(39) Project [codegen id : 15] +Output [6]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, sum_sales#35, rn#37] +Input [8]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum_sales#35, rn#37] + +(40) BroadcastExchange +Input [6]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, sum_sales#35, rn#37] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, true] + 1)),false), [id=#38] + +(41) BroadcastHashJoin [codegen id : 23] +Left keys [5]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, rn#26] +Right keys [5]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, (rn#37 + 1)] +Join condition: None + +(42) Project [codegen id : 23] +Output [10]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, avg_monthly_sales#24, rn#26, sum_sales#35] +Input [15]: [i_category#3, i_brand#2, 
s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, avg_monthly_sales#24, rn#26, i_category#27, i_brand#28, s_store_name#29, s_company_name#30, sum_sales#35, rn#37] + +(43) ReusedExchange [Reuses operator id: 35] +Output [7]: [i_category#39, i_brand#40, s_store_name#41, s_company_name#42, d_year#43, d_moy#44, sum_sales#45] + +(44) Sort [codegen id : 21] +Input [7]: [i_category#39, i_brand#40, s_store_name#41, s_company_name#42, d_year#43, d_moy#44, sum_sales#45] +Arguments: [i_category#39 ASC NULLS FIRST, i_brand#40 ASC NULLS FIRST, s_store_name#41 ASC NULLS FIRST, s_company_name#42 ASC NULLS FIRST, d_year#43 ASC NULLS FIRST, d_moy#44 ASC NULLS FIRST], false, 0 + +(45) Window +Input [7]: [i_category#39, i_brand#40, s_store_name#41, s_company_name#42, d_year#43, d_moy#44, sum_sales#45] +Arguments: [rank(d_year#43, d_moy#44) windowspecdefinition(i_category#39, i_brand#40, s_store_name#41, s_company_name#42, d_year#43 ASC NULLS FIRST, d_moy#44 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#46], [i_category#39, i_brand#40, s_store_name#41, s_company_name#42], [d_year#43 ASC NULLS FIRST, d_moy#44 ASC NULLS FIRST] + +(46) Filter [codegen id : 22] +Input [8]: [i_category#39, i_brand#40, s_store_name#41, s_company_name#42, d_year#43, d_moy#44, sum_sales#45, rn#46] +Condition : isnotnull(rn#46) + +(47) Project [codegen id : 22] +Output [6]: [i_category#39, i_brand#40, s_store_name#41, s_company_name#42, sum_sales#45, rn#46] +Input [8]: [i_category#39, i_brand#40, s_store_name#41, s_company_name#42, d_year#43, d_moy#44, sum_sales#45, rn#46] + +(48) BroadcastExchange +Input [6]: [i_category#39, i_brand#40, s_store_name#41, s_company_name#42, sum_sales#45, rn#46] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, true] - 1)),false), [id=#47] + +(49) BroadcastHashJoin [codegen id : 23] +Left keys [5]: 
[i_category#3, i_brand#2, s_store_name#14, s_company_name#15, rn#26] +Right keys [5]: [i_category#39, i_brand#40, s_store_name#41, s_company_name#42, (rn#46 - 1)] +Join condition: None + +(50) Project [codegen id : 23] +Output [10]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, avg_monthly_sales#24, sum_sales#21, sum_sales#35 AS psum#48, sum_sales#45 AS nsum#49] +Input [16]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, avg_monthly_sales#24, rn#26, sum_sales#35, i_category#39, i_brand#40, s_store_name#41, s_company_name#42, sum_sales#45, rn#46] + +(51) TakeOrderedAndProject +Input [10]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, avg_monthly_sales#24, sum_sales#21, psum#48, nsum#49] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6), true) ASC NULLS FIRST, s_store_name#14 ASC NULLS FIRST], [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, avg_monthly_sales#24, sum_sales#21, psum#48, nsum#49] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/simplified.txt new file mode 100644 index 0000000000000..c2cb621d55fcd --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/simplified.txt @@ -0,0 +1,84 @@ +TakeOrderedAndProject [avg_monthly_sales,d_moy,d_year,i_brand,i_category,nsum,psum,s_company_name,s_store_name,sum_sales] + WholeStageCodegen (23) + Project [avg_monthly_sales,d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum_sales,sum_sales,sum_sales] + BroadcastHashJoin [i_brand,i_brand,i_category,i_category,rn,rn,s_company_name,s_company_name,s_store_name,s_store_name] + Project 
[avg_monthly_sales,d_moy,d_year,i_brand,i_category,rn,s_company_name,s_store_name,sum_sales,sum_sales] + BroadcastHashJoin [i_brand,i_brand,i_category,i_category,rn,rn,s_company_name,s_company_name,s_store_name,s_store_name] + Filter [avg_monthly_sales,d_year,rn,sum_sales] + InputAdapter + Window [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] + WholeStageCodegen (8) + Sort [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] + InputAdapter + Exchange [i_brand,i_category,s_company_name,s_store_name] #1 + WholeStageCodegen (7) + Project [avg_monthly_sales,d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum_sales] + InputAdapter + Window [_w0,d_year,i_brand,i_category,s_company_name,s_store_name] + WholeStageCodegen (6) + Sort [d_year,i_brand,i_category,s_company_name,s_store_name] + InputAdapter + Exchange [d_year,i_brand,i_category,s_company_name,s_store_name] #2 + WholeStageCodegen (5) + HashAggregate [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum] [_w0,sum,sum(UnscaledValue(ss_sales_price)),sum_sales] + InputAdapter + Exchange [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] #3 + WholeStageCodegen (4) + HashAggregate [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,ss_sales_price] [sum,sum] + Project [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_moy,d_year,i_brand,i_category,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand,i_category,ss_sales_price,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [i_brand,i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_category,i_item_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales 
[ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Filter [s_company_name,s_store_name,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_company_name,s_store_name,s_store_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (15) + Project [i_brand,i_category,rn,s_company_name,s_store_name,sum_sales] + Filter [rn] + InputAdapter + Window [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] + WholeStageCodegen (14) + Sort [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] + InputAdapter + Exchange [i_brand,i_category,s_company_name,s_store_name] #8 + WholeStageCodegen (13) + HashAggregate [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum] [sum,sum(UnscaledValue(ss_sales_price)),sum_sales] + InputAdapter + ReusedExchange [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum] #3 + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (22) + Project [i_brand,i_category,rn,s_company_name,s_store_name,sum_sales] + Filter [rn] + InputAdapter + Window [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] + WholeStageCodegen (21) + Sort [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] + InputAdapter + ReusedExchange [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum_sales] #8 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48.sf100/explain.txt new file mode 100644 index 0000000000000..8f547d5b3c846 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48.sf100/explain.txt @@ -0,0 +1,183 @@ +== Physical Plan == +* HashAggregate (32) ++- Exchange (31) + 
+- * HashAggregate (30) + +- * Project (29) + +- * BroadcastHashJoin Inner BuildRight (28) + :- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.store (4) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet default.customer_demographics (10) + : +- BroadcastExchange (20) + : +- * Project (19) + : +- * Filter (18) + : +- * ColumnarToRow (17) + : +- Scan parquet default.customer_address (16) + +- BroadcastExchange (27) + +- * Project (26) + +- * Filter (25) + +- * ColumnarToRow (24) + +- Scan parquet default.date_dim (23) + + +(1) Scan parquet default.store_sales +Output [7]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_net_profit#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_sold_date_sk), Or(Or(And(GreaterThanOrEqual(ss_sales_price,100.00),LessThanOrEqual(ss_sales_price,150.00)),And(GreaterThanOrEqual(ss_sales_price,50.00),LessThanOrEqual(ss_sales_price,100.00))),And(GreaterThanOrEqual(ss_sales_price,150.00),LessThanOrEqual(ss_sales_price,200.00))), Or(Or(And(GreaterThanOrEqual(ss_net_profit,0.00),LessThanOrEqual(ss_net_profit,2000.00)),And(GreaterThanOrEqual(ss_net_profit,150.00),LessThanOrEqual(ss_net_profit,3000.00))),And(GreaterThanOrEqual(ss_net_profit,50.00),LessThanOrEqual(ss_net_profit,25000.00)))] +ReadSchema: struct + +(2) 
ColumnarToRow [codegen id : 5] +Input [7]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_net_profit#7] + +(3) Filter [codegen id : 5] +Input [7]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_net_profit#7] +Condition : (((((isnotnull(ss_store_sk#4) AND isnotnull(ss_cdemo_sk#2)) AND isnotnull(ss_addr_sk#3)) AND isnotnull(ss_sold_date_sk#1)) AND ((((ss_sales_price#6 >= 100.00) AND (ss_sales_price#6 <= 150.00)) OR ((ss_sales_price#6 >= 50.00) AND (ss_sales_price#6 <= 100.00))) OR ((ss_sales_price#6 >= 150.00) AND (ss_sales_price#6 <= 200.00)))) AND ((((ss_net_profit#7 >= 0.00) AND (ss_net_profit#7 <= 2000.00)) OR ((ss_net_profit#7 >= 150.00) AND (ss_net_profit#7 <= 3000.00))) OR ((ss_net_profit#7 >= 50.00) AND (ss_net_profit#7 <= 25000.00)))) + +(4) Scan parquet default.store +Output [1]: [s_store_sk#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [1]: [s_store_sk#8] + +(6) Filter [codegen id : 1] +Input [1]: [s_store_sk#8] +Condition : isnotnull(s_store_sk#8) + +(7) BroadcastExchange +Input [1]: [s_store_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] + +(8) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#8] +Join condition: None + +(9) Project [codegen id : 5] +Output [6]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_addr_sk#3, ss_quantity#5, ss_sales_price#6, ss_net_profit#7] +Input [8]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_net_profit#7, s_store_sk#8] + +(10) Scan parquet default.customer_demographics +Output [3]: [cd_demo_sk#10, cd_marital_status#11, 
cd_education_status#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk), Or(Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,4 yr Degree)),And(EqualTo(cd_marital_status,D),EqualTo(cd_education_status,2 yr Degree))),And(EqualTo(cd_marital_status,S),EqualTo(cd_education_status,College)))] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [cd_demo_sk#10, cd_marital_status#11, cd_education_status#12] + +(12) Filter [codegen id : 2] +Input [3]: [cd_demo_sk#10, cd_marital_status#11, cd_education_status#12] +Condition : (isnotnull(cd_demo_sk#10) AND ((((cd_marital_status#11 = M) AND (cd_education_status#12 = 4 yr Degree)) OR ((cd_marital_status#11 = D) AND (cd_education_status#12 = 2 yr Degree))) OR ((cd_marital_status#11 = S) AND (cd_education_status#12 = College)))) + +(13) BroadcastExchange +Input [3]: [cd_demo_sk#10, cd_marital_status#11, cd_education_status#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#13] + +(14) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#10] +Join condition: ((((((cd_marital_status#11 = M) AND (cd_education_status#12 = 4 yr Degree)) AND (ss_sales_price#6 >= 100.00)) AND (ss_sales_price#6 <= 150.00)) OR ((((cd_marital_status#11 = D) AND (cd_education_status#12 = 2 yr Degree)) AND (ss_sales_price#6 >= 50.00)) AND (ss_sales_price#6 <= 100.00))) OR ((((cd_marital_status#11 = S) AND (cd_education_status#12 = College)) AND (ss_sales_price#6 >= 150.00)) AND (ss_sales_price#6 <= 200.00))) + +(15) Project [codegen id : 5] +Output [4]: [ss_sold_date_sk#1, ss_addr_sk#3, ss_quantity#5, ss_net_profit#7] +Input [9]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_addr_sk#3, ss_quantity#5, ss_sales_price#6, ss_net_profit#7, cd_demo_sk#10, 
cd_marital_status#11, cd_education_status#12] + +(16) Scan parquet default.customer_address +Output [3]: [ca_address_sk#14, ca_state#15, ca_country#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk), Or(Or(In(ca_state, [CO,OH,TX]),In(ca_state, [OR,MN,KY])),In(ca_state, [VA,CA,MS]))] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [ca_address_sk#14, ca_state#15, ca_country#16] + +(18) Filter [codegen id : 3] +Input [3]: [ca_address_sk#14, ca_state#15, ca_country#16] +Condition : (((isnotnull(ca_country#16) AND (ca_country#16 = United States)) AND isnotnull(ca_address_sk#14)) AND ((ca_state#15 IN (CO,OH,TX) OR ca_state#15 IN (OR,MN,KY)) OR ca_state#15 IN (VA,CA,MS))) + +(19) Project [codegen id : 3] +Output [2]: [ca_address_sk#14, ca_state#15] +Input [3]: [ca_address_sk#14, ca_state#15, ca_country#16] + +(20) BroadcastExchange +Input [2]: [ca_address_sk#14, ca_state#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] + +(21) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_addr_sk#3] +Right keys [1]: [ca_address_sk#14] +Join condition: ((((ca_state#15 IN (CO,OH,TX) AND (ss_net_profit#7 >= 0.00)) AND (ss_net_profit#7 <= 2000.00)) OR ((ca_state#15 IN (OR,MN,KY) AND (ss_net_profit#7 >= 150.00)) AND (ss_net_profit#7 <= 3000.00))) OR ((ca_state#15 IN (VA,CA,MS) AND (ss_net_profit#7 >= 50.00)) AND (ss_net_profit#7 <= 25000.00))) + +(22) Project [codegen id : 5] +Output [2]: [ss_sold_date_sk#1, ss_quantity#5] +Input [6]: [ss_sold_date_sk#1, ss_addr_sk#3, ss_quantity#5, ss_net_profit#7, ca_address_sk#14, ca_state#15] + +(23) Scan parquet default.date_dim +Output [2]: [d_date_sk#18, d_year#19] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [2]: [d_date_sk#18, d_year#19] + +(25) Filter [codegen id : 4] +Input [2]: [d_date_sk#18, d_year#19] +Condition : ((isnotnull(d_year#19) AND (d_year#19 = 2001)) AND isnotnull(d_date_sk#18)) + +(26) Project [codegen id : 4] +Output [1]: [d_date_sk#18] +Input [2]: [d_date_sk#18, d_year#19] + +(27) BroadcastExchange +Input [1]: [d_date_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#18] +Join condition: None + +(29) Project [codegen id : 5] +Output [1]: [ss_quantity#5] +Input [3]: [ss_sold_date_sk#1, ss_quantity#5, d_date_sk#18] + +(30) HashAggregate [codegen id : 5] +Input [1]: [ss_quantity#5] +Keys: [] +Functions [1]: [partial_sum(cast(ss_quantity#5 as bigint))] +Aggregate Attributes [1]: [sum#21] +Results [1]: [sum#22] + +(31) Exchange +Input [1]: [sum#22] +Arguments: SinglePartition, true, [id=#23] + +(32) HashAggregate [codegen id : 6] +Input [1]: [sum#22] +Keys: [] +Functions [1]: [sum(cast(ss_quantity#5 as bigint))] +Aggregate Attributes [1]: [sum(cast(ss_quantity#5 as bigint))#24] +Results [1]: [sum(cast(ss_quantity#5 as bigint))#24 AS sum(ss_quantity)#25] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48.sf100/simplified.txt new file mode 100644 index 0000000000000..0a68c861c2844 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48.sf100/simplified.txt @@ -0,0 +1,48 @@ +WholeStageCodegen (6) + HashAggregate [sum] [sum,sum(cast(ss_quantity as 
bigint)),sum(ss_quantity)] + InputAdapter + Exchange #1 + WholeStageCodegen (5) + HashAggregate [ss_quantity] [sum,sum] + Project [ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_quantity,ss_sold_date_sk] + BroadcastHashJoin [ca_address_sk,ca_state,ss_addr_sk,ss_net_profit] + Project [ss_addr_sk,ss_net_profit,ss_quantity,ss_sold_date_sk] + BroadcastHashJoin [cd_demo_sk,cd_education_status,cd_marital_status,ss_cdemo_sk,ss_sales_price] + Project [ss_addr_sk,ss_cdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Filter [ss_addr_sk,ss_cdemo_sk,ss_net_profit,ss_sales_price,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_cdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [cd_demo_sk,cd_education_status,cd_marital_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_marital_status] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [ca_address_sk,ca_state] + Filter [ca_address_sk,ca_country,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_country,ca_state] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/explain.txt new file mode 100644 index 0000000000000..79fadac22c93c --- /dev/null +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/explain.txt @@ -0,0 +1,183 @@ +== Physical Plan == +* HashAggregate (32) ++- Exchange (31) + +- * HashAggregate (30) + +- * Project (29) + +- * BroadcastHashJoin Inner BuildRight (28) + :- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.store (4) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet default.customer_demographics (10) + : +- BroadcastExchange (20) + : +- * Project (19) + : +- * Filter (18) + : +- * ColumnarToRow (17) + : +- Scan parquet default.customer_address (16) + +- BroadcastExchange (27) + +- * Project (26) + +- * Filter (25) + +- * ColumnarToRow (24) + +- Scan parquet default.date_dim (23) + + +(1) Scan parquet default.store_sales +Output [7]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_net_profit#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_sold_date_sk), Or(Or(And(GreaterThanOrEqual(ss_sales_price,100.00),LessThanOrEqual(ss_sales_price,150.00)),And(GreaterThanOrEqual(ss_sales_price,50.00),LessThanOrEqual(ss_sales_price,100.00))),And(GreaterThanOrEqual(ss_sales_price,150.00),LessThanOrEqual(ss_sales_price,200.00))), 
Or(Or(And(GreaterThanOrEqual(ss_net_profit,0.00),LessThanOrEqual(ss_net_profit,2000.00)),And(GreaterThanOrEqual(ss_net_profit,150.00),LessThanOrEqual(ss_net_profit,3000.00))),And(GreaterThanOrEqual(ss_net_profit,50.00),LessThanOrEqual(ss_net_profit,25000.00)))] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [7]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_net_profit#7] + +(3) Filter [codegen id : 5] +Input [7]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_net_profit#7] +Condition : (((((isnotnull(ss_store_sk#4) AND isnotnull(ss_cdemo_sk#2)) AND isnotnull(ss_addr_sk#3)) AND isnotnull(ss_sold_date_sk#1)) AND ((((ss_sales_price#6 >= 100.00) AND (ss_sales_price#6 <= 150.00)) OR ((ss_sales_price#6 >= 50.00) AND (ss_sales_price#6 <= 100.00))) OR ((ss_sales_price#6 >= 150.00) AND (ss_sales_price#6 <= 200.00)))) AND ((((ss_net_profit#7 >= 0.00) AND (ss_net_profit#7 <= 2000.00)) OR ((ss_net_profit#7 >= 150.00) AND (ss_net_profit#7 <= 3000.00))) OR ((ss_net_profit#7 >= 50.00) AND (ss_net_profit#7 <= 25000.00)))) + +(4) Scan parquet default.store +Output [1]: [s_store_sk#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [1]: [s_store_sk#8] + +(6) Filter [codegen id : 1] +Input [1]: [s_store_sk#8] +Condition : isnotnull(s_store_sk#8) + +(7) BroadcastExchange +Input [1]: [s_store_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] + +(8) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#8] +Join condition: None + +(9) Project [codegen id : 5] +Output [6]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_addr_sk#3, ss_quantity#5, ss_sales_price#6, 
ss_net_profit#7] +Input [8]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_net_profit#7, s_store_sk#8] + +(10) Scan parquet default.customer_demographics +Output [3]: [cd_demo_sk#10, cd_marital_status#11, cd_education_status#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk), Or(Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,4 yr Degree)),And(EqualTo(cd_marital_status,D),EqualTo(cd_education_status,2 yr Degree))),And(EqualTo(cd_marital_status,S),EqualTo(cd_education_status,College)))] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [cd_demo_sk#10, cd_marital_status#11, cd_education_status#12] + +(12) Filter [codegen id : 2] +Input [3]: [cd_demo_sk#10, cd_marital_status#11, cd_education_status#12] +Condition : (isnotnull(cd_demo_sk#10) AND ((((cd_marital_status#11 = M) AND (cd_education_status#12 = 4 yr Degree)) OR ((cd_marital_status#11 = D) AND (cd_education_status#12 = 2 yr Degree))) OR ((cd_marital_status#11 = S) AND (cd_education_status#12 = College)))) + +(13) BroadcastExchange +Input [3]: [cd_demo_sk#10, cd_marital_status#11, cd_education_status#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#13] + +(14) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#10] +Join condition: ((((((cd_marital_status#11 = M) AND (cd_education_status#12 = 4 yr Degree)) AND (ss_sales_price#6 >= 100.00)) AND (ss_sales_price#6 <= 150.00)) OR ((((cd_marital_status#11 = D) AND (cd_education_status#12 = 2 yr Degree)) AND (ss_sales_price#6 >= 50.00)) AND (ss_sales_price#6 <= 100.00))) OR ((((cd_marital_status#11 = S) AND (cd_education_status#12 = College)) AND (ss_sales_price#6 >= 150.00)) AND (ss_sales_price#6 <= 200.00))) + 
+(15) Project [codegen id : 5] +Output [4]: [ss_sold_date_sk#1, ss_addr_sk#3, ss_quantity#5, ss_net_profit#7] +Input [9]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_addr_sk#3, ss_quantity#5, ss_sales_price#6, ss_net_profit#7, cd_demo_sk#10, cd_marital_status#11, cd_education_status#12] + +(16) Scan parquet default.customer_address +Output [3]: [ca_address_sk#14, ca_state#15, ca_country#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk), Or(Or(In(ca_state, [CO,OH,TX]),In(ca_state, [OR,MN,KY])),In(ca_state, [VA,CA,MS]))] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [ca_address_sk#14, ca_state#15, ca_country#16] + +(18) Filter [codegen id : 3] +Input [3]: [ca_address_sk#14, ca_state#15, ca_country#16] +Condition : (((isnotnull(ca_country#16) AND (ca_country#16 = United States)) AND isnotnull(ca_address_sk#14)) AND ((ca_state#15 IN (CO,OH,TX) OR ca_state#15 IN (OR,MN,KY)) OR ca_state#15 IN (VA,CA,MS))) + +(19) Project [codegen id : 3] +Output [2]: [ca_address_sk#14, ca_state#15] +Input [3]: [ca_address_sk#14, ca_state#15, ca_country#16] + +(20) BroadcastExchange +Input [2]: [ca_address_sk#14, ca_state#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] + +(21) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_addr_sk#3] +Right keys [1]: [ca_address_sk#14] +Join condition: ((((ca_state#15 IN (CO,OH,TX) AND (ss_net_profit#7 >= 0.00)) AND (ss_net_profit#7 <= 2000.00)) OR ((ca_state#15 IN (OR,MN,KY) AND (ss_net_profit#7 >= 150.00)) AND (ss_net_profit#7 <= 3000.00))) OR ((ca_state#15 IN (VA,CA,MS) AND (ss_net_profit#7 >= 50.00)) AND (ss_net_profit#7 <= 25000.00))) + +(22) Project [codegen id : 5] +Output [2]: [ss_sold_date_sk#1, ss_quantity#5] +Input [6]: [ss_sold_date_sk#1, 
ss_addr_sk#3, ss_quantity#5, ss_net_profit#7, ca_address_sk#14, ca_state#15] + +(23) Scan parquet default.date_dim +Output [2]: [d_date_sk#18, d_year#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [2]: [d_date_sk#18, d_year#19] + +(25) Filter [codegen id : 4] +Input [2]: [d_date_sk#18, d_year#19] +Condition : ((isnotnull(d_year#19) AND (d_year#19 = 2001)) AND isnotnull(d_date_sk#18)) + +(26) Project [codegen id : 4] +Output [1]: [d_date_sk#18] +Input [2]: [d_date_sk#18, d_year#19] + +(27) BroadcastExchange +Input [1]: [d_date_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#18] +Join condition: None + +(29) Project [codegen id : 5] +Output [1]: [ss_quantity#5] +Input [3]: [ss_sold_date_sk#1, ss_quantity#5, d_date_sk#18] + +(30) HashAggregate [codegen id : 5] +Input [1]: [ss_quantity#5] +Keys: [] +Functions [1]: [partial_sum(cast(ss_quantity#5 as bigint))] +Aggregate Attributes [1]: [sum#21] +Results [1]: [sum#22] + +(31) Exchange +Input [1]: [sum#22] +Arguments: SinglePartition, true, [id=#23] + +(32) HashAggregate [codegen id : 6] +Input [1]: [sum#22] +Keys: [] +Functions [1]: [sum(cast(ss_quantity#5 as bigint))] +Aggregate Attributes [1]: [sum(cast(ss_quantity#5 as bigint))#24] +Results [1]: [sum(cast(ss_quantity#5 as bigint))#24 AS sum(ss_quantity)#25] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/simplified.txt new file mode 100644 index 0000000000000..0a68c861c2844 --- /dev/null +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/simplified.txt @@ -0,0 +1,48 @@ +WholeStageCodegen (6) + HashAggregate [sum] [sum,sum(cast(ss_quantity as bigint)),sum(ss_quantity)] + InputAdapter + Exchange #1 + WholeStageCodegen (5) + HashAggregate [ss_quantity] [sum,sum] + Project [ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_quantity,ss_sold_date_sk] + BroadcastHashJoin [ca_address_sk,ca_state,ss_addr_sk,ss_net_profit] + Project [ss_addr_sk,ss_net_profit,ss_quantity,ss_sold_date_sk] + BroadcastHashJoin [cd_demo_sk,cd_education_status,cd_marital_status,ss_cdemo_sk,ss_sales_price] + Project [ss_addr_sk,ss_cdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Filter [ss_addr_sk,ss_cdemo_sk,ss_net_profit,ss_sales_price,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_cdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [cd_demo_sk,cd_education_status,cd_marital_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_marital_status] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [ca_address_sk,ca_state] + Filter [ca_address_sk,ca_country,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_country,ca_state] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49.sf100/explain.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49.sf100/explain.txt new file mode 100644 index 0000000000000..ea0845178bd87 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49.sf100/explain.txt @@ -0,0 +1,478 @@ +== Physical Plan == +TakeOrderedAndProject (87) ++- * HashAggregate (86) + +- Exchange (85) + +- * HashAggregate (84) + +- Union (83) + :- * Project (30) + : +- * Filter (29) + : +- Window (28) + : +- * Sort (27) + : +- Window (26) + : +- * Sort (25) + : +- Exchange (24) + : +- * HashAggregate (23) + : +- Exchange (22) + : +- * HashAggregate (21) + : +- * Project (20) + : +- * SortMergeJoin Inner (19) + : :- * Sort (13) + : : +- Exchange (12) + : : +- * Project (11) + : : +- * BroadcastHashJoin Inner BuildRight (10) + : : :- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.web_sales (1) + : : +- BroadcastExchange (9) + : : +- * Project (8) + : : +- * Filter (7) + : : +- * ColumnarToRow (6) + : : +- Scan parquet default.date_dim (5) + : +- * Sort (18) + : +- Exchange (17) + : +- * Filter (16) + : +- * ColumnarToRow (15) + : +- Scan parquet default.web_returns (14) + :- * Project (56) + : +- * Filter (55) + : +- Window (54) + : +- * Sort (53) + : +- Window (52) + : +- * Sort (51) + : +- Exchange (50) + : +- * HashAggregate (49) + : +- Exchange (48) + : +- * HashAggregate (47) + : +- * Project (46) + : +- * SortMergeJoin Inner (45) + : :- * Sort (39) + : : +- Exchange (38) + : : +- * Project (37) + : : +- * BroadcastHashJoin Inner BuildRight (36) + : : :- * Project (34) + : : : +- * Filter (33) + : : : +- * ColumnarToRow (32) + : : : +- Scan parquet default.catalog_sales (31) + : : +- ReusedExchange (35) + : +- * Sort (44) + : +- Exchange (43) + : +- * Filter (42) + : +- * ColumnarToRow (41) + : +- Scan parquet default.catalog_returns (40) + +- * Project (82) + +- * Filter (81) + +- Window (80) + +- * Sort (79) + +- Window (78) + +- * Sort 
(77) + +- Exchange (76) + +- * HashAggregate (75) + +- Exchange (74) + +- * HashAggregate (73) + +- * Project (72) + +- * SortMergeJoin Inner (71) + :- * Sort (65) + : +- Exchange (64) + : +- * Project (63) + : +- * BroadcastHashJoin Inner BuildRight (62) + : :- * Project (60) + : : +- * Filter (59) + : : +- * ColumnarToRow (58) + : : +- Scan parquet default.store_sales (57) + : +- ReusedExchange (61) + +- * Sort (70) + +- Exchange (69) + +- * Filter (68) + +- * ColumnarToRow (67) + +- Scan parquet default.store_returns (66) + + +(1) Scan parquet default.web_sales +Output [6]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5, ws_net_profit#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_net_profit), IsNotNull(ws_net_paid), IsNotNull(ws_quantity), GreaterThan(ws_net_profit,1.00), GreaterThan(ws_net_paid,0.00), GreaterThan(ws_quantity,0), IsNotNull(ws_order_number), IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [6]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5, ws_net_profit#6] + +(3) Filter [codegen id : 2] +Input [6]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5, ws_net_profit#6] +Condition : ((((((((isnotnull(ws_net_profit#6) AND isnotnull(ws_net_paid#5)) AND isnotnull(ws_quantity#4)) AND (ws_net_profit#6 > 1.00)) AND (ws_net_paid#5 > 0.00)) AND (ws_quantity#4 > 0)) AND isnotnull(ws_order_number#3)) AND isnotnull(ws_item_sk#2)) AND isnotnull(ws_sold_date_sk#1)) + +(4) Project [codegen id : 2] +Output [5]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5] +Input [6]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5, ws_net_profit#6] + +(5) Scan parquet 
default.date_dim +Output [3]: [d_date_sk#7, d_year#8, d_moy#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,12), IsNotNull(d_date_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#7, d_year#8, d_moy#9] + +(7) Filter [codegen id : 1] +Input [3]: [d_date_sk#7, d_year#8, d_moy#9] +Condition : ((((isnotnull(d_year#8) AND isnotnull(d_moy#9)) AND (d_year#8 = 2001)) AND (d_moy#9 = 12)) AND isnotnull(d_date_sk#7)) + +(8) Project [codegen id : 1] +Output [1]: [d_date_sk#7] +Input [3]: [d_date_sk#7, d_year#8, d_moy#9] + +(9) BroadcastExchange +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] + +(10) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ws_sold_date_sk#1] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(11) Project [codegen id : 2] +Output [4]: [ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5] +Input [6]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5, d_date_sk#7] + +(12) Exchange +Input [4]: [ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5] +Arguments: hashpartitioning(cast(ws_order_number#3 as bigint), cast(ws_item_sk#2 as bigint), 5), true, [id=#11] + +(13) Sort [codegen id : 3] +Input [4]: [ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5] +Arguments: [cast(ws_order_number#3 as bigint) ASC NULLS FIRST, cast(ws_item_sk#2 as bigint) ASC NULLS FIRST], false, 0 + +(14) Scan parquet default.web_returns +Output [4]: [wr_item_sk#12, wr_order_number#13, wr_return_quantity#14, wr_return_amt#15] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_returns] +PushedFilters: [IsNotNull(wr_return_amt), GreaterThan(wr_return_amt,10000.00), IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 4] +Input [4]: [wr_item_sk#12, wr_order_number#13, wr_return_quantity#14, wr_return_amt#15] + +(16) Filter [codegen id : 4] +Input [4]: [wr_item_sk#12, wr_order_number#13, wr_return_quantity#14, wr_return_amt#15] +Condition : (((isnotnull(wr_return_amt#15) AND (wr_return_amt#15 > 10000.00)) AND isnotnull(wr_order_number#13)) AND isnotnull(wr_item_sk#12)) + +(17) Exchange +Input [4]: [wr_item_sk#12, wr_order_number#13, wr_return_quantity#14, wr_return_amt#15] +Arguments: hashpartitioning(wr_order_number#13, wr_item_sk#12, 5), true, [id=#16] + +(18) Sort [codegen id : 5] +Input [4]: [wr_item_sk#12, wr_order_number#13, wr_return_quantity#14, wr_return_amt#15] +Arguments: [wr_order_number#13 ASC NULLS FIRST, wr_item_sk#12 ASC NULLS FIRST], false, 0 + +(19) SortMergeJoin [codegen id : 6] +Left keys [2]: [cast(ws_order_number#3 as bigint), cast(ws_item_sk#2 as bigint)] +Right keys [2]: [wr_order_number#13, wr_item_sk#12] +Join condition: None + +(20) Project [codegen id : 6] +Output [5]: [ws_item_sk#2, ws_quantity#4, ws_net_paid#5, wr_return_quantity#14, wr_return_amt#15] +Input [8]: [ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5, wr_item_sk#12, wr_order_number#13, wr_return_quantity#14, wr_return_amt#15] + +(21) HashAggregate [codegen id : 6] +Input [5]: [ws_item_sk#2, ws_quantity#4, ws_net_paid#5, wr_return_quantity#14, wr_return_amt#15] +Keys [1]: [ws_item_sk#2] +Functions [4]: [partial_sum(cast(coalesce(wr_return_quantity#14, 0) as bigint)), partial_sum(cast(coalesce(ws_quantity#4, 0) as bigint)), partial_sum(coalesce(cast(wr_return_amt#15 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ws_net_paid#5 as decimal(12,2)), 0.00))] 
+Aggregate Attributes [6]: [sum#17, sum#18, sum#19, isEmpty#20, sum#21, isEmpty#22] +Results [7]: [ws_item_sk#2, sum#23, sum#24, sum#25, isEmpty#26, sum#27, isEmpty#28] + +(22) Exchange +Input [7]: [ws_item_sk#2, sum#23, sum#24, sum#25, isEmpty#26, sum#27, isEmpty#28] +Arguments: hashpartitioning(ws_item_sk#2, 5), true, [id=#29] + +(23) HashAggregate [codegen id : 7] +Input [7]: [ws_item_sk#2, sum#23, sum#24, sum#25, isEmpty#26, sum#27, isEmpty#28] +Keys [1]: [ws_item_sk#2] +Functions [4]: [sum(cast(coalesce(wr_return_quantity#14, 0) as bigint)), sum(cast(coalesce(ws_quantity#4, 0) as bigint)), sum(coalesce(cast(wr_return_amt#15 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#5 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(cast(coalesce(wr_return_quantity#14, 0) as bigint))#30, sum(cast(coalesce(ws_quantity#4, 0) as bigint))#31, sum(coalesce(cast(wr_return_amt#15 as decimal(12,2)), 0.00))#32, sum(coalesce(cast(ws_net_paid#5 as decimal(12,2)), 0.00))#33] +Results [3]: [ws_item_sk#2 AS item#34, CheckOverflow((promote_precision(cast(sum(cast(coalesce(wr_return_quantity#14, 0) as bigint))#30 as decimal(15,4))) / promote_precision(cast(sum(cast(coalesce(ws_quantity#4, 0) as bigint))#31 as decimal(15,4)))), DecimalType(35,20), true) AS return_ratio#35, CheckOverflow((promote_precision(cast(sum(coalesce(cast(wr_return_amt#15 as decimal(12,2)), 0.00))#32 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ws_net_paid#5 as decimal(12,2)), 0.00))#33 as decimal(15,4)))), DecimalType(35,20), true) AS currency_ratio#36] + +(24) Exchange +Input [3]: [item#34, return_ratio#35, currency_ratio#36] +Arguments: SinglePartition, true, [id=#37] + +(25) Sort [codegen id : 8] +Input [3]: [item#34, return_ratio#35, currency_ratio#36] +Arguments: [return_ratio#35 ASC NULLS FIRST], false, 0 + +(26) Window +Input [3]: [item#34, return_ratio#35, currency_ratio#36] +Arguments: [rank(return_ratio#35) windowspecdefinition(return_ratio#35 ASC NULLS FIRST, 
specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#38], [return_ratio#35 ASC NULLS FIRST] + +(27) Sort [codegen id : 9] +Input [4]: [item#34, return_ratio#35, currency_ratio#36, return_rank#38] +Arguments: [currency_ratio#36 ASC NULLS FIRST], false, 0 + +(28) Window +Input [4]: [item#34, return_ratio#35, currency_ratio#36, return_rank#38] +Arguments: [rank(currency_ratio#36) windowspecdefinition(currency_ratio#36 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#39], [currency_ratio#36 ASC NULLS FIRST] + +(29) Filter [codegen id : 10] +Input [5]: [item#34, return_ratio#35, currency_ratio#36, return_rank#38, currency_rank#39] +Condition : ((return_rank#38 <= 10) OR (currency_rank#39 <= 10)) + +(30) Project [codegen id : 10] +Output [5]: [web AS channel#40, item#34, return_ratio#35, return_rank#38, currency_rank#39] +Input [5]: [item#34, return_ratio#35, currency_ratio#36, return_rank#38, currency_rank#39] + +(31) Scan parquet default.catalog_sales +Output [6]: [cs_sold_date_sk#41, cs_item_sk#42, cs_order_number#43, cs_quantity#44, cs_net_paid#45, cs_net_profit#46] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_net_profit), IsNotNull(cs_quantity), IsNotNull(cs_net_paid), GreaterThan(cs_net_profit,1.00), GreaterThan(cs_net_paid,0.00), GreaterThan(cs_quantity,0), IsNotNull(cs_item_sk), IsNotNull(cs_order_number), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 12] +Input [6]: [cs_sold_date_sk#41, cs_item_sk#42, cs_order_number#43, cs_quantity#44, cs_net_paid#45, cs_net_profit#46] + +(33) Filter [codegen id : 12] +Input [6]: [cs_sold_date_sk#41, cs_item_sk#42, cs_order_number#43, cs_quantity#44, cs_net_paid#45, cs_net_profit#46] +Condition : ((((((((isnotnull(cs_net_profit#46) AND 
isnotnull(cs_quantity#44)) AND isnotnull(cs_net_paid#45)) AND (cs_net_profit#46 > 1.00)) AND (cs_net_paid#45 > 0.00)) AND (cs_quantity#44 > 0)) AND isnotnull(cs_item_sk#42)) AND isnotnull(cs_order_number#43)) AND isnotnull(cs_sold_date_sk#41)) + +(34) Project [codegen id : 12] +Output [5]: [cs_sold_date_sk#41, cs_item_sk#42, cs_order_number#43, cs_quantity#44, cs_net_paid#45] +Input [6]: [cs_sold_date_sk#41, cs_item_sk#42, cs_order_number#43, cs_quantity#44, cs_net_paid#45, cs_net_profit#46] + +(35) ReusedExchange [Reuses operator id: 9] +Output [1]: [d_date_sk#7] + +(36) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [cs_sold_date_sk#41] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(37) Project [codegen id : 12] +Output [4]: [cs_item_sk#42, cs_order_number#43, cs_quantity#44, cs_net_paid#45] +Input [6]: [cs_sold_date_sk#41, cs_item_sk#42, cs_order_number#43, cs_quantity#44, cs_net_paid#45, d_date_sk#7] + +(38) Exchange +Input [4]: [cs_item_sk#42, cs_order_number#43, cs_quantity#44, cs_net_paid#45] +Arguments: hashpartitioning(cs_order_number#43, cs_item_sk#42, 5), true, [id=#47] + +(39) Sort [codegen id : 13] +Input [4]: [cs_item_sk#42, cs_order_number#43, cs_quantity#44, cs_net_paid#45] +Arguments: [cs_order_number#43 ASC NULLS FIRST, cs_item_sk#42 ASC NULLS FIRST], false, 0 + +(40) Scan parquet default.catalog_returns +Output [4]: [cr_item_sk#48, cr_order_number#49, cr_return_quantity#50, cr_return_amount#51] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_returns] +PushedFilters: [IsNotNull(cr_return_amount), GreaterThan(cr_return_amount,10000.00), IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 14] +Input [4]: [cr_item_sk#48, cr_order_number#49, cr_return_quantity#50, cr_return_amount#51] + +(42) Filter [codegen id : 14] +Input [4]: [cr_item_sk#48, 
cr_order_number#49, cr_return_quantity#50, cr_return_amount#51] +Condition : (((isnotnull(cr_return_amount#51) AND (cr_return_amount#51 > 10000.00)) AND isnotnull(cr_item_sk#48)) AND isnotnull(cr_order_number#49)) + +(43) Exchange +Input [4]: [cr_item_sk#48, cr_order_number#49, cr_return_quantity#50, cr_return_amount#51] +Arguments: hashpartitioning(cr_order_number#49, cr_item_sk#48, 5), true, [id=#52] + +(44) Sort [codegen id : 15] +Input [4]: [cr_item_sk#48, cr_order_number#49, cr_return_quantity#50, cr_return_amount#51] +Arguments: [cr_order_number#49 ASC NULLS FIRST, cr_item_sk#48 ASC NULLS FIRST], false, 0 + +(45) SortMergeJoin [codegen id : 16] +Left keys [2]: [cs_order_number#43, cs_item_sk#42] +Right keys [2]: [cr_order_number#49, cr_item_sk#48] +Join condition: None + +(46) Project [codegen id : 16] +Output [5]: [cs_item_sk#42, cs_quantity#44, cs_net_paid#45, cr_return_quantity#50, cr_return_amount#51] +Input [8]: [cs_item_sk#42, cs_order_number#43, cs_quantity#44, cs_net_paid#45, cr_item_sk#48, cr_order_number#49, cr_return_quantity#50, cr_return_amount#51] + +(47) HashAggregate [codegen id : 16] +Input [5]: [cs_item_sk#42, cs_quantity#44, cs_net_paid#45, cr_return_quantity#50, cr_return_amount#51] +Keys [1]: [cs_item_sk#42] +Functions [4]: [partial_sum(cast(coalesce(cr_return_quantity#50, 0) as bigint)), partial_sum(cast(coalesce(cs_quantity#44, 0) as bigint)), partial_sum(coalesce(cast(cr_return_amount#51 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(cs_net_paid#45 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#53, sum#54, sum#55, isEmpty#56, sum#57, isEmpty#58] +Results [7]: [cs_item_sk#42, sum#59, sum#60, sum#61, isEmpty#62, sum#63, isEmpty#64] + +(48) Exchange +Input [7]: [cs_item_sk#42, sum#59, sum#60, sum#61, isEmpty#62, sum#63, isEmpty#64] +Arguments: hashpartitioning(cs_item_sk#42, 5), true, [id=#65] + +(49) HashAggregate [codegen id : 17] +Input [7]: [cs_item_sk#42, sum#59, sum#60, sum#61, isEmpty#62, sum#63, isEmpty#64] +Keys 
[1]: [cs_item_sk#42] +Functions [4]: [sum(cast(coalesce(cr_return_quantity#50, 0) as bigint)), sum(cast(coalesce(cs_quantity#44, 0) as bigint)), sum(coalesce(cast(cr_return_amount#51 as decimal(12,2)), 0.00)), sum(coalesce(cast(cs_net_paid#45 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(cast(coalesce(cr_return_quantity#50, 0) as bigint))#66, sum(cast(coalesce(cs_quantity#44, 0) as bigint))#67, sum(coalesce(cast(cr_return_amount#51 as decimal(12,2)), 0.00))#68, sum(coalesce(cast(cs_net_paid#45 as decimal(12,2)), 0.00))#69] +Results [3]: [cs_item_sk#42 AS item#70, CheckOverflow((promote_precision(cast(sum(cast(coalesce(cr_return_quantity#50, 0) as bigint))#66 as decimal(15,4))) / promote_precision(cast(sum(cast(coalesce(cs_quantity#44, 0) as bigint))#67 as decimal(15,4)))), DecimalType(35,20), true) AS return_ratio#71, CheckOverflow((promote_precision(cast(sum(coalesce(cast(cr_return_amount#51 as decimal(12,2)), 0.00))#68 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(cs_net_paid#45 as decimal(12,2)), 0.00))#69 as decimal(15,4)))), DecimalType(35,20), true) AS currency_ratio#72] + +(50) Exchange +Input [3]: [item#70, return_ratio#71, currency_ratio#72] +Arguments: SinglePartition, true, [id=#73] + +(51) Sort [codegen id : 18] +Input [3]: [item#70, return_ratio#71, currency_ratio#72] +Arguments: [return_ratio#71 ASC NULLS FIRST], false, 0 + +(52) Window +Input [3]: [item#70, return_ratio#71, currency_ratio#72] +Arguments: [rank(return_ratio#71) windowspecdefinition(return_ratio#71 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#74], [return_ratio#71 ASC NULLS FIRST] + +(53) Sort [codegen id : 19] +Input [4]: [item#70, return_ratio#71, currency_ratio#72, return_rank#74] +Arguments: [currency_ratio#72 ASC NULLS FIRST], false, 0 + +(54) Window +Input [4]: [item#70, return_ratio#71, currency_ratio#72, return_rank#74] +Arguments: [rank(currency_ratio#72) 
windowspecdefinition(currency_ratio#72 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#75], [currency_ratio#72 ASC NULLS FIRST] + +(55) Filter [codegen id : 20] +Input [5]: [item#70, return_ratio#71, currency_ratio#72, return_rank#74, currency_rank#75] +Condition : ((return_rank#74 <= 10) OR (currency_rank#75 <= 10)) + +(56) Project [codegen id : 20] +Output [5]: [catalog AS channel#76, item#70, return_ratio#71, return_rank#74, currency_rank#75] +Input [5]: [item#70, return_ratio#71, currency_ratio#72, return_rank#74, currency_rank#75] + +(57) Scan parquet default.store_sales +Output [6]: [ss_sold_date_sk#77, ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81, ss_net_profit#82] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_net_profit), IsNotNull(ss_quantity), IsNotNull(ss_net_paid), GreaterThan(ss_net_profit,1.00), GreaterThan(ss_net_paid,0.00), GreaterThan(ss_quantity,0), IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(58) ColumnarToRow [codegen id : 22] +Input [6]: [ss_sold_date_sk#77, ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81, ss_net_profit#82] + +(59) Filter [codegen id : 22] +Input [6]: [ss_sold_date_sk#77, ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81, ss_net_profit#82] +Condition : ((((((((isnotnull(ss_net_profit#82) AND isnotnull(ss_quantity#80)) AND isnotnull(ss_net_paid#81)) AND (ss_net_profit#82 > 1.00)) AND (ss_net_paid#81 > 0.00)) AND (ss_quantity#80 > 0)) AND isnotnull(ss_item_sk#78)) AND isnotnull(ss_ticket_number#79)) AND isnotnull(ss_sold_date_sk#77)) + +(60) Project [codegen id : 22] +Output [5]: [ss_sold_date_sk#77, ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81] +Input [6]: [ss_sold_date_sk#77, 
ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81, ss_net_profit#82] + +(61) ReusedExchange [Reuses operator id: 9] +Output [1]: [d_date_sk#7] + +(62) BroadcastHashJoin [codegen id : 22] +Left keys [1]: [ss_sold_date_sk#77] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(63) Project [codegen id : 22] +Output [4]: [ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81] +Input [6]: [ss_sold_date_sk#77, ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81, d_date_sk#7] + +(64) Exchange +Input [4]: [ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81] +Arguments: hashpartitioning(cast(ss_ticket_number#79 as bigint), cast(ss_item_sk#78 as bigint), 5), true, [id=#83] + +(65) Sort [codegen id : 23] +Input [4]: [ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81] +Arguments: [cast(ss_ticket_number#79 as bigint) ASC NULLS FIRST, cast(ss_item_sk#78 as bigint) ASC NULLS FIRST], false, 0 + +(66) Scan parquet default.store_returns +Output [4]: [sr_item_sk#84, sr_ticket_number#85, sr_return_quantity#86, sr_return_amt#87] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_returns] +PushedFilters: [IsNotNull(sr_return_amt), GreaterThan(sr_return_amt,10000.00), IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(67) ColumnarToRow [codegen id : 24] +Input [4]: [sr_item_sk#84, sr_ticket_number#85, sr_return_quantity#86, sr_return_amt#87] + +(68) Filter [codegen id : 24] +Input [4]: [sr_item_sk#84, sr_ticket_number#85, sr_return_quantity#86, sr_return_amt#87] +Condition : (((isnotnull(sr_return_amt#87) AND (sr_return_amt#87 > 10000.00)) AND isnotnull(sr_ticket_number#85)) AND isnotnull(sr_item_sk#84)) + +(69) Exchange +Input [4]: [sr_item_sk#84, sr_ticket_number#85, sr_return_quantity#86, sr_return_amt#87] +Arguments: hashpartitioning(sr_ticket_number#85, 
sr_item_sk#84, 5), true, [id=#88] + +(70) Sort [codegen id : 25] +Input [4]: [sr_item_sk#84, sr_ticket_number#85, sr_return_quantity#86, sr_return_amt#87] +Arguments: [sr_ticket_number#85 ASC NULLS FIRST, sr_item_sk#84 ASC NULLS FIRST], false, 0 + +(71) SortMergeJoin [codegen id : 26] +Left keys [2]: [cast(ss_ticket_number#79 as bigint), cast(ss_item_sk#78 as bigint)] +Right keys [2]: [sr_ticket_number#85, sr_item_sk#84] +Join condition: None + +(72) Project [codegen id : 26] +Output [5]: [ss_item_sk#78, ss_quantity#80, ss_net_paid#81, sr_return_quantity#86, sr_return_amt#87] +Input [8]: [ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81, sr_item_sk#84, sr_ticket_number#85, sr_return_quantity#86, sr_return_amt#87] + +(73) HashAggregate [codegen id : 26] +Input [5]: [ss_item_sk#78, ss_quantity#80, ss_net_paid#81, sr_return_quantity#86, sr_return_amt#87] +Keys [1]: [ss_item_sk#78] +Functions [4]: [partial_sum(cast(coalesce(sr_return_quantity#86, 0) as bigint)), partial_sum(cast(coalesce(ss_quantity#80, 0) as bigint)), partial_sum(coalesce(cast(sr_return_amt#87 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ss_net_paid#81 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#89, sum#90, sum#91, isEmpty#92, sum#93, isEmpty#94] +Results [7]: [ss_item_sk#78, sum#95, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] + +(74) Exchange +Input [7]: [ss_item_sk#78, sum#95, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] +Arguments: hashpartitioning(ss_item_sk#78, 5), true, [id=#101] + +(75) HashAggregate [codegen id : 27] +Input [7]: [ss_item_sk#78, sum#95, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] +Keys [1]: [ss_item_sk#78] +Functions [4]: [sum(cast(coalesce(sr_return_quantity#86, 0) as bigint)), sum(cast(coalesce(ss_quantity#80, 0) as bigint)), sum(coalesce(cast(sr_return_amt#87 as decimal(12,2)), 0.00)), sum(coalesce(cast(ss_net_paid#81 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(cast(coalesce(sr_return_quantity#86, 0) as 
bigint))#102, sum(cast(coalesce(ss_quantity#80, 0) as bigint))#103, sum(coalesce(cast(sr_return_amt#87 as decimal(12,2)), 0.00))#104, sum(coalesce(cast(ss_net_paid#81 as decimal(12,2)), 0.00))#105] +Results [3]: [ss_item_sk#78 AS item#106, CheckOverflow((promote_precision(cast(sum(cast(coalesce(sr_return_quantity#86, 0) as bigint))#102 as decimal(15,4))) / promote_precision(cast(sum(cast(coalesce(ss_quantity#80, 0) as bigint))#103 as decimal(15,4)))), DecimalType(35,20), true) AS return_ratio#107, CheckOverflow((promote_precision(cast(sum(coalesce(cast(sr_return_amt#87 as decimal(12,2)), 0.00))#104 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ss_net_paid#81 as decimal(12,2)), 0.00))#105 as decimal(15,4)))), DecimalType(35,20), true) AS currency_ratio#108] + +(76) Exchange +Input [3]: [item#106, return_ratio#107, currency_ratio#108] +Arguments: SinglePartition, true, [id=#109] + +(77) Sort [codegen id : 28] +Input [3]: [item#106, return_ratio#107, currency_ratio#108] +Arguments: [return_ratio#107 ASC NULLS FIRST], false, 0 + +(78) Window +Input [3]: [item#106, return_ratio#107, currency_ratio#108] +Arguments: [rank(return_ratio#107) windowspecdefinition(return_ratio#107 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#110], [return_ratio#107 ASC NULLS FIRST] + +(79) Sort [codegen id : 29] +Input [4]: [item#106, return_ratio#107, currency_ratio#108, return_rank#110] +Arguments: [currency_ratio#108 ASC NULLS FIRST], false, 0 + +(80) Window +Input [4]: [item#106, return_ratio#107, currency_ratio#108, return_rank#110] +Arguments: [rank(currency_ratio#108) windowspecdefinition(currency_ratio#108 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#111], [currency_ratio#108 ASC NULLS FIRST] + +(81) Filter [codegen id : 30] +Input [5]: [item#106, return_ratio#107, currency_ratio#108, return_rank#110, currency_rank#111] +Condition : ((return_rank#110 <= 
10) OR (currency_rank#111 <= 10)) + +(82) Project [codegen id : 30] +Output [5]: [store AS channel#112, item#106, return_ratio#107, return_rank#110, currency_rank#111] +Input [5]: [item#106, return_ratio#107, currency_ratio#108, return_rank#110, currency_rank#111] + +(83) Union + +(84) HashAggregate [codegen id : 31] +Input [5]: [channel#40, item#34, return_ratio#35, return_rank#38, currency_rank#39] +Keys [5]: [channel#40, item#34, return_ratio#35, return_rank#38, currency_rank#39] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#40, item#34, return_ratio#35, return_rank#38, currency_rank#39] + +(85) Exchange +Input [5]: [channel#40, item#34, return_ratio#35, return_rank#38, currency_rank#39] +Arguments: hashpartitioning(channel#40, item#34, return_ratio#35, return_rank#38, currency_rank#39, 5), true, [id=#113] + +(86) HashAggregate [codegen id : 32] +Input [5]: [channel#40, item#34, return_ratio#35, return_rank#38, currency_rank#39] +Keys [5]: [channel#40, item#34, return_ratio#35, return_rank#38, currency_rank#39] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#40, item#34, return_ratio#35, return_rank#38, currency_rank#39] + +(87) TakeOrderedAndProject +Input [5]: [channel#40, item#34, return_ratio#35, return_rank#38, currency_rank#39] +Arguments: 100, [channel#40 ASC NULLS FIRST, return_rank#38 ASC NULLS FIRST, currency_rank#39 ASC NULLS FIRST], [channel#40, item#34, return_ratio#35, return_rank#38, currency_rank#39] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49.sf100/simplified.txt new file mode 100644 index 0000000000000..7fc20fb4df8d2 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49.sf100/simplified.txt @@ -0,0 +1,153 @@ +TakeOrderedAndProject [channel,currency_rank,item,return_rank,return_ratio] + WholeStageCodegen (32) + HashAggregate 
[channel,currency_rank,item,return_rank,return_ratio] + InputAdapter + Exchange [channel,currency_rank,item,return_rank,return_ratio] #1 + WholeStageCodegen (31) + HashAggregate [channel,currency_rank,item,return_rank,return_ratio] + InputAdapter + Union + WholeStageCodegen (10) + Project [currency_rank,item,return_rank,return_ratio] + Filter [currency_rank,return_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (9) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (8) + Sort [return_ratio] + InputAdapter + Exchange #2 + WholeStageCodegen (7) + HashAggregate [isEmpty,isEmpty,sum,sum,sum,sum,ws_item_sk] [currency_ratio,isEmpty,isEmpty,item,return_ratio,sum,sum,sum,sum,sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum(cast(coalesce(ws_quantity, 0) as bigint)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00))] + InputAdapter + Exchange [ws_item_sk] #3 + WholeStageCodegen (6) + HashAggregate [wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity] [isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum,sum,sum] + Project [wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity] + SortMergeJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + InputAdapter + WholeStageCodegen (3) + Sort [ws_item_sk,ws_order_number] + InputAdapter + Exchange [ws_item_sk,ws_order_number] #4 + WholeStageCodegen (2) + Project [ws_item_sk,ws_net_paid,ws_order_number,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_item_sk,ws_net_paid,ws_order_number,ws_quantity,ws_sold_date_sk] + Filter [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date_sk] + Filter 
[d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + WholeStageCodegen (5) + Sort [wr_item_sk,wr_order_number] + InputAdapter + Exchange [wr_item_sk,wr_order_number] #6 + WholeStageCodegen (4) + Filter [wr_item_sk,wr_order_number,wr_return_amt] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] + WholeStageCodegen (20) + Project [currency_rank,item,return_rank,return_ratio] + Filter [currency_rank,return_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (19) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (18) + Sort [return_ratio] + InputAdapter + Exchange #7 + WholeStageCodegen (17) + HashAggregate [cs_item_sk,isEmpty,isEmpty,sum,sum,sum,sum] [currency_ratio,isEmpty,isEmpty,item,return_ratio,sum,sum,sum,sum,sum(cast(coalesce(cr_return_quantity, 0) as bigint)),sum(cast(coalesce(cs_quantity, 0) as bigint)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00))] + InputAdapter + Exchange [cs_item_sk] #8 + WholeStageCodegen (16) + HashAggregate [cr_return_amount,cr_return_quantity,cs_item_sk,cs_net_paid,cs_quantity] [isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum,sum,sum] + Project [cr_return_amount,cr_return_quantity,cs_item_sk,cs_net_paid,cs_quantity] + SortMergeJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + InputAdapter + WholeStageCodegen (13) + Sort [cs_item_sk,cs_order_number] + InputAdapter + Exchange [cs_item_sk,cs_order_number] #9 + WholeStageCodegen (12) + Project [cs_item_sk,cs_net_paid,cs_order_number,cs_quantity] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_item_sk,cs_net_paid,cs_order_number,cs_quantity,cs_sold_date_sk] + Filter [cs_item_sk,cs_net_paid,cs_net_profit,cs_order_number,cs_quantity,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan 
parquet default.catalog_sales [cs_item_sk,cs_net_paid,cs_net_profit,cs_order_number,cs_quantity,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 + InputAdapter + WholeStageCodegen (15) + Sort [cr_item_sk,cr_order_number] + InputAdapter + Exchange [cr_item_sk,cr_order_number] #10 + WholeStageCodegen (14) + Filter [cr_item_sk,cr_order_number,cr_return_amount] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] + WholeStageCodegen (30) + Project [currency_rank,item,return_rank,return_ratio] + Filter [currency_rank,return_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (29) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (28) + Sort [return_ratio] + InputAdapter + Exchange #11 + WholeStageCodegen (27) + HashAggregate [isEmpty,isEmpty,ss_item_sk,sum,sum,sum,sum] [currency_ratio,isEmpty,isEmpty,item,return_ratio,sum,sum,sum,sum,sum(cast(coalesce(sr_return_quantity, 0) as bigint)),sum(cast(coalesce(ss_quantity, 0) as bigint)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00))] + InputAdapter + Exchange [ss_item_sk] #12 + WholeStageCodegen (26) + HashAggregate [sr_return_amt,sr_return_quantity,ss_item_sk,ss_net_paid,ss_quantity] [isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum,sum,sum] + Project [sr_return_amt,sr_return_quantity,ss_item_sk,ss_net_paid,ss_quantity] + SortMergeJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + InputAdapter + WholeStageCodegen (23) + Sort [ss_item_sk,ss_ticket_number] + InputAdapter + Exchange [ss_item_sk,ss_ticket_number] #13 + WholeStageCodegen (22) + Project [ss_item_sk,ss_net_paid,ss_quantity,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_item_sk,ss_net_paid,ss_quantity,ss_sold_date_sk,ss_ticket_number] + Filter 
[ss_item_sk,ss_net_paid,ss_net_profit,ss_quantity,ss_sold_date_sk,ss_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_net_paid,ss_net_profit,ss_quantity,ss_sold_date_sk,ss_ticket_number] + InputAdapter + ReusedExchange [d_date_sk] #5 + InputAdapter + WholeStageCodegen (25) + Sort [sr_item_sk,sr_ticket_number] + InputAdapter + Exchange [sr_item_sk,sr_ticket_number] #14 + WholeStageCodegen (24) + Filter [sr_item_sk,sr_return_amt,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt new file mode 100644 index 0000000000000..883661759e561 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt @@ -0,0 +1,433 @@ +== Physical Plan == +TakeOrderedAndProject (78) ++- * HashAggregate (77) + +- Exchange (76) + +- * HashAggregate (75) + +- Union (74) + :- * Project (27) + : +- * Filter (26) + : +- Window (25) + : +- * Sort (24) + : +- Window (23) + : +- * Sort (22) + : +- Exchange (21) + : +- * HashAggregate (20) + : +- Exchange (19) + : +- * HashAggregate (18) + : +- * Project (17) + : +- * BroadcastHashJoin Inner BuildRight (16) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.web_sales (1) + : : +- BroadcastExchange (8) + : : +- * Filter (7) + : : +- * ColumnarToRow (6) + : : +- Scan parquet default.web_returns (5) + : +- BroadcastExchange (15) + : +- * Project (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet default.date_dim (11) + :- * Project (50) + : +- * Filter (49) + : +- Window (48) + : +- * Sort (47) + : +- Window (46) + : +- * Sort (45) + : 
+- Exchange (44) + : +- * HashAggregate (43) + : +- Exchange (42) + : +- * HashAggregate (41) + : +- * Project (40) + : +- * BroadcastHashJoin Inner BuildRight (39) + : :- * Project (37) + : : +- * BroadcastHashJoin Inner BuildRight (36) + : : :- * Project (31) + : : : +- * Filter (30) + : : : +- * ColumnarToRow (29) + : : : +- Scan parquet default.catalog_sales (28) + : : +- BroadcastExchange (35) + : : +- * Filter (34) + : : +- * ColumnarToRow (33) + : : +- Scan parquet default.catalog_returns (32) + : +- ReusedExchange (38) + +- * Project (73) + +- * Filter (72) + +- Window (71) + +- * Sort (70) + +- Window (69) + +- * Sort (68) + +- Exchange (67) + +- * HashAggregate (66) + +- Exchange (65) + +- * HashAggregate (64) + +- * Project (63) + +- * BroadcastHashJoin Inner BuildRight (62) + :- * Project (60) + : +- * BroadcastHashJoin Inner BuildRight (59) + : :- * Project (54) + : : +- * Filter (53) + : : +- * ColumnarToRow (52) + : : +- Scan parquet default.store_sales (51) + : +- BroadcastExchange (58) + : +- * Filter (57) + : +- * ColumnarToRow (56) + : +- Scan parquet default.store_returns (55) + +- ReusedExchange (61) + + +(1) Scan parquet default.web_sales +Output [6]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5, ws_net_profit#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_quantity), IsNotNull(ws_net_profit), IsNotNull(ws_net_paid), GreaterThan(ws_net_profit,1.00), GreaterThan(ws_net_paid,0.00), GreaterThan(ws_quantity,0), IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [6]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5, ws_net_profit#6] + +(3) Filter [codegen id : 3] +Input [6]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#3, 
ws_quantity#4, ws_net_paid#5, ws_net_profit#6] +Condition : ((((((((isnotnull(ws_quantity#4) AND isnotnull(ws_net_profit#6)) AND isnotnull(ws_net_paid#5)) AND (ws_net_profit#6 > 1.00)) AND (ws_net_paid#5 > 0.00)) AND (ws_quantity#4 > 0)) AND isnotnull(ws_item_sk#2)) AND isnotnull(ws_order_number#3)) AND isnotnull(ws_sold_date_sk#1)) + +(4) Project [codegen id : 3] +Output [5]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5] +Input [6]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5, ws_net_profit#6] + +(5) Scan parquet default.web_returns +Output [4]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_returns] +PushedFilters: [IsNotNull(wr_return_amt), GreaterThan(wr_return_amt,10000.00), IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [4]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] + +(7) Filter [codegen id : 1] +Input [4]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] +Condition : (((isnotnull(wr_return_amt#10) AND (wr_return_amt#10 > 10000.00)) AND isnotnull(wr_order_number#8)) AND isnotnull(wr_item_sk#7)) + +(8) BroadcastExchange +Input [4]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#11] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [2]: [cast(ws_order_number#3 as bigint), cast(ws_item_sk#2 as bigint)] +Right keys [2]: [wr_order_number#8, wr_item_sk#7] +Join condition: None + +(10) Project [codegen id : 3] +Output [6]: [ws_sold_date_sk#1, ws_item_sk#2, ws_quantity#4, ws_net_paid#5, wr_return_quantity#9, wr_return_amt#10] +Input [9]: 
[ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5, wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] + +(11) Scan parquet default.date_dim +Output [3]: [d_date_sk#12, d_year#13, d_moy#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,12), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#12, d_year#13, d_moy#14] + +(13) Filter [codegen id : 2] +Input [3]: [d_date_sk#12, d_year#13, d_moy#14] +Condition : ((((isnotnull(d_year#13) AND isnotnull(d_moy#14)) AND (d_year#13 = 2001)) AND (d_moy#14 = 12)) AND isnotnull(d_date_sk#12)) + +(14) Project [codegen id : 2] +Output [1]: [d_date_sk#12] +Input [3]: [d_date_sk#12, d_year#13, d_moy#14] + +(15) BroadcastExchange +Input [1]: [d_date_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_sold_date_sk#1] +Right keys [1]: [d_date_sk#12] +Join condition: None + +(17) Project [codegen id : 3] +Output [5]: [ws_item_sk#2, ws_quantity#4, ws_net_paid#5, wr_return_quantity#9, wr_return_amt#10] +Input [7]: [ws_sold_date_sk#1, ws_item_sk#2, ws_quantity#4, ws_net_paid#5, wr_return_quantity#9, wr_return_amt#10, d_date_sk#12] + +(18) HashAggregate [codegen id : 3] +Input [5]: [ws_item_sk#2, ws_quantity#4, ws_net_paid#5, wr_return_quantity#9, wr_return_amt#10] +Keys [1]: [ws_item_sk#2] +Functions [4]: [partial_sum(cast(coalesce(wr_return_quantity#9, 0) as bigint)), partial_sum(cast(coalesce(ws_quantity#4, 0) as bigint)), partial_sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ws_net_paid#5 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#16, sum#17, sum#18, 
isEmpty#19, sum#20, isEmpty#21] +Results [7]: [ws_item_sk#2, sum#22, sum#23, sum#24, isEmpty#25, sum#26, isEmpty#27] + +(19) Exchange +Input [7]: [ws_item_sk#2, sum#22, sum#23, sum#24, isEmpty#25, sum#26, isEmpty#27] +Arguments: hashpartitioning(ws_item_sk#2, 5), true, [id=#28] + +(20) HashAggregate [codegen id : 4] +Input [7]: [ws_item_sk#2, sum#22, sum#23, sum#24, isEmpty#25, sum#26, isEmpty#27] +Keys [1]: [ws_item_sk#2] +Functions [4]: [sum(cast(coalesce(wr_return_quantity#9, 0) as bigint)), sum(cast(coalesce(ws_quantity#4, 0) as bigint)), sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#5 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(cast(coalesce(wr_return_quantity#9, 0) as bigint))#29, sum(cast(coalesce(ws_quantity#4, 0) as bigint))#30, sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00))#31, sum(coalesce(cast(ws_net_paid#5 as decimal(12,2)), 0.00))#32] +Results [3]: [ws_item_sk#2 AS item#33, CheckOverflow((promote_precision(cast(sum(cast(coalesce(wr_return_quantity#9, 0) as bigint))#29 as decimal(15,4))) / promote_precision(cast(sum(cast(coalesce(ws_quantity#4, 0) as bigint))#30 as decimal(15,4)))), DecimalType(35,20), true) AS return_ratio#34, CheckOverflow((promote_precision(cast(sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00))#31 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ws_net_paid#5 as decimal(12,2)), 0.00))#32 as decimal(15,4)))), DecimalType(35,20), true) AS currency_ratio#35] + +(21) Exchange +Input [3]: [item#33, return_ratio#34, currency_ratio#35] +Arguments: SinglePartition, true, [id=#36] + +(22) Sort [codegen id : 5] +Input [3]: [item#33, return_ratio#34, currency_ratio#35] +Arguments: [return_ratio#34 ASC NULLS FIRST], false, 0 + +(23) Window +Input [3]: [item#33, return_ratio#34, currency_ratio#35] +Arguments: [rank(return_ratio#34) windowspecdefinition(return_ratio#34 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), 
currentrow$())) AS return_rank#37], [return_ratio#34 ASC NULLS FIRST] + +(24) Sort [codegen id : 6] +Input [4]: [item#33, return_ratio#34, currency_ratio#35, return_rank#37] +Arguments: [currency_ratio#35 ASC NULLS FIRST], false, 0 + +(25) Window +Input [4]: [item#33, return_ratio#34, currency_ratio#35, return_rank#37] +Arguments: [rank(currency_ratio#35) windowspecdefinition(currency_ratio#35 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#38], [currency_ratio#35 ASC NULLS FIRST] + +(26) Filter [codegen id : 7] +Input [5]: [item#33, return_ratio#34, currency_ratio#35, return_rank#37, currency_rank#38] +Condition : ((return_rank#37 <= 10) OR (currency_rank#38 <= 10)) + +(27) Project [codegen id : 7] +Output [5]: [web AS channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] +Input [5]: [item#33, return_ratio#34, currency_ratio#35, return_rank#37, currency_rank#38] + +(28) Scan parquet default.catalog_sales +Output [6]: [cs_sold_date_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_net_paid#44, cs_net_profit#45] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_quantity), IsNotNull(cs_net_paid), IsNotNull(cs_net_profit), GreaterThan(cs_net_profit,1.00), GreaterThan(cs_net_paid,0.00), GreaterThan(cs_quantity,0), IsNotNull(cs_item_sk), IsNotNull(cs_order_number), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 10] +Input [6]: [cs_sold_date_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_net_paid#44, cs_net_profit#45] + +(30) Filter [codegen id : 10] +Input [6]: [cs_sold_date_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_net_paid#44, cs_net_profit#45] +Condition : ((((((((isnotnull(cs_quantity#43) AND isnotnull(cs_net_paid#44)) AND isnotnull(cs_net_profit#45)) AND 
(cs_net_profit#45 > 1.00)) AND (cs_net_paid#44 > 0.00)) AND (cs_quantity#43 > 0)) AND isnotnull(cs_item_sk#41)) AND isnotnull(cs_order_number#42)) AND isnotnull(cs_sold_date_sk#40)) + +(31) Project [codegen id : 10] +Output [5]: [cs_sold_date_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_net_paid#44] +Input [6]: [cs_sold_date_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_net_paid#44, cs_net_profit#45] + +(32) Scan parquet default.catalog_returns +Output [4]: [cr_item_sk#46, cr_order_number#47, cr_return_quantity#48, cr_return_amount#49] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_returns] +PushedFilters: [IsNotNull(cr_return_amount), GreaterThan(cr_return_amount,10000.00), IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(33) ColumnarToRow [codegen id : 8] +Input [4]: [cr_item_sk#46, cr_order_number#47, cr_return_quantity#48, cr_return_amount#49] + +(34) Filter [codegen id : 8] +Input [4]: [cr_item_sk#46, cr_order_number#47, cr_return_quantity#48, cr_return_amount#49] +Condition : (((isnotnull(cr_return_amount#49) AND (cr_return_amount#49 > 10000.00)) AND isnotnull(cr_order_number#47)) AND isnotnull(cr_item_sk#46)) + +(35) BroadcastExchange +Input [4]: [cr_item_sk#46, cr_order_number#47, cr_return_quantity#48, cr_return_amount#49] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, false] as bigint), 32) | (cast(input[0, int, false] as bigint) & 4294967295))),false), [id=#50] + +(36) BroadcastHashJoin [codegen id : 10] +Left keys [2]: [cs_order_number#42, cs_item_sk#41] +Right keys [2]: [cr_order_number#47, cr_item_sk#46] +Join condition: None + +(37) Project [codegen id : 10] +Output [6]: [cs_sold_date_sk#40, cs_item_sk#41, cs_quantity#43, cs_net_paid#44, cr_return_quantity#48, cr_return_amount#49] +Input [9]: [cs_sold_date_sk#40, cs_item_sk#41, cs_order_number#42, 
cs_quantity#43, cs_net_paid#44, cr_item_sk#46, cr_order_number#47, cr_return_quantity#48, cr_return_amount#49] + +(38) ReusedExchange [Reuses operator id: 15] +Output [1]: [d_date_sk#12] + +(39) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_sold_date_sk#40] +Right keys [1]: [d_date_sk#12] +Join condition: None + +(40) Project [codegen id : 10] +Output [5]: [cs_item_sk#41, cs_quantity#43, cs_net_paid#44, cr_return_quantity#48, cr_return_amount#49] +Input [7]: [cs_sold_date_sk#40, cs_item_sk#41, cs_quantity#43, cs_net_paid#44, cr_return_quantity#48, cr_return_amount#49, d_date_sk#12] + +(41) HashAggregate [codegen id : 10] +Input [5]: [cs_item_sk#41, cs_quantity#43, cs_net_paid#44, cr_return_quantity#48, cr_return_amount#49] +Keys [1]: [cs_item_sk#41] +Functions [4]: [partial_sum(cast(coalesce(cr_return_quantity#48, 0) as bigint)), partial_sum(cast(coalesce(cs_quantity#43, 0) as bigint)), partial_sum(coalesce(cast(cr_return_amount#49 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(cs_net_paid#44 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#51, sum#52, sum#53, isEmpty#54, sum#55, isEmpty#56] +Results [7]: [cs_item_sk#41, sum#57, sum#58, sum#59, isEmpty#60, sum#61, isEmpty#62] + +(42) Exchange +Input [7]: [cs_item_sk#41, sum#57, sum#58, sum#59, isEmpty#60, sum#61, isEmpty#62] +Arguments: hashpartitioning(cs_item_sk#41, 5), true, [id=#63] + +(43) HashAggregate [codegen id : 11] +Input [7]: [cs_item_sk#41, sum#57, sum#58, sum#59, isEmpty#60, sum#61, isEmpty#62] +Keys [1]: [cs_item_sk#41] +Functions [4]: [sum(cast(coalesce(cr_return_quantity#48, 0) as bigint)), sum(cast(coalesce(cs_quantity#43, 0) as bigint)), sum(coalesce(cast(cr_return_amount#49 as decimal(12,2)), 0.00)), sum(coalesce(cast(cs_net_paid#44 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(cast(coalesce(cr_return_quantity#48, 0) as bigint))#64, sum(cast(coalesce(cs_quantity#43, 0) as bigint))#65, sum(coalesce(cast(cr_return_amount#49 as decimal(12,2)), 0.00))#66, 
sum(coalesce(cast(cs_net_paid#44 as decimal(12,2)), 0.00))#67] +Results [3]: [cs_item_sk#41 AS item#68, CheckOverflow((promote_precision(cast(sum(cast(coalesce(cr_return_quantity#48, 0) as bigint))#64 as decimal(15,4))) / promote_precision(cast(sum(cast(coalesce(cs_quantity#43, 0) as bigint))#65 as decimal(15,4)))), DecimalType(35,20), true) AS return_ratio#69, CheckOverflow((promote_precision(cast(sum(coalesce(cast(cr_return_amount#49 as decimal(12,2)), 0.00))#66 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(cs_net_paid#44 as decimal(12,2)), 0.00))#67 as decimal(15,4)))), DecimalType(35,20), true) AS currency_ratio#70] + +(44) Exchange +Input [3]: [item#68, return_ratio#69, currency_ratio#70] +Arguments: SinglePartition, true, [id=#71] + +(45) Sort [codegen id : 12] +Input [3]: [item#68, return_ratio#69, currency_ratio#70] +Arguments: [return_ratio#69 ASC NULLS FIRST], false, 0 + +(46) Window +Input [3]: [item#68, return_ratio#69, currency_ratio#70] +Arguments: [rank(return_ratio#69) windowspecdefinition(return_ratio#69 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#72], [return_ratio#69 ASC NULLS FIRST] + +(47) Sort [codegen id : 13] +Input [4]: [item#68, return_ratio#69, currency_ratio#70, return_rank#72] +Arguments: [currency_ratio#70 ASC NULLS FIRST], false, 0 + +(48) Window +Input [4]: [item#68, return_ratio#69, currency_ratio#70, return_rank#72] +Arguments: [rank(currency_ratio#70) windowspecdefinition(currency_ratio#70 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#73], [currency_ratio#70 ASC NULLS FIRST] + +(49) Filter [codegen id : 14] +Input [5]: [item#68, return_ratio#69, currency_ratio#70, return_rank#72, currency_rank#73] +Condition : ((return_rank#72 <= 10) OR (currency_rank#73 <= 10)) + +(50) Project [codegen id : 14] +Output [5]: [catalog AS channel#74, item#68, return_ratio#69, return_rank#72, currency_rank#73] +Input 
[5]: [item#68, return_ratio#69, currency_ratio#70, return_rank#72, currency_rank#73] + +(51) Scan parquet default.store_sales +Output [6]: [ss_sold_date_sk#75, ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, ss_net_paid#79, ss_net_profit#80] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_net_profit), IsNotNull(ss_quantity), IsNotNull(ss_net_paid), GreaterThan(ss_net_profit,1.00), GreaterThan(ss_net_paid,0.00), GreaterThan(ss_quantity,0), IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(52) ColumnarToRow [codegen id : 17] +Input [6]: [ss_sold_date_sk#75, ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, ss_net_paid#79, ss_net_profit#80] + +(53) Filter [codegen id : 17] +Input [6]: [ss_sold_date_sk#75, ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, ss_net_paid#79, ss_net_profit#80] +Condition : ((((((((isnotnull(ss_net_profit#80) AND isnotnull(ss_quantity#78)) AND isnotnull(ss_net_paid#79)) AND (ss_net_profit#80 > 1.00)) AND (ss_net_paid#79 > 0.00)) AND (ss_quantity#78 > 0)) AND isnotnull(ss_ticket_number#77)) AND isnotnull(ss_item_sk#76)) AND isnotnull(ss_sold_date_sk#75)) + +(54) Project [codegen id : 17] +Output [5]: [ss_sold_date_sk#75, ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, ss_net_paid#79] +Input [6]: [ss_sold_date_sk#75, ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, ss_net_paid#79, ss_net_profit#80] + +(55) Scan parquet default.store_returns +Output [4]: [sr_item_sk#81, sr_ticket_number#82, sr_return_quantity#83, sr_return_amt#84] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_returns] +PushedFilters: [IsNotNull(sr_return_amt), GreaterThan(sr_return_amt,10000.00), IsNotNull(sr_item_sk), 
IsNotNull(sr_ticket_number)] +ReadSchema: struct + +(56) ColumnarToRow [codegen id : 15] +Input [4]: [sr_item_sk#81, sr_ticket_number#82, sr_return_quantity#83, sr_return_amt#84] + +(57) Filter [codegen id : 15] +Input [4]: [sr_item_sk#81, sr_ticket_number#82, sr_return_quantity#83, sr_return_amt#84] +Condition : (((isnotnull(sr_return_amt#84) AND (sr_return_amt#84 > 10000.00)) AND isnotnull(sr_item_sk#81)) AND isnotnull(sr_ticket_number#82)) + +(58) BroadcastExchange +Input [4]: [sr_item_sk#81, sr_ticket_number#82, sr_return_quantity#83, sr_return_amt#84] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#85] + +(59) BroadcastHashJoin [codegen id : 17] +Left keys [2]: [cast(ss_ticket_number#77 as bigint), cast(ss_item_sk#76 as bigint)] +Right keys [2]: [sr_ticket_number#82, sr_item_sk#81] +Join condition: None + +(60) Project [codegen id : 17] +Output [6]: [ss_sold_date_sk#75, ss_item_sk#76, ss_quantity#78, ss_net_paid#79, sr_return_quantity#83, sr_return_amt#84] +Input [9]: [ss_sold_date_sk#75, ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, ss_net_paid#79, sr_item_sk#81, sr_ticket_number#82, sr_return_quantity#83, sr_return_amt#84] + +(61) ReusedExchange [Reuses operator id: 15] +Output [1]: [d_date_sk#12] + +(62) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ss_sold_date_sk#75] +Right keys [1]: [d_date_sk#12] +Join condition: None + +(63) Project [codegen id : 17] +Output [5]: [ss_item_sk#76, ss_quantity#78, ss_net_paid#79, sr_return_quantity#83, sr_return_amt#84] +Input [7]: [ss_sold_date_sk#75, ss_item_sk#76, ss_quantity#78, ss_net_paid#79, sr_return_quantity#83, sr_return_amt#84, d_date_sk#12] + +(64) HashAggregate [codegen id : 17] +Input [5]: [ss_item_sk#76, ss_quantity#78, ss_net_paid#79, sr_return_quantity#83, sr_return_amt#84] +Keys [1]: [ss_item_sk#76] +Functions [4]: [partial_sum(cast(coalesce(sr_return_quantity#83, 0) as bigint)), partial_sum(cast(coalesce(ss_quantity#78, 0) as 
bigint)), partial_sum(coalesce(cast(sr_return_amt#84 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ss_net_paid#79 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#86, sum#87, sum#88, isEmpty#89, sum#90, isEmpty#91] +Results [7]: [ss_item_sk#76, sum#92, sum#93, sum#94, isEmpty#95, sum#96, isEmpty#97] + +(65) Exchange +Input [7]: [ss_item_sk#76, sum#92, sum#93, sum#94, isEmpty#95, sum#96, isEmpty#97] +Arguments: hashpartitioning(ss_item_sk#76, 5), true, [id=#98] + +(66) HashAggregate [codegen id : 18] +Input [7]: [ss_item_sk#76, sum#92, sum#93, sum#94, isEmpty#95, sum#96, isEmpty#97] +Keys [1]: [ss_item_sk#76] +Functions [4]: [sum(cast(coalesce(sr_return_quantity#83, 0) as bigint)), sum(cast(coalesce(ss_quantity#78, 0) as bigint)), sum(coalesce(cast(sr_return_amt#84 as decimal(12,2)), 0.00)), sum(coalesce(cast(ss_net_paid#79 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(cast(coalesce(sr_return_quantity#83, 0) as bigint))#99, sum(cast(coalesce(ss_quantity#78, 0) as bigint))#100, sum(coalesce(cast(sr_return_amt#84 as decimal(12,2)), 0.00))#101, sum(coalesce(cast(ss_net_paid#79 as decimal(12,2)), 0.00))#102] +Results [3]: [ss_item_sk#76 AS item#103, CheckOverflow((promote_precision(cast(sum(cast(coalesce(sr_return_quantity#83, 0) as bigint))#99 as decimal(15,4))) / promote_precision(cast(sum(cast(coalesce(ss_quantity#78, 0) as bigint))#100 as decimal(15,4)))), DecimalType(35,20), true) AS return_ratio#104, CheckOverflow((promote_precision(cast(sum(coalesce(cast(sr_return_amt#84 as decimal(12,2)), 0.00))#101 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ss_net_paid#79 as decimal(12,2)), 0.00))#102 as decimal(15,4)))), DecimalType(35,20), true) AS currency_ratio#105] + +(67) Exchange +Input [3]: [item#103, return_ratio#104, currency_ratio#105] +Arguments: SinglePartition, true, [id=#106] + +(68) Sort [codegen id : 19] +Input [3]: [item#103, return_ratio#104, currency_ratio#105] +Arguments: [return_ratio#104 ASC NULLS FIRST], 
false, 0 + +(69) Window +Input [3]: [item#103, return_ratio#104, currency_ratio#105] +Arguments: [rank(return_ratio#104) windowspecdefinition(return_ratio#104 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#107], [return_ratio#104 ASC NULLS FIRST] + +(70) Sort [codegen id : 20] +Input [4]: [item#103, return_ratio#104, currency_ratio#105, return_rank#107] +Arguments: [currency_ratio#105 ASC NULLS FIRST], false, 0 + +(71) Window +Input [4]: [item#103, return_ratio#104, currency_ratio#105, return_rank#107] +Arguments: [rank(currency_ratio#105) windowspecdefinition(currency_ratio#105 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#108], [currency_ratio#105 ASC NULLS FIRST] + +(72) Filter [codegen id : 21] +Input [5]: [item#103, return_ratio#104, currency_ratio#105, return_rank#107, currency_rank#108] +Condition : ((return_rank#107 <= 10) OR (currency_rank#108 <= 10)) + +(73) Project [codegen id : 21] +Output [5]: [store AS channel#109, item#103, return_ratio#104, return_rank#107, currency_rank#108] +Input [5]: [item#103, return_ratio#104, currency_ratio#105, return_rank#107, currency_rank#108] + +(74) Union + +(75) HashAggregate [codegen id : 22] +Input [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] +Keys [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] + +(76) Exchange +Input [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] +Arguments: hashpartitioning(channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38, 5), true, [id=#110] + +(77) HashAggregate [codegen id : 23] +Input [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] +Keys [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] 
+Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] + +(78) TakeOrderedAndProject +Input [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] +Arguments: 100, [channel#39 ASC NULLS FIRST, return_rank#37 ASC NULLS FIRST, currency_rank#38 ASC NULLS FIRST], [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt new file mode 100644 index 0000000000000..acba83ae8e411 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt @@ -0,0 +1,126 @@ +TakeOrderedAndProject [channel,currency_rank,item,return_rank,return_ratio] + WholeStageCodegen (23) + HashAggregate [channel,currency_rank,item,return_rank,return_ratio] + InputAdapter + Exchange [channel,currency_rank,item,return_rank,return_ratio] #1 + WholeStageCodegen (22) + HashAggregate [channel,currency_rank,item,return_rank,return_ratio] + InputAdapter + Union + WholeStageCodegen (7) + Project [currency_rank,item,return_rank,return_ratio] + Filter [currency_rank,return_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (6) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (5) + Sort [return_ratio] + InputAdapter + Exchange #2 + WholeStageCodegen (4) + HashAggregate [isEmpty,isEmpty,sum,sum,sum,sum,ws_item_sk] [currency_ratio,isEmpty,isEmpty,item,return_ratio,sum,sum,sum,sum,sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum(cast(coalesce(ws_quantity, 0) as bigint)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00))] + InputAdapter + Exchange [ws_item_sk] #3 + WholeStageCodegen (3) + HashAggregate 
[wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity] [isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum,sum,sum] + Project [wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + Project [ws_item_sk,ws_net_paid,ws_order_number,ws_quantity,ws_sold_date_sk] + Filter [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Filter [wr_item_sk,wr_order_number,wr_return_amt] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + WholeStageCodegen (14) + Project [currency_rank,item,return_rank,return_ratio] + Filter [currency_rank,return_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (13) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (12) + Sort [return_ratio] + InputAdapter + Exchange #6 + WholeStageCodegen (11) + HashAggregate [cs_item_sk,isEmpty,isEmpty,sum,sum,sum,sum] [currency_ratio,isEmpty,isEmpty,item,return_ratio,sum,sum,sum,sum,sum(cast(coalesce(cr_return_quantity, 0) as bigint)),sum(cast(coalesce(cs_quantity, 0) as bigint)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00))] + InputAdapter + Exchange [cs_item_sk] #7 + WholeStageCodegen (10) + HashAggregate 
[cr_return_amount,cr_return_quantity,cs_item_sk,cs_net_paid,cs_quantity] [isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum,sum,sum] + Project [cr_return_amount,cr_return_quantity,cs_item_sk,cs_net_paid,cs_quantity] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cr_return_amount,cr_return_quantity,cs_item_sk,cs_net_paid,cs_quantity,cs_sold_date_sk] + BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + Project [cs_item_sk,cs_net_paid,cs_order_number,cs_quantity,cs_sold_date_sk] + Filter [cs_item_sk,cs_net_paid,cs_net_profit,cs_order_number,cs_quantity,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_item_sk,cs_net_paid,cs_net_profit,cs_order_number,cs_quantity,cs_sold_date_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (8) + Filter [cr_item_sk,cr_order_number,cr_return_amount] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] + InputAdapter + ReusedExchange [d_date_sk] #5 + WholeStageCodegen (21) + Project [currency_rank,item,return_rank,return_ratio] + Filter [currency_rank,return_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (20) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (19) + Sort [return_ratio] + InputAdapter + Exchange #9 + WholeStageCodegen (18) + HashAggregate [isEmpty,isEmpty,ss_item_sk,sum,sum,sum,sum] [currency_ratio,isEmpty,isEmpty,item,return_ratio,sum,sum,sum,sum,sum(cast(coalesce(sr_return_quantity, 0) as bigint)),sum(cast(coalesce(ss_quantity, 0) as bigint)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00))] + InputAdapter + Exchange [ss_item_sk] #10 + WholeStageCodegen (17) + HashAggregate [sr_return_amt,sr_return_quantity,ss_item_sk,ss_net_paid,ss_quantity] [isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum,sum,sum] + Project 
[sr_return_amt,sr_return_quantity,ss_item_sk,ss_net_paid,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [sr_return_amt,sr_return_quantity,ss_item_sk,ss_net_paid,ss_quantity,ss_sold_date_sk] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Project [ss_item_sk,ss_net_paid,ss_quantity,ss_sold_date_sk,ss_ticket_number] + Filter [ss_item_sk,ss_net_paid,ss_net_profit,ss_quantity,ss_sold_date_sk,ss_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_net_paid,ss_net_profit,ss_quantity,ss_sold_date_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (15) + Filter [sr_item_sk,sr_return_amt,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] + InputAdapter + ReusedExchange [d_date_sk] #5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/explain.txt new file mode 100644 index 0000000000000..034301643add7 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/explain.txt @@ -0,0 +1,450 @@ +== Physical Plan == +TakeOrderedAndProject (81) ++- * HashAggregate (80) + +- Exchange (79) + +- * HashAggregate (78) + +- * Expand (77) + +- Union (76) + :- * HashAggregate (25) + : +- Exchange (24) + : +- * HashAggregate (23) + : +- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- Union (9) + : : : :- * Project (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- * Project (8) + : : : +- * Filter (7) + : : : +- * ColumnarToRow (6) + : : : +- Scan parquet default.store_returns (5) + : : +- BroadcastExchange (14) + : : +- * Project 
(13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet default.date_dim (10) + : +- BroadcastExchange (20) + : +- * Filter (19) + : +- * ColumnarToRow (18) + : +- Scan parquet default.store (17) + :- * HashAggregate (46) + : +- Exchange (45) + : +- * HashAggregate (44) + : +- * Project (43) + : +- * BroadcastHashJoin Inner BuildRight (42) + : :- * Project (37) + : : +- * BroadcastHashJoin Inner BuildRight (36) + : : :- Union (34) + : : : :- * Project (29) + : : : : +- * Filter (28) + : : : : +- * ColumnarToRow (27) + : : : : +- Scan parquet default.catalog_sales (26) + : : : +- * Project (33) + : : : +- * Filter (32) + : : : +- * ColumnarToRow (31) + : : : +- Scan parquet default.catalog_returns (30) + : : +- ReusedExchange (35) + : +- BroadcastExchange (41) + : +- * Filter (40) + : +- * ColumnarToRow (39) + : +- Scan parquet default.catalog_page (38) + +- * HashAggregate (75) + +- Exchange (74) + +- * HashAggregate (73) + +- * Project (72) + +- * BroadcastHashJoin Inner BuildRight (71) + :- * Project (66) + : +- * BroadcastHashJoin Inner BuildRight (65) + : :- Union (63) + : : :- * Project (50) + : : : +- * Filter (49) + : : : +- * ColumnarToRow (48) + : : : +- Scan parquet default.web_sales (47) + : : +- * Project (62) + : : +- * SortMergeJoin Inner (61) + : : :- * Sort (55) + : : : +- Exchange (54) + : : : +- * Filter (53) + : : : +- * ColumnarToRow (52) + : : : +- Scan parquet default.web_returns (51) + : : +- * Sort (60) + : : +- Exchange (59) + : : +- * Filter (58) + : : +- * ColumnarToRow (57) + : : +- Scan parquet default.web_sales (56) + : +- ReusedExchange (64) + +- BroadcastExchange (70) + +- * Filter (69) + +- * ColumnarToRow (68) + +- Scan parquet default.web_site (67) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] + +(3) Filter [codegen id : 1] +Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] +Condition : (isnotnull(cast(ss_sold_date_sk#1 as bigint)) AND isnotnull(cast(ss_store_sk#2 as bigint))) + +(4) Project [codegen id : 1] +Output [6]: [cast(ss_store_sk#2 as bigint) AS store_sk#5, cast(ss_sold_date_sk#1 as bigint) AS date_sk#6, ss_ext_sales_price#3 AS sales_price#7, ss_net_profit#4 AS profit#8, 0.00 AS return_amt#9, 0.00 AS net_loss#10] +Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] + +(5) Scan parquet default.store_returns +Output [4]: [sr_returned_date_sk#11, sr_store_sk#12, sr_return_amt#13, sr_net_loss#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_returns] +PushedFilters: [IsNotNull(sr_returned_date_sk), IsNotNull(sr_store_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 2] +Input [4]: [sr_returned_date_sk#11, sr_store_sk#12, sr_return_amt#13, sr_net_loss#14] + +(7) Filter [codegen id : 2] +Input [4]: [sr_returned_date_sk#11, sr_store_sk#12, sr_return_amt#13, sr_net_loss#14] +Condition : (isnotnull(sr_returned_date_sk#11) AND isnotnull(sr_store_sk#12)) + +(8) Project [codegen id : 2] +Output [6]: [sr_store_sk#12 AS store_sk#15, sr_returned_date_sk#11 AS date_sk#16, 0.00 AS sales_price#17, 0.00 AS profit#18, sr_return_amt#13 AS return_amt#19, sr_net_loss#14 AS net_loss#20] +Input [4]: [sr_returned_date_sk#11, sr_store_sk#12, sr_return_amt#13, sr_net_loss#14] + +(9) Union + +(10) Scan parquet default.date_dim +Output [2]: [d_date_sk#21, d_date#22] +Batched: true +Location: 
InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-06), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#21, d_date#22] + +(12) Filter [codegen id : 3] +Input [2]: [d_date_sk#21, d_date#22] +Condition : (((isnotnull(d_date#22) AND (d_date#22 >= 11192)) AND (d_date#22 <= 11206)) AND isnotnull(d_date_sk#21)) + +(13) Project [codegen id : 3] +Output [1]: [d_date_sk#21] +Input [2]: [d_date_sk#21, d_date#22] + +(14) BroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] + +(15) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [date_sk#6] +Right keys [1]: [cast(d_date_sk#21 as bigint)] +Join condition: None + +(16) Project [codegen id : 5] +Output [5]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10] +Input [7]: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, d_date_sk#21] + +(17) Scan parquet default.store +Output [2]: [s_store_sk#24, s_store_id#25] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 4] +Input [2]: [s_store_sk#24, s_store_id#25] + +(19) Filter [codegen id : 4] +Input [2]: [s_store_sk#24, s_store_id#25] +Condition : isnotnull(s_store_sk#24) + +(20) BroadcastExchange +Input [2]: [s_store_sk#24, s_store_id#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#26] + +(21) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [store_sk#5] +Right keys [1]: [cast(s_store_sk#24 as bigint)] +Join condition: None + +(22) 
Project [codegen id : 5] +Output [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#25] +Input [7]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_sk#24, s_store_id#25] + +(23) HashAggregate [codegen id : 5] +Input [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#25] +Keys [1]: [s_store_id#25] +Functions [4]: [partial_sum(UnscaledValue(sales_price#7)), partial_sum(UnscaledValue(return_amt#9)), partial_sum(UnscaledValue(profit#8)), partial_sum(UnscaledValue(net_loss#10))] +Aggregate Attributes [4]: [sum#27, sum#28, sum#29, sum#30] +Results [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] + +(24) Exchange +Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] +Arguments: hashpartitioning(s_store_id#25, 5), true, [id=#35] + +(25) HashAggregate [codegen id : 6] +Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] +Keys [1]: [s_store_id#25] +Functions [4]: [sum(UnscaledValue(sales_price#7)), sum(UnscaledValue(return_amt#9)), sum(UnscaledValue(profit#8)), sum(UnscaledValue(net_loss#10))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#7))#36, sum(UnscaledValue(return_amt#9))#37, sum(UnscaledValue(profit#8))#38, sum(UnscaledValue(net_loss#10))#39] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#7))#36,17,2) AS sales#40, MakeDecimal(sum(UnscaledValue(return_amt#9))#37,17,2) AS RETURNS#41, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#8))#38,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#10))#39,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#42, store channel AS channel#43, concat(store, s_store_id#25) AS id#44] + +(26) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#47, cs_net_profit#48] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_catalog_page_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 7] +Input [4]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#47, cs_net_profit#48] + +(28) Filter [codegen id : 7] +Input [4]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#47, cs_net_profit#48] +Condition : (isnotnull(cs_sold_date_sk#45) AND isnotnull(cs_catalog_page_sk#46)) + +(29) Project [codegen id : 7] +Output [6]: [cs_catalog_page_sk#46 AS page_sk#49, cs_sold_date_sk#45 AS date_sk#50, cs_ext_sales_price#47 AS sales_price#51, cs_net_profit#48 AS profit#52, 0.00 AS return_amt#53, 0.00 AS net_loss#54] +Input [4]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#47, cs_net_profit#48] + +(30) Scan parquet default.catalog_returns +Output [4]: [cr_returned_date_sk#55, cr_catalog_page_sk#56, cr_return_amount#57, cr_net_loss#58] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_returns] +PushedFilters: [IsNotNull(cr_returned_date_sk), IsNotNull(cr_catalog_page_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 8] +Input [4]: [cr_returned_date_sk#55, cr_catalog_page_sk#56, cr_return_amount#57, cr_net_loss#58] + +(32) Filter [codegen id : 8] +Input [4]: [cr_returned_date_sk#55, cr_catalog_page_sk#56, cr_return_amount#57, cr_net_loss#58] +Condition : (isnotnull(cr_returned_date_sk#55) AND isnotnull(cr_catalog_page_sk#56)) + +(33) Project [codegen id : 8] +Output [6]: [cr_catalog_page_sk#56 AS page_sk#59, cr_returned_date_sk#55 AS date_sk#60, 0.00 AS sales_price#61, 0.00 AS profit#62, cr_return_amount#57 AS return_amt#63, cr_net_loss#58 AS net_loss#64] +Input [4]: [cr_returned_date_sk#55, cr_catalog_page_sk#56, 
cr_return_amount#57, cr_net_loss#58] + +(34) Union + +(35) ReusedExchange [Reuses operator id: 14] +Output [1]: [d_date_sk#21] + +(36) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [date_sk#50] +Right keys [1]: [d_date_sk#21] +Join condition: None + +(37) Project [codegen id : 11] +Output [5]: [page_sk#49, sales_price#51, profit#52, return_amt#53, net_loss#54] +Input [7]: [page_sk#49, date_sk#50, sales_price#51, profit#52, return_amt#53, net_loss#54, d_date_sk#21] + +(38) Scan parquet default.catalog_page +Output [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_page] +PushedFilters: [IsNotNull(cp_catalog_page_sk)] +ReadSchema: struct + +(39) ColumnarToRow [codegen id : 10] +Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] + +(40) Filter [codegen id : 10] +Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] +Condition : isnotnull(cp_catalog_page_sk#65) + +(41) BroadcastExchange +Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#67] + +(42) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [page_sk#49] +Right keys [1]: [cp_catalog_page_sk#65] +Join condition: None + +(43) Project [codegen id : 11] +Output [5]: [sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_id#66] +Input [7]: [page_sk#49, sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_sk#65, cp_catalog_page_id#66] + +(44) HashAggregate [codegen id : 11] +Input [5]: [sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_id#66] +Keys [1]: [cp_catalog_page_id#66] +Functions [4]: [partial_sum(UnscaledValue(sales_price#51)), partial_sum(UnscaledValue(return_amt#53)), partial_sum(UnscaledValue(profit#52)), partial_sum(UnscaledValue(net_loss#54))] +Aggregate 
Attributes [4]: [sum#68, sum#69, sum#70, sum#71] +Results [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] + +(45) Exchange +Input [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] +Arguments: hashpartitioning(cp_catalog_page_id#66, 5), true, [id=#76] + +(46) HashAggregate [codegen id : 12] +Input [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] +Keys [1]: [cp_catalog_page_id#66] +Functions [4]: [sum(UnscaledValue(sales_price#51)), sum(UnscaledValue(return_amt#53)), sum(UnscaledValue(profit#52)), sum(UnscaledValue(net_loss#54))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#51))#77, sum(UnscaledValue(return_amt#53))#78, sum(UnscaledValue(profit#52))#79, sum(UnscaledValue(net_loss#54))#80] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#51))#77,17,2) AS sales#81, MakeDecimal(sum(UnscaledValue(return_amt#53))#78,17,2) AS RETURNS#82, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#52))#79,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#54))#80,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#83, catalog channel AS channel#84, concat(catalog_page, cp_catalog_page_id#66) AS id#85] + +(47) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#86, ws_web_site_sk#87, ws_ext_sales_price#88, ws_net_profit#89] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_web_site_sk)] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 13] +Input [4]: [ws_sold_date_sk#86, ws_web_site_sk#87, ws_ext_sales_price#88, ws_net_profit#89] + +(49) Filter [codegen id : 13] +Input [4]: [ws_sold_date_sk#86, ws_web_site_sk#87, ws_ext_sales_price#88, ws_net_profit#89] +Condition : (isnotnull(cast(ws_sold_date_sk#86 as bigint)) AND isnotnull(ws_web_site_sk#87)) + +(50) Project [codegen id : 13] +Output 
[6]: [ws_web_site_sk#87 AS wsr_web_site_sk#90, cast(ws_sold_date_sk#86 as bigint) AS date_sk#91, ws_ext_sales_price#88 AS sales_price#92, ws_net_profit#89 AS profit#93, 0.00 AS return_amt#94, 0.00 AS net_loss#95] +Input [4]: [ws_sold_date_sk#86, ws_web_site_sk#87, ws_ext_sales_price#88, ws_net_profit#89] + +(51) Scan parquet default.web_returns +Output [5]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_returns] +PushedFilters: [IsNotNull(wr_returned_date_sk)] +ReadSchema: struct + +(52) ColumnarToRow [codegen id : 14] +Input [5]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100] + +(53) Filter [codegen id : 14] +Input [5]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100] +Condition : isnotnull(wr_returned_date_sk#96) + +(54) Exchange +Input [5]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100] +Arguments: hashpartitioning(wr_item_sk#97, wr_order_number#98, 5), true, [id=#101] + +(55) Sort [codegen id : 15] +Input [5]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100] +Arguments: [wr_item_sk#97 ASC NULLS FIRST, wr_order_number#98 ASC NULLS FIRST], false, 0 + +(56) Scan parquet default.web_sales +Output [3]: [ws_item_sk#102, ws_web_site_sk#87, ws_order_number#103] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_web_site_sk)] +ReadSchema: struct + +(57) ColumnarToRow [codegen id : 16] +Input [3]: [ws_item_sk#102, ws_web_site_sk#87, ws_order_number#103] + +(58) Filter 
[codegen id : 16] +Input [3]: [ws_item_sk#102, ws_web_site_sk#87, ws_order_number#103] +Condition : ((isnotnull(ws_item_sk#102) AND isnotnull(ws_order_number#103)) AND isnotnull(ws_web_site_sk#87)) + +(59) Exchange +Input [3]: [ws_item_sk#102, ws_web_site_sk#87, ws_order_number#103] +Arguments: hashpartitioning(cast(ws_item_sk#102 as bigint), cast(ws_order_number#103 as bigint), 5), true, [id=#104] + +(60) Sort [codegen id : 17] +Input [3]: [ws_item_sk#102, ws_web_site_sk#87, ws_order_number#103] +Arguments: [cast(ws_item_sk#102 as bigint) ASC NULLS FIRST, cast(ws_order_number#103 as bigint) ASC NULLS FIRST], false, 0 + +(61) SortMergeJoin [codegen id : 18] +Left keys [2]: [wr_item_sk#97, wr_order_number#98] +Right keys [2]: [cast(ws_item_sk#102 as bigint), cast(ws_order_number#103 as bigint)] +Join condition: None + +(62) Project [codegen id : 18] +Output [6]: [ws_web_site_sk#87 AS wsr_web_site_sk#105, wr_returned_date_sk#96 AS date_sk#106, 0.00 AS sales_price#107, 0.00 AS profit#108, wr_return_amt#99 AS return_amt#109, wr_net_loss#100 AS net_loss#110] +Input [8]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100, ws_item_sk#102, ws_web_site_sk#87, ws_order_number#103] + +(63) Union + +(64) ReusedExchange [Reuses operator id: 14] +Output [1]: [d_date_sk#21] + +(65) BroadcastHashJoin [codegen id : 21] +Left keys [1]: [date_sk#91] +Right keys [1]: [cast(d_date_sk#21 as bigint)] +Join condition: None + +(66) Project [codegen id : 21] +Output [5]: [wsr_web_site_sk#90, sales_price#92, profit#93, return_amt#94, net_loss#95] +Input [7]: [wsr_web_site_sk#90, date_sk#91, sales_price#92, profit#93, return_amt#94, net_loss#95, d_date_sk#21] + +(67) Scan parquet default.web_site +Output [2]: [web_site_sk#111, web_site_id#112] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_site] +PushedFilters: 
[IsNotNull(web_site_sk)] +ReadSchema: struct + +(68) ColumnarToRow [codegen id : 20] +Input [2]: [web_site_sk#111, web_site_id#112] + +(69) Filter [codegen id : 20] +Input [2]: [web_site_sk#111, web_site_id#112] +Condition : isnotnull(web_site_sk#111) + +(70) BroadcastExchange +Input [2]: [web_site_sk#111, web_site_id#112] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#113] + +(71) BroadcastHashJoin [codegen id : 21] +Left keys [1]: [wsr_web_site_sk#90] +Right keys [1]: [web_site_sk#111] +Join condition: None + +(72) Project [codegen id : 21] +Output [5]: [sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#112] +Input [7]: [wsr_web_site_sk#90, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_sk#111, web_site_id#112] + +(73) HashAggregate [codegen id : 21] +Input [5]: [sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#112] +Keys [1]: [web_site_id#112] +Functions [4]: [partial_sum(UnscaledValue(sales_price#92)), partial_sum(UnscaledValue(return_amt#94)), partial_sum(UnscaledValue(profit#93)), partial_sum(UnscaledValue(net_loss#95))] +Aggregate Attributes [4]: [sum#114, sum#115, sum#116, sum#117] +Results [5]: [web_site_id#112, sum#118, sum#119, sum#120, sum#121] + +(74) Exchange +Input [5]: [web_site_id#112, sum#118, sum#119, sum#120, sum#121] +Arguments: hashpartitioning(web_site_id#112, 5), true, [id=#122] + +(75) HashAggregate [codegen id : 22] +Input [5]: [web_site_id#112, sum#118, sum#119, sum#120, sum#121] +Keys [1]: [web_site_id#112] +Functions [4]: [sum(UnscaledValue(sales_price#92)), sum(UnscaledValue(return_amt#94)), sum(UnscaledValue(profit#93)), sum(UnscaledValue(net_loss#95))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#92))#123, sum(UnscaledValue(return_amt#94))#124, sum(UnscaledValue(profit#93))#125, sum(UnscaledValue(net_loss#95))#126] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#92))#123,17,2) AS sales#127, 
MakeDecimal(sum(UnscaledValue(return_amt#94))#124,17,2) AS RETURNS#128, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#93))#125,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#95))#126,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#129, web channel AS channel#130, concat(web_site, web_site_id#112) AS id#131] + +(76) Union + +(77) Expand [codegen id : 23] +Input [5]: [sales#40, RETURNS#41, profit#42, channel#43, id#44] +Arguments: [List(sales#40, returns#41, profit#42, channel#43, id#44, 0), List(sales#40, returns#41, profit#42, channel#43, null, 1), List(sales#40, returns#41, profit#42, null, null, 3)], [sales#40, returns#41, profit#42, channel#132, id#133, spark_grouping_id#134] + +(78) HashAggregate [codegen id : 23] +Input [6]: [sales#40, returns#41, profit#42, channel#132, id#133, spark_grouping_id#134] +Keys [3]: [channel#132, id#133, spark_grouping_id#134] +Functions [3]: [partial_sum(sales#40), partial_sum(returns#41), partial_sum(profit#42)] +Aggregate Attributes [6]: [sum#135, isEmpty#136, sum#137, isEmpty#138, sum#139, isEmpty#140] +Results [9]: [channel#132, id#133, spark_grouping_id#134, sum#141, isEmpty#142, sum#143, isEmpty#144, sum#145, isEmpty#146] + +(79) Exchange +Input [9]: [channel#132, id#133, spark_grouping_id#134, sum#141, isEmpty#142, sum#143, isEmpty#144, sum#145, isEmpty#146] +Arguments: hashpartitioning(channel#132, id#133, spark_grouping_id#134, 5), true, [id=#147] + +(80) HashAggregate [codegen id : 24] +Input [9]: [channel#132, id#133, spark_grouping_id#134, sum#141, isEmpty#142, sum#143, isEmpty#144, sum#145, isEmpty#146] +Keys [3]: [channel#132, id#133, spark_grouping_id#134] +Functions [3]: [sum(sales#40), sum(returns#41), sum(profit#42)] +Aggregate Attributes [3]: [sum(sales#40)#148, sum(returns#41)#149, sum(profit#42)#150] +Results [5]: [channel#132, id#133, sum(sales#40)#148 AS sales#151, sum(returns#41)#149 AS returns#152, sum(profit#42)#150 AS 
profit#153] + +(81) TakeOrderedAndProject +Input [5]: [channel#132, id#133, sales#151, returns#152, profit#153] +Arguments: 100, [channel#132 ASC NULLS FIRST, id#133 ASC NULLS FIRST], [channel#132, id#133, sales#151, returns#152, profit#153] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/simplified.txt new file mode 100644 index 0000000000000..71e204433c6ef --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/simplified.txt @@ -0,0 +1,132 @@ +TakeOrderedAndProject [channel,id,profit,returns,sales] + WholeStageCodegen (24) + HashAggregate [channel,id,isEmpty,isEmpty,isEmpty,spark_grouping_id,sum,sum,sum] [isEmpty,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] + InputAdapter + Exchange [channel,id,spark_grouping_id] #1 + WholeStageCodegen (23) + HashAggregate [channel,id,profit,returns,sales,spark_grouping_id] [isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum] + Expand [channel,id,profit,returns,sales] + InputAdapter + Union + WholeStageCodegen (6) + HashAggregate [s_store_id,sum,sum,sum,sum] [RETURNS,channel,id,profit,sales,sum,sum,sum,sum,sum(UnscaledValue(net_loss)),sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price))] + InputAdapter + Exchange [s_store_id] #2 + WholeStageCodegen (5) + HashAggregate [net_loss,profit,return_amt,s_store_id,sales_price] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [net_loss,profit,return_amt,s_store_id,sales_price] + BroadcastHashJoin [s_store_sk,store_sk] + Project [net_loss,profit,return_amt,sales_price,store_sk] + BroadcastHashJoin [d_date_sk,date_sk] + InputAdapter + Union + WholeStageCodegen (1) + Project [ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_store_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet 
default.store_sales [ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_store_sk] + WholeStageCodegen (2) + Project [sr_net_loss,sr_return_amt,sr_returned_date_sk,sr_store_sk] + Filter [sr_returned_date_sk,sr_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_net_loss,sr_return_amt,sr_returned_date_sk,sr_store_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (4) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_id,s_store_sk] + WholeStageCodegen (12) + HashAggregate [cp_catalog_page_id,sum,sum,sum,sum] [RETURNS,channel,id,profit,sales,sum,sum,sum,sum,sum(UnscaledValue(net_loss)),sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price))] + InputAdapter + Exchange [cp_catalog_page_id] #5 + WholeStageCodegen (11) + HashAggregate [cp_catalog_page_id,net_loss,profit,return_amt,sales_price] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [cp_catalog_page_id,net_loss,profit,return_amt,sales_price] + BroadcastHashJoin [cp_catalog_page_sk,page_sk] + Project [net_loss,page_sk,profit,return_amt,sales_price] + BroadcastHashJoin [d_date_sk,date_sk] + InputAdapter + Union + WholeStageCodegen (7) + Project [cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + Filter [cs_catalog_page_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + WholeStageCodegen (8) + Project [cr_catalog_page_sk,cr_net_loss,cr_return_amount,cr_returned_date_sk] + Filter [cr_catalog_page_sk,cr_returned_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_catalog_page_sk,cr_net_loss,cr_return_amount,cr_returned_date_sk] + InputAdapter + ReusedExchange 
[d_date_sk] #3 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (10) + Filter [cp_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_page [cp_catalog_page_id,cp_catalog_page_sk] + WholeStageCodegen (22) + HashAggregate [sum,sum,sum,sum,web_site_id] [RETURNS,channel,id,profit,sales,sum,sum,sum,sum,sum(UnscaledValue(net_loss)),sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price))] + InputAdapter + Exchange [web_site_id] #7 + WholeStageCodegen (21) + HashAggregate [net_loss,profit,return_amt,sales_price,web_site_id] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [net_loss,profit,return_amt,sales_price,web_site_id] + BroadcastHashJoin [web_site_sk,wsr_web_site_sk] + Project [net_loss,profit,return_amt,sales_price,wsr_web_site_sk] + BroadcastHashJoin [d_date_sk,date_sk] + InputAdapter + Union + WholeStageCodegen (13) + Project [ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,ws_web_site_sk] + Filter [ws_sold_date_sk,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,ws_web_site_sk] + WholeStageCodegen (18) + Project [wr_net_loss,wr_return_amt,wr_returned_date_sk,ws_web_site_sk] + SortMergeJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + InputAdapter + WholeStageCodegen (15) + Sort [wr_item_sk,wr_order_number] + InputAdapter + Exchange [wr_item_sk,wr_order_number] #8 + WholeStageCodegen (14) + Filter [wr_returned_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_item_sk,wr_net_loss,wr_order_number,wr_return_amt,wr_returned_date_sk] + InputAdapter + WholeStageCodegen (17) + Sort [ws_item_sk,ws_order_number] + InputAdapter + Exchange [ws_item_sk,ws_order_number] #9 + WholeStageCodegen (16) + Filter [ws_item_sk,ws_order_number,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_order_number,ws_web_site_sk] + InputAdapter + ReusedExchange 
[d_date_sk] #3 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (20) + Filter [web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_site [web_site_id,web_site_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/explain.txt new file mode 100644 index 0000000000000..cbe5ed4a5b6aa --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/explain.txt @@ -0,0 +1,435 @@ +== Physical Plan == +TakeOrderedAndProject (78) ++- * HashAggregate (77) + +- Exchange (76) + +- * HashAggregate (75) + +- * Expand (74) + +- Union (73) + :- * HashAggregate (25) + : +- Exchange (24) + : +- * HashAggregate (23) + : +- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- Union (9) + : : : :- * Project (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- * Project (8) + : : : +- * Filter (7) + : : : +- * ColumnarToRow (6) + : : : +- Scan parquet default.store_returns (5) + : : +- BroadcastExchange (14) + : : +- * Project (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet default.date_dim (10) + : +- BroadcastExchange (20) + : +- * Filter (19) + : +- * ColumnarToRow (18) + : +- Scan parquet default.store (17) + :- * HashAggregate (46) + : +- Exchange (45) + : +- * HashAggregate (44) + : +- * Project (43) + : +- * BroadcastHashJoin Inner BuildRight (42) + : :- * Project (37) + : : +- * BroadcastHashJoin Inner BuildRight (36) + : : :- Union (34) + : : : :- * Project (29) + : : : : +- * Filter (28) + : : : : +- * ColumnarToRow (27) + : : : : +- Scan parquet default.catalog_sales (26) + : : : +- * Project (33) + : : : +- * Filter (32) + : : : +- * ColumnarToRow (31) + : : : +- Scan parquet default.catalog_returns (30) + : : 
+- ReusedExchange (35) + : +- BroadcastExchange (41) + : +- * Filter (40) + : +- * ColumnarToRow (39) + : +- Scan parquet default.catalog_page (38) + +- * HashAggregate (72) + +- Exchange (71) + +- * HashAggregate (70) + +- * Project (69) + +- * BroadcastHashJoin Inner BuildRight (68) + :- * Project (63) + : +- * BroadcastHashJoin Inner BuildRight (62) + : :- Union (60) + : : :- * Project (50) + : : : +- * Filter (49) + : : : +- * ColumnarToRow (48) + : : : +- Scan parquet default.web_sales (47) + : : +- * Project (59) + : : +- * BroadcastHashJoin Inner BuildRight (58) + : : :- * Filter (53) + : : : +- * ColumnarToRow (52) + : : : +- Scan parquet default.web_returns (51) + : : +- BroadcastExchange (57) + : : +- * Filter (56) + : : +- * ColumnarToRow (55) + : : +- Scan parquet default.web_sales (54) + : +- ReusedExchange (61) + +- BroadcastExchange (67) + +- * Filter (66) + +- * ColumnarToRow (65) + +- Scan parquet default.web_site (64) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] + +(3) Filter [codegen id : 1] +Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] +Condition : (isnotnull(cast(ss_sold_date_sk#1 as bigint)) AND isnotnull(cast(ss_store_sk#2 as bigint))) + +(4) Project [codegen id : 1] +Output [6]: [cast(ss_store_sk#2 as bigint) AS store_sk#5, cast(ss_sold_date_sk#1 as bigint) AS date_sk#6, ss_ext_sales_price#3 AS sales_price#7, ss_net_profit#4 AS profit#8, 0.00 AS return_amt#9, 0.00 AS net_loss#10] +Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] + +(5) Scan parquet default.store_returns 
+Output [4]: [sr_returned_date_sk#11, sr_store_sk#12, sr_return_amt#13, sr_net_loss#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_returns] +PushedFilters: [IsNotNull(sr_returned_date_sk), IsNotNull(sr_store_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 2] +Input [4]: [sr_returned_date_sk#11, sr_store_sk#12, sr_return_amt#13, sr_net_loss#14] + +(7) Filter [codegen id : 2] +Input [4]: [sr_returned_date_sk#11, sr_store_sk#12, sr_return_amt#13, sr_net_loss#14] +Condition : (isnotnull(sr_returned_date_sk#11) AND isnotnull(sr_store_sk#12)) + +(8) Project [codegen id : 2] +Output [6]: [sr_store_sk#12 AS store_sk#15, sr_returned_date_sk#11 AS date_sk#16, 0.00 AS sales_price#17, 0.00 AS profit#18, sr_return_amt#13 AS return_amt#19, sr_net_loss#14 AS net_loss#20] +Input [4]: [sr_returned_date_sk#11, sr_store_sk#12, sr_return_amt#13, sr_net_loss#14] + +(9) Union + +(10) Scan parquet default.date_dim +Output [2]: [d_date_sk#21, d_date#22] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-06), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#21, d_date#22] + +(12) Filter [codegen id : 3] +Input [2]: [d_date_sk#21, d_date#22] +Condition : (((isnotnull(d_date#22) AND (d_date#22 >= 11192)) AND (d_date#22 <= 11206)) AND isnotnull(d_date_sk#21)) + +(13) Project [codegen id : 3] +Output [1]: [d_date_sk#21] +Input [2]: [d_date_sk#21, d_date#22] + +(14) BroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] + +(15) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [date_sk#6] +Right keys [1]: 
[cast(d_date_sk#21 as bigint)] +Join condition: None + +(16) Project [codegen id : 5] +Output [5]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10] +Input [7]: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, d_date_sk#21] + +(17) Scan parquet default.store +Output [2]: [s_store_sk#24, s_store_id#25] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 4] +Input [2]: [s_store_sk#24, s_store_id#25] + +(19) Filter [codegen id : 4] +Input [2]: [s_store_sk#24, s_store_id#25] +Condition : isnotnull(s_store_sk#24) + +(20) BroadcastExchange +Input [2]: [s_store_sk#24, s_store_id#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#26] + +(21) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [store_sk#5] +Right keys [1]: [cast(s_store_sk#24 as bigint)] +Join condition: None + +(22) Project [codegen id : 5] +Output [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#25] +Input [7]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_sk#24, s_store_id#25] + +(23) HashAggregate [codegen id : 5] +Input [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#25] +Keys [1]: [s_store_id#25] +Functions [4]: [partial_sum(UnscaledValue(sales_price#7)), partial_sum(UnscaledValue(return_amt#9)), partial_sum(UnscaledValue(profit#8)), partial_sum(UnscaledValue(net_loss#10))] +Aggregate Attributes [4]: [sum#27, sum#28, sum#29, sum#30] +Results [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] + +(24) Exchange +Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] +Arguments: hashpartitioning(s_store_id#25, 5), true, [id=#35] + +(25) HashAggregate [codegen id : 6] +Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] +Keys [1]: 
[s_store_id#25] +Functions [4]: [sum(UnscaledValue(sales_price#7)), sum(UnscaledValue(return_amt#9)), sum(UnscaledValue(profit#8)), sum(UnscaledValue(net_loss#10))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#7))#36, sum(UnscaledValue(return_amt#9))#37, sum(UnscaledValue(profit#8))#38, sum(UnscaledValue(net_loss#10))#39] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#7))#36,17,2) AS sales#40, MakeDecimal(sum(UnscaledValue(return_amt#9))#37,17,2) AS RETURNS#41, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#8))#38,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#10))#39,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#42, store channel AS channel#43, concat(store, s_store_id#25) AS id#44] + +(26) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#47, cs_net_profit#48] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_catalog_page_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 7] +Input [4]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#47, cs_net_profit#48] + +(28) Filter [codegen id : 7] +Input [4]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#47, cs_net_profit#48] +Condition : (isnotnull(cs_sold_date_sk#45) AND isnotnull(cs_catalog_page_sk#46)) + +(29) Project [codegen id : 7] +Output [6]: [cs_catalog_page_sk#46 AS page_sk#49, cs_sold_date_sk#45 AS date_sk#50, cs_ext_sales_price#47 AS sales_price#51, cs_net_profit#48 AS profit#52, 0.00 AS return_amt#53, 0.00 AS net_loss#54] +Input [4]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#47, cs_net_profit#48] + +(30) Scan parquet default.catalog_returns +Output [4]: [cr_returned_date_sk#55, cr_catalog_page_sk#56, 
cr_return_amount#57, cr_net_loss#58] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_returns] +PushedFilters: [IsNotNull(cr_returned_date_sk), IsNotNull(cr_catalog_page_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 8] +Input [4]: [cr_returned_date_sk#55, cr_catalog_page_sk#56, cr_return_amount#57, cr_net_loss#58] + +(32) Filter [codegen id : 8] +Input [4]: [cr_returned_date_sk#55, cr_catalog_page_sk#56, cr_return_amount#57, cr_net_loss#58] +Condition : (isnotnull(cr_returned_date_sk#55) AND isnotnull(cr_catalog_page_sk#56)) + +(33) Project [codegen id : 8] +Output [6]: [cr_catalog_page_sk#56 AS page_sk#59, cr_returned_date_sk#55 AS date_sk#60, 0.00 AS sales_price#61, 0.00 AS profit#62, cr_return_amount#57 AS return_amt#63, cr_net_loss#58 AS net_loss#64] +Input [4]: [cr_returned_date_sk#55, cr_catalog_page_sk#56, cr_return_amount#57, cr_net_loss#58] + +(34) Union + +(35) ReusedExchange [Reuses operator id: 14] +Output [1]: [d_date_sk#21] + +(36) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [date_sk#50] +Right keys [1]: [d_date_sk#21] +Join condition: None + +(37) Project [codegen id : 11] +Output [5]: [page_sk#49, sales_price#51, profit#52, return_amt#53, net_loss#54] +Input [7]: [page_sk#49, date_sk#50, sales_price#51, profit#52, return_amt#53, net_loss#54, d_date_sk#21] + +(38) Scan parquet default.catalog_page +Output [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_page] +PushedFilters: [IsNotNull(cp_catalog_page_sk)] +ReadSchema: struct + +(39) ColumnarToRow [codegen id : 10] +Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] + +(40) Filter [codegen id : 10] +Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] +Condition : 
isnotnull(cp_catalog_page_sk#65) + +(41) BroadcastExchange +Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#67] + +(42) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [page_sk#49] +Right keys [1]: [cp_catalog_page_sk#65] +Join condition: None + +(43) Project [codegen id : 11] +Output [5]: [sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_id#66] +Input [7]: [page_sk#49, sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_sk#65, cp_catalog_page_id#66] + +(44) HashAggregate [codegen id : 11] +Input [5]: [sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_id#66] +Keys [1]: [cp_catalog_page_id#66] +Functions [4]: [partial_sum(UnscaledValue(sales_price#51)), partial_sum(UnscaledValue(return_amt#53)), partial_sum(UnscaledValue(profit#52)), partial_sum(UnscaledValue(net_loss#54))] +Aggregate Attributes [4]: [sum#68, sum#69, sum#70, sum#71] +Results [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] + +(45) Exchange +Input [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] +Arguments: hashpartitioning(cp_catalog_page_id#66, 5), true, [id=#76] + +(46) HashAggregate [codegen id : 12] +Input [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] +Keys [1]: [cp_catalog_page_id#66] +Functions [4]: [sum(UnscaledValue(sales_price#51)), sum(UnscaledValue(return_amt#53)), sum(UnscaledValue(profit#52)), sum(UnscaledValue(net_loss#54))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#51))#77, sum(UnscaledValue(return_amt#53))#78, sum(UnscaledValue(profit#52))#79, sum(UnscaledValue(net_loss#54))#80] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#51))#77,17,2) AS sales#81, MakeDecimal(sum(UnscaledValue(return_amt#53))#78,17,2) AS RETURNS#82, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#52))#79,17,2) as decimal(18,2))) - 
promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#54))#80,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#83, catalog channel AS channel#84, concat(catalog_page, cp_catalog_page_id#66) AS id#85] + +(47) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#86, ws_web_site_sk#87, ws_ext_sales_price#88, ws_net_profit#89] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_web_site_sk)] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 13] +Input [4]: [ws_sold_date_sk#86, ws_web_site_sk#87, ws_ext_sales_price#88, ws_net_profit#89] + +(49) Filter [codegen id : 13] +Input [4]: [ws_sold_date_sk#86, ws_web_site_sk#87, ws_ext_sales_price#88, ws_net_profit#89] +Condition : (isnotnull(cast(ws_sold_date_sk#86 as bigint)) AND isnotnull(ws_web_site_sk#87)) + +(50) Project [codegen id : 13] +Output [6]: [ws_web_site_sk#87 AS wsr_web_site_sk#90, cast(ws_sold_date_sk#86 as bigint) AS date_sk#91, ws_ext_sales_price#88 AS sales_price#92, ws_net_profit#89 AS profit#93, 0.00 AS return_amt#94, 0.00 AS net_loss#95] +Input [4]: [ws_sold_date_sk#86, ws_web_site_sk#87, ws_ext_sales_price#88, ws_net_profit#89] + +(51) Scan parquet default.web_returns +Output [5]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_returns] +PushedFilters: [IsNotNull(wr_returned_date_sk)] +ReadSchema: struct + +(52) ColumnarToRow [codegen id : 15] +Input [5]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100] + +(53) Filter [codegen id : 15] +Input [5]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100] +Condition : 
isnotnull(wr_returned_date_sk#96) + +(54) Scan parquet default.web_sales +Output [3]: [ws_item_sk#101, ws_web_site_sk#87, ws_order_number#102] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_web_site_sk)] +ReadSchema: struct + +(55) ColumnarToRow [codegen id : 14] +Input [3]: [ws_item_sk#101, ws_web_site_sk#87, ws_order_number#102] + +(56) Filter [codegen id : 14] +Input [3]: [ws_item_sk#101, ws_web_site_sk#87, ws_order_number#102] +Condition : ((isnotnull(ws_item_sk#101) AND isnotnull(ws_order_number#102)) AND isnotnull(ws_web_site_sk#87)) + +(57) BroadcastExchange +Input [3]: [ws_item_sk#101, ws_web_site_sk#87, ws_order_number#102] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint), cast(input[2, int, false] as bigint)),false), [id=#103] + +(58) BroadcastHashJoin [codegen id : 15] +Left keys [2]: [wr_item_sk#97, wr_order_number#98] +Right keys [2]: [cast(ws_item_sk#101 as bigint), cast(ws_order_number#102 as bigint)] +Join condition: None + +(59) Project [codegen id : 15] +Output [6]: [ws_web_site_sk#87 AS wsr_web_site_sk#104, wr_returned_date_sk#96 AS date_sk#105, 0.00 AS sales_price#106, 0.00 AS profit#107, wr_return_amt#99 AS return_amt#108, wr_net_loss#100 AS net_loss#109] +Input [8]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100, ws_item_sk#101, ws_web_site_sk#87, ws_order_number#102] + +(60) Union + +(61) ReusedExchange [Reuses operator id: 14] +Output [1]: [d_date_sk#21] + +(62) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [date_sk#91] +Right keys [1]: [cast(d_date_sk#21 as bigint)] +Join condition: None + +(63) Project [codegen id : 18] +Output [5]: [wsr_web_site_sk#90, sales_price#92, profit#93, return_amt#94, net_loss#95] +Input [7]: [wsr_web_site_sk#90, date_sk#91, 
sales_price#92, profit#93, return_amt#94, net_loss#95, d_date_sk#21] + +(64) Scan parquet default.web_site +Output [2]: [web_site_sk#110, web_site_id#111] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_site] +PushedFilters: [IsNotNull(web_site_sk)] +ReadSchema: struct + +(65) ColumnarToRow [codegen id : 17] +Input [2]: [web_site_sk#110, web_site_id#111] + +(66) Filter [codegen id : 17] +Input [2]: [web_site_sk#110, web_site_id#111] +Condition : isnotnull(web_site_sk#110) + +(67) BroadcastExchange +Input [2]: [web_site_sk#110, web_site_id#111] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#112] + +(68) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [wsr_web_site_sk#90] +Right keys [1]: [web_site_sk#110] +Join condition: None + +(69) Project [codegen id : 18] +Output [5]: [sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#111] +Input [7]: [wsr_web_site_sk#90, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_sk#110, web_site_id#111] + +(70) HashAggregate [codegen id : 18] +Input [5]: [sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#111] +Keys [1]: [web_site_id#111] +Functions [4]: [partial_sum(UnscaledValue(sales_price#92)), partial_sum(UnscaledValue(return_amt#94)), partial_sum(UnscaledValue(profit#93)), partial_sum(UnscaledValue(net_loss#95))] +Aggregate Attributes [4]: [sum#113, sum#114, sum#115, sum#116] +Results [5]: [web_site_id#111, sum#117, sum#118, sum#119, sum#120] + +(71) Exchange +Input [5]: [web_site_id#111, sum#117, sum#118, sum#119, sum#120] +Arguments: hashpartitioning(web_site_id#111, 5), true, [id=#121] + +(72) HashAggregate [codegen id : 19] +Input [5]: [web_site_id#111, sum#117, sum#118, sum#119, sum#120] +Keys [1]: [web_site_id#111] +Functions [4]: [sum(UnscaledValue(sales_price#92)), sum(UnscaledValue(return_amt#94)), 
sum(UnscaledValue(profit#93)), sum(UnscaledValue(net_loss#95))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#92))#122, sum(UnscaledValue(return_amt#94))#123, sum(UnscaledValue(profit#93))#124, sum(UnscaledValue(net_loss#95))#125] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#92))#122,17,2) AS sales#126, MakeDecimal(sum(UnscaledValue(return_amt#94))#123,17,2) AS RETURNS#127, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#93))#124,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#95))#125,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#128, web channel AS channel#129, concat(web_site, web_site_id#111) AS id#130] + +(73) Union + +(74) Expand [codegen id : 20] +Input [5]: [sales#40, RETURNS#41, profit#42, channel#43, id#44] +Arguments: [List(sales#40, returns#41, profit#42, channel#43, id#44, 0), List(sales#40, returns#41, profit#42, channel#43, null, 1), List(sales#40, returns#41, profit#42, null, null, 3)], [sales#40, returns#41, profit#42, channel#131, id#132, spark_grouping_id#133] + +(75) HashAggregate [codegen id : 20] +Input [6]: [sales#40, returns#41, profit#42, channel#131, id#132, spark_grouping_id#133] +Keys [3]: [channel#131, id#132, spark_grouping_id#133] +Functions [3]: [partial_sum(sales#40), partial_sum(returns#41), partial_sum(profit#42)] +Aggregate Attributes [6]: [sum#134, isEmpty#135, sum#136, isEmpty#137, sum#138, isEmpty#139] +Results [9]: [channel#131, id#132, spark_grouping_id#133, sum#140, isEmpty#141, sum#142, isEmpty#143, sum#144, isEmpty#145] + +(76) Exchange +Input [9]: [channel#131, id#132, spark_grouping_id#133, sum#140, isEmpty#141, sum#142, isEmpty#143, sum#144, isEmpty#145] +Arguments: hashpartitioning(channel#131, id#132, spark_grouping_id#133, 5), true, [id=#146] + +(77) HashAggregate [codegen id : 21] +Input [9]: [channel#131, id#132, spark_grouping_id#133, sum#140, isEmpty#141, sum#142, isEmpty#143, sum#144, isEmpty#145] +Keys 
[3]: [channel#131, id#132, spark_grouping_id#133] +Functions [3]: [sum(sales#40), sum(returns#41), sum(profit#42)] +Aggregate Attributes [3]: [sum(sales#40)#147, sum(returns#41)#148, sum(profit#42)#149] +Results [5]: [channel#131, id#132, sum(sales#40)#147 AS sales#150, sum(returns#41)#148 AS returns#151, sum(profit#42)#149 AS profit#152] + +(78) TakeOrderedAndProject +Input [5]: [channel#131, id#132, sales#150, returns#151, profit#152] +Arguments: 100, [channel#131 ASC NULLS FIRST, id#132 ASC NULLS FIRST], [channel#131, id#132, sales#150, returns#151, profit#152] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/simplified.txt new file mode 100644 index 0000000000000..0bb6f48db3e12 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/simplified.txt @@ -0,0 +1,123 @@ +TakeOrderedAndProject [channel,id,profit,returns,sales] + WholeStageCodegen (21) + HashAggregate [channel,id,isEmpty,isEmpty,isEmpty,spark_grouping_id,sum,sum,sum] [isEmpty,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] + InputAdapter + Exchange [channel,id,spark_grouping_id] #1 + WholeStageCodegen (20) + HashAggregate [channel,id,profit,returns,sales,spark_grouping_id] [isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum] + Expand [channel,id,profit,returns,sales] + InputAdapter + Union + WholeStageCodegen (6) + HashAggregate [s_store_id,sum,sum,sum,sum] [RETURNS,channel,id,profit,sales,sum,sum,sum,sum,sum(UnscaledValue(net_loss)),sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price))] + InputAdapter + Exchange [s_store_id] #2 + WholeStageCodegen (5) + HashAggregate [net_loss,profit,return_amt,s_store_id,sales_price] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [net_loss,profit,return_amt,s_store_id,sales_price] + BroadcastHashJoin 
[s_store_sk,store_sk] + Project [net_loss,profit,return_amt,sales_price,store_sk] + BroadcastHashJoin [d_date_sk,date_sk] + InputAdapter + Union + WholeStageCodegen (1) + Project [ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_store_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_store_sk] + WholeStageCodegen (2) + Project [sr_net_loss,sr_return_amt,sr_returned_date_sk,sr_store_sk] + Filter [sr_returned_date_sk,sr_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_net_loss,sr_return_amt,sr_returned_date_sk,sr_store_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (4) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_id,s_store_sk] + WholeStageCodegen (12) + HashAggregate [cp_catalog_page_id,sum,sum,sum,sum] [RETURNS,channel,id,profit,sales,sum,sum,sum,sum,sum(UnscaledValue(net_loss)),sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price))] + InputAdapter + Exchange [cp_catalog_page_id] #5 + WholeStageCodegen (11) + HashAggregate [cp_catalog_page_id,net_loss,profit,return_amt,sales_price] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [cp_catalog_page_id,net_loss,profit,return_amt,sales_price] + BroadcastHashJoin [cp_catalog_page_sk,page_sk] + Project [net_loss,page_sk,profit,return_amt,sales_price] + BroadcastHashJoin [d_date_sk,date_sk] + InputAdapter + Union + WholeStageCodegen (7) + Project [cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + Filter [cs_catalog_page_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + 
WholeStageCodegen (8) + Project [cr_catalog_page_sk,cr_net_loss,cr_return_amount,cr_returned_date_sk] + Filter [cr_catalog_page_sk,cr_returned_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_catalog_page_sk,cr_net_loss,cr_return_amount,cr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (10) + Filter [cp_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_page [cp_catalog_page_id,cp_catalog_page_sk] + WholeStageCodegen (19) + HashAggregate [sum,sum,sum,sum,web_site_id] [RETURNS,channel,id,profit,sales,sum,sum,sum,sum,sum(UnscaledValue(net_loss)),sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price))] + InputAdapter + Exchange [web_site_id] #7 + WholeStageCodegen (18) + HashAggregate [net_loss,profit,return_amt,sales_price,web_site_id] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [net_loss,profit,return_amt,sales_price,web_site_id] + BroadcastHashJoin [web_site_sk,wsr_web_site_sk] + Project [net_loss,profit,return_amt,sales_price,wsr_web_site_sk] + BroadcastHashJoin [d_date_sk,date_sk] + InputAdapter + Union + WholeStageCodegen (13) + Project [ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,ws_web_site_sk] + Filter [ws_sold_date_sk,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,ws_web_site_sk] + WholeStageCodegen (15) + Project [wr_net_loss,wr_return_amt,wr_returned_date_sk,ws_web_site_sk] + BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + Filter [wr_returned_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_item_sk,wr_net_loss,wr_order_number,wr_return_amt,wr_returned_date_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (14) + Filter [ws_item_sk,ws_order_number,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales 
[ws_item_sk,ws_order_number,ws_web_site_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (17) + Filter [web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_site [web_site_id,web_site_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/explain.txt new file mode 100644 index 0000000000000..6327f03620f61 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/explain.txt @@ -0,0 +1,198 @@ +== Physical Plan == +TakeOrderedAndProject (35) ++- * HashAggregate (34) + +- Exchange (33) + +- * HashAggregate (32) + +- * Project (31) + +- * SortMergeJoin Inner (30) + :- * Sort (17) + : +- Exchange (16) + : +- * Project (15) + : +- * BroadcastHashJoin Inner BuildRight (14) + : :- * Project (9) + : : +- * BroadcastHashJoin Inner BuildRight (8) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.store_sales (1) + : : +- BroadcastExchange (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.date_dim (4) + : +- BroadcastExchange (13) + : +- * Filter (12) + : +- * ColumnarToRow (11) + : +- Scan parquet default.store (10) + +- * Sort (29) + +- Exchange (28) + +- * Project (27) + +- * BroadcastHashJoin Inner BuildLeft (26) + :- BroadcastExchange (22) + : +- * Project (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.date_dim (18) + +- * Filter (25) + +- * ColumnarToRow (24) + +- Scan parquet default.store_returns (23) + + +(1) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] 
+PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_store_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5] + +(3) Filter [codegen id : 3] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5] +Condition : ((((isnotnull(ss_customer_sk#3) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_ticket_number#5)) AND isnotnull(ss_store_sk#4)) AND isnotnull(ss_sold_date_sk#1)) + +(4) Scan parquet default.date_dim +Output [1]: [d_date_sk#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [1]: [d_date_sk#6] + +(6) Filter [codegen id : 1] +Input [1]: [d_date_sk#6] +Condition : isnotnull(d_date_sk#6) + +(7) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(9) Project [codegen id : 3] +Output [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, d_date_sk#6] + +(10) Scan parquet default.store +Output [11]: [s_store_sk#8, s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: 
[IsNotNull(s_store_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [11]: [s_store_sk#8, s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18] + +(12) Filter [codegen id : 2] +Input [11]: [s_store_sk#8, s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18] +Condition : isnotnull(s_store_sk#8) + +(13) BroadcastExchange +Input [11]: [s_store_sk#8, s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#8] +Join condition: None + +(15) Project [codegen id : 3] +Output [14]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18] +Input [16]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, s_store_sk#8, s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18] + +(16) Exchange +Input [14]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18] +Arguments: hashpartitioning(cast(ss_ticket_number#5 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_customer_sk#3 as bigint), 5), true, [id=#20] + +(17) Sort [codegen id : 4] +Input [14]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, 
ss_ticket_number#5, s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18] +Arguments: [cast(ss_ticket_number#5 as bigint) ASC NULLS FIRST, cast(ss_item_sk#2 as bigint) ASC NULLS FIRST, cast(ss_customer_sk#3 as bigint) ASC NULLS FIRST], false, 0 + +(18) Scan parquet default.date_dim +Output [3]: [d_date_sk#21, d_year#22, d_moy#23] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,8), IsNotNull(d_date_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 5] +Input [3]: [d_date_sk#21, d_year#22, d_moy#23] + +(20) Filter [codegen id : 5] +Input [3]: [d_date_sk#21, d_year#22, d_moy#23] +Condition : ((((isnotnull(d_year#22) AND isnotnull(d_moy#23)) AND (d_year#22 = 2001)) AND (d_moy#23 = 8)) AND isnotnull(d_date_sk#21)) + +(21) Project [codegen id : 5] +Output [1]: [d_date_sk#21] +Input [3]: [d_date_sk#21, d_year#22, d_moy#23] + +(22) BroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#24] + +(23) Scan parquet default.store_returns +Output [4]: [sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_returned_date_sk)] +ReadSchema: struct + +(24) ColumnarToRow +Input [4]: [sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28] + +(25) Filter +Input [4]: [sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28] 
+Condition : (((isnotnull(sr_ticket_number#28) AND isnotnull(sr_customer_sk#27)) AND isnotnull(sr_item_sk#26)) AND isnotnull(sr_returned_date_sk#25)) + +(26) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cast(d_date_sk#21 as bigint)] +Right keys [1]: [sr_returned_date_sk#25] +Join condition: None + +(27) Project [codegen id : 6] +Output [4]: [sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28] +Input [5]: [d_date_sk#21, sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28] + +(28) Exchange +Input [4]: [sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28] +Arguments: hashpartitioning(sr_ticket_number#28, sr_item_sk#26, sr_customer_sk#27, 5), true, [id=#29] + +(29) Sort [codegen id : 7] +Input [4]: [sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28] +Arguments: [sr_ticket_number#28 ASC NULLS FIRST, sr_item_sk#26 ASC NULLS FIRST, sr_customer_sk#27 ASC NULLS FIRST], false, 0 + +(30) SortMergeJoin [codegen id : 8] +Left keys [3]: [cast(ss_ticket_number#5 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_customer_sk#3 as bigint)] +Right keys [3]: [sr_ticket_number#28, sr_item_sk#26, sr_customer_sk#27] +Join condition: None + +(31) Project [codegen id : 8] +Output [12]: [ss_sold_date_sk#1, sr_returned_date_sk#25, s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18] +Input [18]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18, sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28] + +(32) HashAggregate [codegen id : 8] +Input [12]: [ss_sold_date_sk#1, sr_returned_date_sk#25, s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, 
s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18] +Keys [10]: [s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18] +Functions [5]: [partial_sum(cast(CASE WHEN ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))] +Aggregate Attributes [5]: [sum#30, sum#31, sum#32, sum#33, sum#34] +Results [15]: [s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18, sum#35, sum#36, sum#37, sum#38, sum#39] + +(33) Exchange +Input [15]: [s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18, sum#35, sum#36, sum#37, sum#38, sum#39] +Arguments: hashpartitioning(s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18, 5), true, [id=#40] + +(34) HashAggregate [codegen id : 9] +Input [15]: [s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, 
s_state#17, s_zip#18, sum#35, sum#36, sum#37, sum#38, sum#39] +Keys [10]: [s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18] +Functions [5]: [sum(cast(CASE WHEN ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))] +Aggregate Attributes [5]: [sum(cast(CASE WHEN ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint))#41, sum(cast(CASE WHEN (((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint))#42, sum(cast(CASE WHEN (((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint))#43, sum(cast(CASE WHEN (((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint))#44, sum(cast(CASE WHEN ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))#45] +Results [15]: [s_store_name#9, s_company_id#10, s_street_number#11, 
s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18, sum(cast(CASE WHEN ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint))#41 AS 30 days #46, sum(cast(CASE WHEN (((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint))#42 AS 31 - 60 days #47, sum(cast(CASE WHEN (((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint))#43 AS 61 - 90 days #48, sum(cast(CASE WHEN (((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint))#44 AS 91 - 120 days #49, sum(cast(CASE WHEN ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))#45 AS >120 days #50] + +(35) TakeOrderedAndProject +Input [15]: [s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18, 30 days #46, 31 - 60 days #47, 61 - 90 days #48, 91 - 120 days #49, >120 days #50] +Arguments: 100, [s_store_name#9 ASC NULLS FIRST, s_company_id#10 ASC NULLS FIRST, s_street_number#11 ASC NULLS FIRST, s_street_name#12 ASC NULLS FIRST, s_street_type#13 ASC NULLS FIRST, s_suite_number#14 ASC NULLS FIRST, s_city#15 ASC NULLS FIRST, s_county#16 ASC NULLS FIRST, s_state#17 ASC NULLS FIRST, s_zip#18 ASC NULLS FIRST], [s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18, 30 days #46, 31 - 60 days #47, 61 - 90 days #48, 91 - 120 days #49, >120 days #50] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/simplified.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/simplified.txt new file mode 100644 index 0000000000000..460ed30f87df5 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/simplified.txt @@ -0,0 +1,57 @@ +TakeOrderedAndProject [30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip] + WholeStageCodegen (9) + HashAggregate [s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip,sum,sum,sum,sum,sum] [30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 30) AND ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 60) AND ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 90) AND ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 120) THEN 1 ELSE 0 END as bigint))] + InputAdapter + Exchange [s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip] #1 + WholeStageCodegen (8) + HashAggregate [s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip,sr_returned_date_sk,ss_sold_date_sk] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project 
[s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip,sr_returned_date_sk,ss_sold_date_sk] + SortMergeJoin [sr_customer_sk,sr_item_sk,sr_ticket_number,ss_customer_sk,ss_item_sk,ss_ticket_number] + InputAdapter + WholeStageCodegen (4) + Sort [ss_customer_sk,ss_item_sk,ss_ticket_number] + InputAdapter + Exchange [ss_customer_sk,ss_item_sk,ss_ticket_number] #2 + WholeStageCodegen (3) + Project [s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip,ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_ticket_number] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_city,s_company_id,s_county,s_state,s_store_name,s_store_sk,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip] + InputAdapter + WholeStageCodegen (7) + Sort [sr_customer_sk,sr_item_sk,sr_ticket_number] + InputAdapter + Exchange [sr_customer_sk,sr_item_sk,sr_ticket_number] #5 + WholeStageCodegen (6) + Project [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] + BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + Filter 
[sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/explain.txt new file mode 100644 index 0000000000000..66e6608ce4a31 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/explain.txt @@ -0,0 +1,183 @@ +== Physical Plan == +TakeOrderedAndProject (32) ++- * HashAggregate (31) + +- Exchange (30) + +- * HashAggregate (29) + +- * Project (28) + +- * BroadcastHashJoin Inner BuildRight (27) + :- * Project (21) + : +- * BroadcastHashJoin Inner BuildRight (20) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.store_returns (4) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet default.store (10) + : +- BroadcastExchange (19) + : +- * Filter (18) + : +- * ColumnarToRow (17) + : +- Scan parquet default.date_dim (16) + +- BroadcastExchange (26) + +- * Project (25) + +- * Filter (24) + +- * ColumnarToRow (23) + +- Scan parquet default.date_dim (22) + + +(1) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_ticket_number), 
IsNotNull(ss_store_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5] + +(3) Filter [codegen id : 5] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5] +Condition : ((((isnotnull(ss_item_sk#2) AND isnotnull(ss_customer_sk#3)) AND isnotnull(ss_ticket_number#5)) AND isnotnull(ss_store_sk#4)) AND isnotnull(ss_sold_date_sk#1)) + +(4) Scan parquet default.store_returns +Output [4]: [sr_returned_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_returned_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [sr_returned_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9] + +(6) Filter [codegen id : 1] +Input [4]: [sr_returned_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9] +Condition : (((isnotnull(sr_ticket_number#9) AND isnotnull(sr_customer_sk#8)) AND isnotnull(sr_item_sk#7)) AND isnotnull(sr_returned_date_sk#6)) + +(7) BroadcastExchange +Input [4]: [sr_returned_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9] +Arguments: HashedRelationBroadcastMode(List(input[3, bigint, false], input[1, bigint, false], input[2, bigint, false]),false), [id=#10] + +(8) BroadcastHashJoin [codegen id : 5] +Left keys [3]: [cast(ss_ticket_number#5 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_customer_sk#3 as bigint)] +Right keys [3]: [sr_ticket_number#9, sr_item_sk#7, sr_customer_sk#8] +Join condition: None + +(9) Project [codegen id : 5] +Output [3]: [ss_sold_date_sk#1, ss_store_sk#4, sr_returned_date_sk#6] +Input [9]: [ss_sold_date_sk#1, 
ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, sr_returned_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9] + +(10) Scan parquet default.store +Output [11]: [s_store_sk#11, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [11]: [s_store_sk#11, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] + +(12) Filter [codegen id : 2] +Input [11]: [s_store_sk#11, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] +Condition : isnotnull(s_store_sk#11) + +(13) BroadcastExchange +Input [11]: [s_store_sk#11, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] + +(14) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#11] +Join condition: None + +(15) Project [codegen id : 5] +Output [12]: [ss_sold_date_sk#1, sr_returned_date_sk#6, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] +Input [14]: [ss_sold_date_sk#1, ss_store_sk#4, sr_returned_date_sk#6, s_store_sk#11, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] + +(16) 
Scan parquet default.date_dim +Output [1]: [d_date_sk#23] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [1]: [d_date_sk#23] + +(18) Filter [codegen id : 3] +Input [1]: [d_date_sk#23] +Condition : isnotnull(d_date_sk#23) + +(19) BroadcastExchange +Input [1]: [d_date_sk#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#24] + +(20) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#23] +Join condition: None + +(21) Project [codegen id : 5] +Output [12]: [ss_sold_date_sk#1, sr_returned_date_sk#6, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] +Input [13]: [ss_sold_date_sk#1, sr_returned_date_sk#6, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, d_date_sk#23] + +(22) Scan parquet default.date_dim +Output [3]: [d_date_sk#25, d_year#26, d_moy#27] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,8), IsNotNull(d_date_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 4] +Input [3]: [d_date_sk#25, d_year#26, d_moy#27] + +(24) Filter [codegen id : 4] +Input [3]: [d_date_sk#25, d_year#26, d_moy#27] +Condition : ((((isnotnull(d_year#26) AND isnotnull(d_moy#27)) AND (d_year#26 = 2001)) AND (d_moy#27 = 8)) AND isnotnull(d_date_sk#25)) + +(25) Project [codegen id : 4] +Output [1]: [d_date_sk#25] +Input [3]: [d_date_sk#25, d_year#26, 
d_moy#27] + +(26) BroadcastExchange +Input [1]: [d_date_sk#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#28] + +(27) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [sr_returned_date_sk#6] +Right keys [1]: [cast(d_date_sk#25 as bigint)] +Join condition: None + +(28) Project [codegen id : 5] +Output [12]: [ss_sold_date_sk#1, sr_returned_date_sk#6, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] +Input [13]: [ss_sold_date_sk#1, sr_returned_date_sk#6, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, d_date_sk#25] + +(29) HashAggregate [codegen id : 5] +Input [12]: [ss_sold_date_sk#1, sr_returned_date_sk#6, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] +Keys [10]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] +Functions [5]: [partial_sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((sr_returned_date_sk#6 - 
cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))] +Aggregate Attributes [5]: [sum#29, sum#30, sum#31, sum#32, sum#33] +Results [15]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, sum#34, sum#35, sum#36, sum#37, sum#38] + +(30) Exchange +Input [15]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, sum#34, sum#35, sum#36, sum#37, sum#38] +Arguments: hashpartitioning(s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, 5), true, [id=#39] + +(31) HashAggregate [codegen id : 6] +Input [15]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, sum#34, sum#35, sum#36, sum#37, sum#38] +Keys [10]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] +Functions [5]: [sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 
ELSE 0 END as bigint))] +Aggregate Attributes [5]: [sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint))#40, sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint))#41, sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint))#42, sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint))#43, sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))#44] +Results [15]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint))#40 AS 30 days #45, sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint))#41 AS 31 - 60 days #46, sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint))#42 AS 61 - 90 days #47, sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint))#43 AS 91 - 120 days #48, sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))#44 AS >120 days #49] + +(32) TakeOrderedAndProject 
+Input [15]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, 30 days #45, 31 - 60 days #46, 61 - 90 days #47, 91 - 120 days #48, >120 days #49] +Arguments: 100, [s_store_name#12 ASC NULLS FIRST, s_company_id#13 ASC NULLS FIRST, s_street_number#14 ASC NULLS FIRST, s_street_name#15 ASC NULLS FIRST, s_street_type#16 ASC NULLS FIRST, s_suite_number#17 ASC NULLS FIRST, s_city#18 ASC NULLS FIRST, s_county#19 ASC NULLS FIRST, s_state#20 ASC NULLS FIRST, s_zip#21 ASC NULLS FIRST], [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, 30 days #45, 31 - 60 days #46, 61 - 90 days #47, 91 - 120 days #48, >120 days #49] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/simplified.txt new file mode 100644 index 0000000000000..1ef21d7b18712 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/simplified.txt @@ -0,0 +1,48 @@ +TakeOrderedAndProject [30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip] + WholeStageCodegen (6) + HashAggregate [s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip,sum,sum,sum,sum,sum] [30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 30) AND ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 60) AND ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 
90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 90) AND ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 120) THEN 1 ELSE 0 END as bigint))] + InputAdapter + Exchange [s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip] #1 + WholeStageCodegen (5) + HashAggregate [s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip,sr_returned_date_sk,ss_sold_date_sk] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip,sr_returned_date_sk,ss_sold_date_sk] + BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + Project [s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip,sr_returned_date_sk,ss_sold_date_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip,sr_returned_date_sk,ss_sold_date_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [sr_returned_date_sk,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [sr_customer_sk,sr_item_sk,sr_ticket_number,ss_customer_sk,ss_item_sk,ss_ticket_number] + Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] + 
ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_city,s_company_id,s_county,s_state,s_store_name,s_store_sk,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51.sf100/explain.txt new file mode 100644 index 0000000000000..6163643706c5d --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51.sf100/explain.txt @@ -0,0 +1,228 @@ +== Physical Plan == +TakeOrderedAndProject (41) ++- * Filter (40) + +- Window (39) + +- * Sort (38) + +- Exchange (37) + +- * Project (36) + +- SortMergeJoin FullOuter (35) + :- * Sort (19) + : +- Exchange (18) + : +- * Project (17) + : +- Window (16) + : +- * Sort (15) + : +- Exchange (14) + : +- * HashAggregate (13) + : +- Exchange (12) + : +- * HashAggregate (11) + : +- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.web_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.date_dim (4) + +- * Sort (34) + +- Exchange (33) + +- * Project (32) + +- Window (31) + +- * Sort (30) + +- Exchange (29) + +- * HashAggregate (28) + +- Exchange (27) + +- * HashAggregate (26) 
+ +- * Project (25) + +- * BroadcastHashJoin Inner BuildRight (24) + :- * Filter (22) + : +- * ColumnarToRow (21) + : +- Scan parquet default.store_sales (20) + +- ReusedExchange (23) + + +(1) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_sales_price#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_sales_price#3] + +(3) Filter [codegen id : 2] +Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_sales_price#3] +Condition : (isnotnull(ws_item_sk#2) AND isnotnull(ws_sold_date_sk#1)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#4, d_date#5, d_month_seq#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] +Condition : (((isnotnull(d_month_seq#6) AND (d_month_seq#6 >= 1200)) AND (d_month_seq#6 <= 1211)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [2]: [d_date_sk#4, d_date#5] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] + +(8) BroadcastExchange +Input [2]: [d_date_sk#4, d_date#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ws_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(10) Project [codegen id : 2] +Output [3]: 
[ws_item_sk#2, ws_sales_price#3, d_date#5] +Input [5]: [ws_sold_date_sk#1, ws_item_sk#2, ws_sales_price#3, d_date_sk#4, d_date#5] + +(11) HashAggregate [codegen id : 2] +Input [3]: [ws_item_sk#2, ws_sales_price#3, d_date#5] +Keys [2]: [ws_item_sk#2, d_date#5] +Functions [1]: [partial_sum(UnscaledValue(ws_sales_price#3))] +Aggregate Attributes [1]: [sum#8] +Results [3]: [ws_item_sk#2, d_date#5, sum#9] + +(12) Exchange +Input [3]: [ws_item_sk#2, d_date#5, sum#9] +Arguments: hashpartitioning(ws_item_sk#2, d_date#5, 5), true, [id=#10] + +(13) HashAggregate [codegen id : 3] +Input [3]: [ws_item_sk#2, d_date#5, sum#9] +Keys [2]: [ws_item_sk#2, d_date#5] +Functions [1]: [sum(UnscaledValue(ws_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_sales_price#3))#11] +Results [4]: [ws_item_sk#2 AS item_sk#12, d_date#5, MakeDecimal(sum(UnscaledValue(ws_sales_price#3))#11,17,2) AS _w0#13, ws_item_sk#2] + +(14) Exchange +Input [4]: [item_sk#12, d_date#5, _w0#13, ws_item_sk#2] +Arguments: hashpartitioning(ws_item_sk#2, 5), true, [id=#14] + +(15) Sort [codegen id : 4] +Input [4]: [item_sk#12, d_date#5, _w0#13, ws_item_sk#2] +Arguments: [ws_item_sk#2 ASC NULLS FIRST, d_date#5 ASC NULLS FIRST], false, 0 + +(16) Window +Input [4]: [item_sk#12, d_date#5, _w0#13, ws_item_sk#2] +Arguments: [sum(_w0#13) windowspecdefinition(ws_item_sk#2, d_date#5 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS cume_sales#15], [ws_item_sk#2], [d_date#5 ASC NULLS FIRST] + +(17) Project [codegen id : 5] +Output [3]: [item_sk#12, d_date#5, cume_sales#15] +Input [5]: [item_sk#12, d_date#5, _w0#13, ws_item_sk#2, cume_sales#15] + +(18) Exchange +Input [3]: [item_sk#12, d_date#5, cume_sales#15] +Arguments: hashpartitioning(item_sk#12, d_date#5, 5), true, [id=#16] + +(19) Sort [codegen id : 6] +Input [3]: [item_sk#12, d_date#5, cume_sales#15] +Arguments: [item_sk#12 ASC NULLS FIRST, d_date#5 ASC NULLS FIRST], false, 0 + +(20) Scan parquet default.store_sales 
+Output [3]: [ss_sold_date_sk#17, ss_item_sk#18, ss_sales_price#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 8] +Input [3]: [ss_sold_date_sk#17, ss_item_sk#18, ss_sales_price#19] + +(22) Filter [codegen id : 8] +Input [3]: [ss_sold_date_sk#17, ss_item_sk#18, ss_sales_price#19] +Condition : (isnotnull(ss_item_sk#18) AND isnotnull(ss_sold_date_sk#17)) + +(23) ReusedExchange [Reuses operator id: 8] +Output [2]: [d_date_sk#20, d_date#21] + +(24) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_sold_date_sk#17] +Right keys [1]: [d_date_sk#20] +Join condition: None + +(25) Project [codegen id : 8] +Output [3]: [ss_item_sk#18, ss_sales_price#19, d_date#21] +Input [5]: [ss_sold_date_sk#17, ss_item_sk#18, ss_sales_price#19, d_date_sk#20, d_date#21] + +(26) HashAggregate [codegen id : 8] +Input [3]: [ss_item_sk#18, ss_sales_price#19, d_date#21] +Keys [2]: [ss_item_sk#18, d_date#21] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#19))] +Aggregate Attributes [1]: [sum#22] +Results [3]: [ss_item_sk#18, d_date#21, sum#23] + +(27) Exchange +Input [3]: [ss_item_sk#18, d_date#21, sum#23] +Arguments: hashpartitioning(ss_item_sk#18, d_date#21, 5), true, [id=#24] + +(28) HashAggregate [codegen id : 9] +Input [3]: [ss_item_sk#18, d_date#21, sum#23] +Keys [2]: [ss_item_sk#18, d_date#21] +Functions [1]: [sum(UnscaledValue(ss_sales_price#19))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#19))#25] +Results [4]: [ss_item_sk#18 AS item_sk#26, d_date#21, MakeDecimal(sum(UnscaledValue(ss_sales_price#19))#25,17,2) AS _w0#27, ss_item_sk#18] + +(29) Exchange +Input [4]: [item_sk#26, d_date#21, _w0#27, ss_item_sk#18] +Arguments: hashpartitioning(ss_item_sk#18, 5), true, [id=#28] + +(30) Sort [codegen id : 10] 
+Input [4]: [item_sk#26, d_date#21, _w0#27, ss_item_sk#18] +Arguments: [ss_item_sk#18 ASC NULLS FIRST, d_date#21 ASC NULLS FIRST], false, 0 + +(31) Window +Input [4]: [item_sk#26, d_date#21, _w0#27, ss_item_sk#18] +Arguments: [sum(_w0#27) windowspecdefinition(ss_item_sk#18, d_date#21 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS cume_sales#29], [ss_item_sk#18], [d_date#21 ASC NULLS FIRST] + +(32) Project [codegen id : 11] +Output [3]: [item_sk#26, d_date#21, cume_sales#29] +Input [5]: [item_sk#26, d_date#21, _w0#27, ss_item_sk#18, cume_sales#29] + +(33) Exchange +Input [3]: [item_sk#26, d_date#21, cume_sales#29] +Arguments: hashpartitioning(item_sk#26, d_date#21, 5), true, [id=#30] + +(34) Sort [codegen id : 12] +Input [3]: [item_sk#26, d_date#21, cume_sales#29] +Arguments: [item_sk#26 ASC NULLS FIRST, d_date#21 ASC NULLS FIRST], false, 0 + +(35) SortMergeJoin +Left keys [2]: [item_sk#12, d_date#5] +Right keys [2]: [item_sk#26, d_date#21] +Join condition: None + +(36) Project [codegen id : 13] +Output [4]: [CASE WHEN isnotnull(item_sk#12) THEN item_sk#12 ELSE item_sk#26 END AS item_sk#31, CASE WHEN isnotnull(d_date#5) THEN d_date#5 ELSE d_date#21 END AS d_date#32, cume_sales#15 AS web_sales#33, cume_sales#29 AS store_sales#34] +Input [6]: [item_sk#12, d_date#5, cume_sales#15, item_sk#26, d_date#21, cume_sales#29] + +(37) Exchange +Input [4]: [item_sk#31, d_date#32, web_sales#33, store_sales#34] +Arguments: hashpartitioning(item_sk#31, 5), true, [id=#35] + +(38) Sort [codegen id : 14] +Input [4]: [item_sk#31, d_date#32, web_sales#33, store_sales#34] +Arguments: [item_sk#31 ASC NULLS FIRST, d_date#32 ASC NULLS FIRST], false, 0 + +(39) Window +Input [4]: [item_sk#31, d_date#32, web_sales#33, store_sales#34] +Arguments: [max(web_sales#33) windowspecdefinition(item_sk#31, d_date#32 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS web_cumulative#36, max(store_sales#34) 
windowspecdefinition(item_sk#31, d_date#32 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS store_cumulative#37], [item_sk#31], [d_date#32 ASC NULLS FIRST] + +(40) Filter [codegen id : 15] +Input [6]: [item_sk#31, d_date#32, web_sales#33, store_sales#34, web_cumulative#36, store_cumulative#37] +Condition : ((isnotnull(web_cumulative#36) AND isnotnull(store_cumulative#37)) AND (web_cumulative#36 > store_cumulative#37)) + +(41) TakeOrderedAndProject +Input [6]: [item_sk#31, d_date#32, web_sales#33, store_sales#34, web_cumulative#36, store_cumulative#37] +Arguments: 100, [item_sk#31 ASC NULLS FIRST, d_date#32 ASC NULLS FIRST], [item_sk#31, d_date#32, web_sales#33, store_sales#34, web_cumulative#36, store_cumulative#37] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51.sf100/simplified.txt new file mode 100644 index 0000000000000..42138ddee3e93 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51.sf100/simplified.txt @@ -0,0 +1,71 @@ +TakeOrderedAndProject [d_date,item_sk,store_cumulative,store_sales,web_cumulative,web_sales] + WholeStageCodegen (15) + Filter [store_cumulative,web_cumulative] + InputAdapter + Window [d_date,item_sk,store_sales,web_sales] + WholeStageCodegen (14) + Sort [d_date,item_sk] + InputAdapter + Exchange [item_sk] #1 + WholeStageCodegen (13) + Project [cume_sales,cume_sales,d_date,d_date,item_sk,item_sk] + InputAdapter + SortMergeJoin [d_date,d_date,item_sk,item_sk] + WholeStageCodegen (6) + Sort [d_date,item_sk] + InputAdapter + Exchange [d_date,item_sk] #2 + WholeStageCodegen (5) + Project [cume_sales,d_date,item_sk] + InputAdapter + Window [_w0,d_date,ws_item_sk] + WholeStageCodegen (4) + Sort [d_date,ws_item_sk] + InputAdapter + Exchange [ws_item_sk] #3 + WholeStageCodegen (3) + HashAggregate [d_date,sum,ws_item_sk] 
[_w0,item_sk,sum,sum(UnscaledValue(ws_sales_price))] + InputAdapter + Exchange [d_date,ws_item_sk] #4 + WholeStageCodegen (2) + HashAggregate [d_date,ws_item_sk,ws_sales_price] [sum,sum] + Project [d_date,ws_item_sk,ws_sales_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_sales_price,ws_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date,d_date_sk] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk,d_month_seq] + WholeStageCodegen (12) + Sort [d_date,item_sk] + InputAdapter + Exchange [d_date,item_sk] #6 + WholeStageCodegen (11) + Project [cume_sales,d_date,item_sk] + InputAdapter + Window [_w0,d_date,ss_item_sk] + WholeStageCodegen (10) + Sort [d_date,ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #7 + WholeStageCodegen (9) + HashAggregate [d_date,ss_item_sk,sum] [_w0,item_sk,sum,sum(UnscaledValue(ss_sales_price))] + InputAdapter + Exchange [d_date,ss_item_sk] #8 + WholeStageCodegen (8) + HashAggregate [d_date,ss_item_sk,ss_sales_price] [sum,sum] + Project [d_date,ss_item_sk,ss_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [d_date,d_date_sk] #5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/explain.txt new file mode 100644 index 0000000000000..df98f23cd3db6 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/explain.txt @@ -0,0 +1,228 @@ +== Physical Plan == +TakeOrderedAndProject (41) ++- * Filter (40) + +- Window (39) + +- * Sort (38) + +- Exchange (37) + +- * Project (36) + +- 
SortMergeJoin FullOuter (35) + :- * Sort (19) + : +- Exchange (18) + : +- * Project (17) + : +- Window (16) + : +- * Sort (15) + : +- Exchange (14) + : +- * HashAggregate (13) + : +- Exchange (12) + : +- * HashAggregate (11) + : +- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.web_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.date_dim (4) + +- * Sort (34) + +- Exchange (33) + +- * Project (32) + +- Window (31) + +- * Sort (30) + +- Exchange (29) + +- * HashAggregate (28) + +- Exchange (27) + +- * HashAggregate (26) + +- * Project (25) + +- * BroadcastHashJoin Inner BuildRight (24) + :- * Filter (22) + : +- * ColumnarToRow (21) + : +- Scan parquet default.store_sales (20) + +- ReusedExchange (23) + + +(1) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_sales_price#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_sales_price#3] + +(3) Filter [codegen id : 2] +Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_sales_price#3] +Condition : (isnotnull(ws_item_sk#2) AND isnotnull(ws_sold_date_sk#1)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#4, d_date#5, d_month_seq#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#4, 
d_date#5, d_month_seq#6] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] +Condition : (((isnotnull(d_month_seq#6) AND (d_month_seq#6 >= 1200)) AND (d_month_seq#6 <= 1211)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [2]: [d_date_sk#4, d_date#5] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] + +(8) BroadcastExchange +Input [2]: [d_date_sk#4, d_date#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ws_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(10) Project [codegen id : 2] +Output [3]: [ws_item_sk#2, ws_sales_price#3, d_date#5] +Input [5]: [ws_sold_date_sk#1, ws_item_sk#2, ws_sales_price#3, d_date_sk#4, d_date#5] + +(11) HashAggregate [codegen id : 2] +Input [3]: [ws_item_sk#2, ws_sales_price#3, d_date#5] +Keys [2]: [ws_item_sk#2, d_date#5] +Functions [1]: [partial_sum(UnscaledValue(ws_sales_price#3))] +Aggregate Attributes [1]: [sum#8] +Results [3]: [ws_item_sk#2, d_date#5, sum#9] + +(12) Exchange +Input [3]: [ws_item_sk#2, d_date#5, sum#9] +Arguments: hashpartitioning(ws_item_sk#2, d_date#5, 5), true, [id=#10] + +(13) HashAggregate [codegen id : 3] +Input [3]: [ws_item_sk#2, d_date#5, sum#9] +Keys [2]: [ws_item_sk#2, d_date#5] +Functions [1]: [sum(UnscaledValue(ws_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_sales_price#3))#11] +Results [4]: [ws_item_sk#2 AS item_sk#12, d_date#5, MakeDecimal(sum(UnscaledValue(ws_sales_price#3))#11,17,2) AS _w0#13, ws_item_sk#2] + +(14) Exchange +Input [4]: [item_sk#12, d_date#5, _w0#13, ws_item_sk#2] +Arguments: hashpartitioning(ws_item_sk#2, 5), true, [id=#14] + +(15) Sort [codegen id : 4] +Input [4]: [item_sk#12, d_date#5, _w0#13, ws_item_sk#2] +Arguments: [ws_item_sk#2 ASC NULLS FIRST, d_date#5 ASC NULLS FIRST], false, 0 + +(16) Window +Input [4]: [item_sk#12, d_date#5, _w0#13, ws_item_sk#2] +Arguments: 
[sum(_w0#13) windowspecdefinition(ws_item_sk#2, d_date#5 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS cume_sales#15], [ws_item_sk#2], [d_date#5 ASC NULLS FIRST] + +(17) Project [codegen id : 5] +Output [3]: [item_sk#12, d_date#5, cume_sales#15] +Input [5]: [item_sk#12, d_date#5, _w0#13, ws_item_sk#2, cume_sales#15] + +(18) Exchange +Input [3]: [item_sk#12, d_date#5, cume_sales#15] +Arguments: hashpartitioning(item_sk#12, d_date#5, 5), true, [id=#16] + +(19) Sort [codegen id : 6] +Input [3]: [item_sk#12, d_date#5, cume_sales#15] +Arguments: [item_sk#12 ASC NULLS FIRST, d_date#5 ASC NULLS FIRST], false, 0 + +(20) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#17, ss_item_sk#18, ss_sales_price#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 8] +Input [3]: [ss_sold_date_sk#17, ss_item_sk#18, ss_sales_price#19] + +(22) Filter [codegen id : 8] +Input [3]: [ss_sold_date_sk#17, ss_item_sk#18, ss_sales_price#19] +Condition : (isnotnull(ss_item_sk#18) AND isnotnull(ss_sold_date_sk#17)) + +(23) ReusedExchange [Reuses operator id: 8] +Output [2]: [d_date_sk#20, d_date#21] + +(24) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_sold_date_sk#17] +Right keys [1]: [d_date_sk#20] +Join condition: None + +(25) Project [codegen id : 8] +Output [3]: [ss_item_sk#18, ss_sales_price#19, d_date#21] +Input [5]: [ss_sold_date_sk#17, ss_item_sk#18, ss_sales_price#19, d_date_sk#20, d_date#21] + +(26) HashAggregate [codegen id : 8] +Input [3]: [ss_item_sk#18, ss_sales_price#19, d_date#21] +Keys [2]: [ss_item_sk#18, d_date#21] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#19))] +Aggregate Attributes [1]: [sum#22] +Results [3]: [ss_item_sk#18, d_date#21, sum#23] + +(27) 
Exchange +Input [3]: [ss_item_sk#18, d_date#21, sum#23] +Arguments: hashpartitioning(ss_item_sk#18, d_date#21, 5), true, [id=#24] + +(28) HashAggregate [codegen id : 9] +Input [3]: [ss_item_sk#18, d_date#21, sum#23] +Keys [2]: [ss_item_sk#18, d_date#21] +Functions [1]: [sum(UnscaledValue(ss_sales_price#19))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#19))#25] +Results [4]: [ss_item_sk#18 AS item_sk#26, d_date#21, MakeDecimal(sum(UnscaledValue(ss_sales_price#19))#25,17,2) AS _w0#27, ss_item_sk#18] + +(29) Exchange +Input [4]: [item_sk#26, d_date#21, _w0#27, ss_item_sk#18] +Arguments: hashpartitioning(ss_item_sk#18, 5), true, [id=#28] + +(30) Sort [codegen id : 10] +Input [4]: [item_sk#26, d_date#21, _w0#27, ss_item_sk#18] +Arguments: [ss_item_sk#18 ASC NULLS FIRST, d_date#21 ASC NULLS FIRST], false, 0 + +(31) Window +Input [4]: [item_sk#26, d_date#21, _w0#27, ss_item_sk#18] +Arguments: [sum(_w0#27) windowspecdefinition(ss_item_sk#18, d_date#21 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS cume_sales#29], [ss_item_sk#18], [d_date#21 ASC NULLS FIRST] + +(32) Project [codegen id : 11] +Output [3]: [item_sk#26, d_date#21, cume_sales#29] +Input [5]: [item_sk#26, d_date#21, _w0#27, ss_item_sk#18, cume_sales#29] + +(33) Exchange +Input [3]: [item_sk#26, d_date#21, cume_sales#29] +Arguments: hashpartitioning(item_sk#26, d_date#21, 5), true, [id=#30] + +(34) Sort [codegen id : 12] +Input [3]: [item_sk#26, d_date#21, cume_sales#29] +Arguments: [item_sk#26 ASC NULLS FIRST, d_date#21 ASC NULLS FIRST], false, 0 + +(35) SortMergeJoin +Left keys [2]: [item_sk#12, d_date#5] +Right keys [2]: [item_sk#26, d_date#21] +Join condition: None + +(36) Project [codegen id : 13] +Output [4]: [CASE WHEN isnotnull(item_sk#12) THEN item_sk#12 ELSE item_sk#26 END AS item_sk#31, CASE WHEN isnotnull(d_date#5) THEN d_date#5 ELSE d_date#21 END AS d_date#32, cume_sales#15 AS web_sales#33, cume_sales#29 AS store_sales#34] +Input [6]: 
[item_sk#12, d_date#5, cume_sales#15, item_sk#26, d_date#21, cume_sales#29] + +(37) Exchange +Input [4]: [item_sk#31, d_date#32, web_sales#33, store_sales#34] +Arguments: hashpartitioning(item_sk#31, 5), true, [id=#35] + +(38) Sort [codegen id : 14] +Input [4]: [item_sk#31, d_date#32, web_sales#33, store_sales#34] +Arguments: [item_sk#31 ASC NULLS FIRST, d_date#32 ASC NULLS FIRST], false, 0 + +(39) Window +Input [4]: [item_sk#31, d_date#32, web_sales#33, store_sales#34] +Arguments: [max(web_sales#33) windowspecdefinition(item_sk#31, d_date#32 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS web_cumulative#36, max(store_sales#34) windowspecdefinition(item_sk#31, d_date#32 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS store_cumulative#37], [item_sk#31], [d_date#32 ASC NULLS FIRST] + +(40) Filter [codegen id : 15] +Input [6]: [item_sk#31, d_date#32, web_sales#33, store_sales#34, web_cumulative#36, store_cumulative#37] +Condition : ((isnotnull(web_cumulative#36) AND isnotnull(store_cumulative#37)) AND (web_cumulative#36 > store_cumulative#37)) + +(41) TakeOrderedAndProject +Input [6]: [item_sk#31, d_date#32, web_sales#33, store_sales#34, web_cumulative#36, store_cumulative#37] +Arguments: 100, [item_sk#31 ASC NULLS FIRST, d_date#32 ASC NULLS FIRST], [item_sk#31, d_date#32, web_sales#33, store_sales#34, web_cumulative#36, store_cumulative#37] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/simplified.txt new file mode 100644 index 0000000000000..42138ddee3e93 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/simplified.txt @@ -0,0 +1,71 @@ +TakeOrderedAndProject [d_date,item_sk,store_cumulative,store_sales,web_cumulative,web_sales] + WholeStageCodegen (15) + Filter [store_cumulative,web_cumulative] + InputAdapter + 
Window [d_date,item_sk,store_sales,web_sales] + WholeStageCodegen (14) + Sort [d_date,item_sk] + InputAdapter + Exchange [item_sk] #1 + WholeStageCodegen (13) + Project [cume_sales,cume_sales,d_date,d_date,item_sk,item_sk] + InputAdapter + SortMergeJoin [d_date,d_date,item_sk,item_sk] + WholeStageCodegen (6) + Sort [d_date,item_sk] + InputAdapter + Exchange [d_date,item_sk] #2 + WholeStageCodegen (5) + Project [cume_sales,d_date,item_sk] + InputAdapter + Window [_w0,d_date,ws_item_sk] + WholeStageCodegen (4) + Sort [d_date,ws_item_sk] + InputAdapter + Exchange [ws_item_sk] #3 + WholeStageCodegen (3) + HashAggregate [d_date,sum,ws_item_sk] [_w0,item_sk,sum,sum(UnscaledValue(ws_sales_price))] + InputAdapter + Exchange [d_date,ws_item_sk] #4 + WholeStageCodegen (2) + HashAggregate [d_date,ws_item_sk,ws_sales_price] [sum,sum] + Project [d_date,ws_item_sk,ws_sales_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_sales_price,ws_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date,d_date_sk] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk,d_month_seq] + WholeStageCodegen (12) + Sort [d_date,item_sk] + InputAdapter + Exchange [d_date,item_sk] #6 + WholeStageCodegen (11) + Project [cume_sales,d_date,item_sk] + InputAdapter + Window [_w0,d_date,ss_item_sk] + WholeStageCodegen (10) + Sort [d_date,ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #7 + WholeStageCodegen (9) + HashAggregate [d_date,ss_item_sk,sum] [_w0,item_sk,sum,sum(UnscaledValue(ss_sales_price))] + InputAdapter + Exchange [d_date,ss_item_sk] #8 + WholeStageCodegen (8) + HashAggregate [d_date,ss_item_sk,ss_sales_price] [sum,sum] + Project [d_date,ss_item_sk,ss_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + 
InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [d_date,d_date_sk] #5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.sf100/explain.txt new file mode 100644 index 0000000000000..0475abd0b38cf --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.sf100/explain.txt @@ -0,0 +1,122 @@ +== Physical Plan == +TakeOrderedAndProject (21) ++- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Project (17) + +- * BroadcastHashJoin Inner BuildRight (16) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.store_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.item (4) + +- BroadcastExchange (15) + +- * Project (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet default.date_dim (11) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] + +(3) Filter [codegen id : 3] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_item_sk#2)) + +(4) Scan parquet default.item +Output [4]: [i_item_sk#4, i_brand_id#5, i_brand#6, i_manager_id#7] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [i_item_sk#4, i_brand_id#5, i_brand#6, i_manager_id#7] + +(6) Filter [codegen id : 1] +Input [4]: [i_item_sk#4, i_brand_id#5, i_brand#6, i_manager_id#7] +Condition : ((isnotnull(i_manager_id#7) AND (i_manager_id#7 = 1)) AND isnotnull(i_item_sk#4)) + +(7) Project [codegen id : 1] +Output [3]: [i_item_sk#4, i_brand_id#5, i_brand#6] +Input [4]: [i_item_sk#4, i_brand_id#5, i_brand#6, i_manager_id#7] + +(8) BroadcastExchange +Input [3]: [i_item_sk#4, i_brand_id#5, i_brand#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#4] +Join condition: None + +(10) Project [codegen id : 3] +Output [4]: [ss_sold_date_sk#1, ss_ext_sales_price#3, i_brand_id#5, i_brand#6] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3, i_item_sk#4, i_brand_id#5, i_brand#6] + +(11) Scan parquet default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_moy#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] + +(13) Filter [codegen id : 2] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Condition : ((((isnotnull(d_moy#11) AND isnotnull(d_year#10)) AND (d_moy#11 = 11)) AND (d_year#10 = 2000)) AND isnotnull(d_date_sk#9)) + +(14) Project [codegen id : 2] +Output [2]: [d_date_sk#9, d_year#10] +Input [3]: 
[d_date_sk#9, d_year#10, d_moy#11] + +(15) BroadcastExchange +Input [2]: [d_date_sk#9, d_year#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(17) Project [codegen id : 3] +Output [4]: [d_year#10, ss_ext_sales_price#3, i_brand_id#5, i_brand#6] +Input [6]: [ss_sold_date_sk#1, ss_ext_sales_price#3, i_brand_id#5, i_brand#6, d_date_sk#9, d_year#10] + +(18) HashAggregate [codegen id : 3] +Input [4]: [d_year#10, ss_ext_sales_price#3, i_brand_id#5, i_brand#6] +Keys [3]: [d_year#10, i_brand#6, i_brand_id#5] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#13] +Results [4]: [d_year#10, i_brand#6, i_brand_id#5, sum#14] + +(19) Exchange +Input [4]: [d_year#10, i_brand#6, i_brand_id#5, sum#14] +Arguments: hashpartitioning(d_year#10, i_brand#6, i_brand_id#5, 5), true, [id=#15] + +(20) HashAggregate [codegen id : 4] +Input [4]: [d_year#10, i_brand#6, i_brand_id#5, sum#14] +Keys [3]: [d_year#10, i_brand#6, i_brand_id#5] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#16] +Results [4]: [d_year#10, i_brand_id#5 AS brand_id#17, i_brand#6 AS brand#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#16,17,2) AS ext_price#19] + +(21) TakeOrderedAndProject +Input [4]: [d_year#10, brand_id#17, brand#18, ext_price#19] +Arguments: 100, [d_year#10 ASC NULLS FIRST, ext_price#19 DESC NULLS LAST, brand_id#17 ASC NULLS FIRST], [d_year#10, brand_id#17, brand#18, ext_price#19] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.sf100/simplified.txt new file mode 100644 index 0000000000000..0746a032aa1a2 --- /dev/null +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.sf100/simplified.txt @@ -0,0 +1,31 @@ +TakeOrderedAndProject [brand,brand_id,d_year,ext_price] + WholeStageCodegen (4) + HashAggregate [d_year,i_brand,i_brand_id,sum] [brand,brand_id,ext_price,sum,sum(UnscaledValue(ss_ext_sales_price))] + InputAdapter + Exchange [d_year,i_brand,i_brand_id] #1 + WholeStageCodegen (3) + HashAggregate [d_year,i_brand,i_brand_id,ss_ext_sales_price] [sum,sum] + Project [d_year,i_brand,i_brand_id,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand,i_brand_id,ss_ext_sales_price,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [i_brand,i_brand_id,i_item_sk] + Filter [i_item_sk,i_manager_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_brand_id,i_item_sk,i_manager_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [d_date_sk,d_year] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/explain.txt new file mode 100644 index 0000000000000..71d181aed2940 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/explain.txt @@ -0,0 +1,122 @@ +== Physical Plan == +TakeOrderedAndProject (21) ++- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Project (17) + +- * BroadcastHashJoin Inner BuildRight (16) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Project (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet 
default.date_dim (1) + : +- BroadcastExchange (8) + : +- * Filter (7) + : +- * ColumnarToRow (6) + : +- Scan parquet default.store_sales (5) + +- BroadcastExchange (15) + +- * Project (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet default.item (11) + + +(1) Scan parquet default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(3) Filter [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 2000)) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 3] +Output [2]: [d_date_sk#1, d_year#2] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(5) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] + +(7) Filter [codegen id : 1] +Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] +Condition : (isnotnull(ss_sold_date_sk#4) AND isnotnull(ss_item_sk#5)) + +(8) BroadcastExchange +Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date_sk#1] +Right keys [1]: 
[ss_sold_date_sk#4] +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [d_year#2, ss_item_sk#5, ss_ext_sales_price#6] +Input [5]: [d_date_sk#1, d_year#2, ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] + +(11) Scan parquet default.item +Output [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] + +(13) Filter [codegen id : 2] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] +Condition : ((isnotnull(i_manager_id#11) AND (i_manager_id#11 = 1)) AND isnotnull(i_item_sk#8)) + +(14) Project [codegen id : 2] +Output [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] + +(15) BroadcastExchange +Input [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#5] +Right keys [1]: [i_item_sk#8] +Join condition: None + +(17) Project [codegen id : 3] +Output [4]: [d_year#2, ss_ext_sales_price#6, i_brand_id#9, i_brand#10] +Input [6]: [d_year#2, ss_item_sk#5, ss_ext_sales_price#6, i_item_sk#8, i_brand_id#9, i_brand#10] + +(18) HashAggregate [codegen id : 3] +Input [4]: [d_year#2, ss_ext_sales_price#6, i_brand_id#9, i_brand#10] +Keys [3]: [d_year#2, i_brand#10, i_brand_id#9] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#6))] +Aggregate Attributes [1]: [sum#13] +Results [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] + +(19) Exchange +Input [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] +Arguments: 
hashpartitioning(d_year#2, i_brand#10, i_brand_id#9, 5), true, [id=#15] + +(20) HashAggregate [codegen id : 4] +Input [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] +Keys [3]: [d_year#2, i_brand#10, i_brand_id#9] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#6))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#6))#16] +Results [4]: [d_year#2, i_brand_id#9 AS brand_id#17, i_brand#10 AS brand#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#16,17,2) AS ext_price#19] + +(21) TakeOrderedAndProject +Input [4]: [d_year#2, brand_id#17, brand#18, ext_price#19] +Arguments: 100, [d_year#2 ASC NULLS FIRST, ext_price#19 DESC NULLS LAST, brand_id#17 ASC NULLS FIRST], [d_year#2, brand_id#17, brand#18, ext_price#19] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/simplified.txt new file mode 100644 index 0000000000000..7d70658c38241 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/simplified.txt @@ -0,0 +1,31 @@ +TakeOrderedAndProject [brand,brand_id,d_year,ext_price] + WholeStageCodegen (4) + HashAggregate [d_year,i_brand,i_brand_id,sum] [brand,brand_id,ext_price,sum,sum(UnscaledValue(ss_ext_sales_price))] + InputAdapter + Exchange [d_year,i_brand,i_brand_id] #1 + WholeStageCodegen (3) + HashAggregate [d_year,i_brand,i_brand_id,ss_ext_sales_price] [sum,sum] + Project [d_year,i_brand,i_brand_id,ss_ext_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [d_year,ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [d_date_sk,d_year] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales 
[ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [i_brand,i_brand_id,i_item_sk] + Filter [i_item_sk,i_manager_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_brand_id,i_item_sk,i_manager_id] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53.sf100/explain.txt new file mode 100644 index 0000000000000..d08a3f5cda86b --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53.sf100/explain.txt @@ -0,0 +1,180 @@ +== Physical Plan == +TakeOrderedAndProject (32) ++- * Project (31) + +- * Filter (30) + +- Window (29) + +- * Sort (28) + +- Exchange (27) + +- * HashAggregate (26) + +- Exchange (25) + +- * HashAggregate (24) + +- * Project (23) + +- * BroadcastHashJoin Inner BuildRight (22) + :- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildLeft (9) + : : :- BroadcastExchange (5) + : : : +- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.item (1) + : : +- * Filter (8) + : : +- * ColumnarToRow (7) + : : +- Scan parquet default.store_sales (6) + : +- BroadcastExchange (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet default.store (11) + +- BroadcastExchange (21) + +- * Project (20) + +- * Filter (19) + +- * ColumnarToRow (18) + +- Scan parquet default.date_dim (17) + + +(1) Scan parquet default.item +Output [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [Or(And(And(In(i_category, [Books,Children,Electronics]),In(i_class, 
[personal,portable,reference,self-help])),In(i_brand, [scholaramalgamalg #6,scholaramalgamalg #7,exportiunivamalg #8,scholaramalgamalg #8])),And(And(In(i_category, [Women,Music,Men]),In(i_class, [accessories,classical,fragrances,pants])),In(i_brand, [amalgimporto #9,edu packscholar #9,exportiimporto #9,importoamalg #9]))), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] + +(3) Filter [codegen id : 1] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] +Condition : ((((i_category#4 IN (Books,Children,Electronics) AND i_class#3 IN (personal,portable,reference,self-help)) AND i_brand#2 IN (scholaramalgamalg #6,scholaramalgamalg #7,exportiunivamalg #8,scholaramalgamalg #8)) OR ((i_category#4 IN (Women,Music,Men) AND i_class#3 IN (accessories,classical,fragrances,pants)) AND i_brand#2 IN (amalgimporto #9,edu packscholar #9,exportiimporto #9,importoamalg #9))) AND isnotnull(i_item_sk#1)) + +(4) Project [codegen id : 1] +Output [2]: [i_item_sk#1, i_manufact_id#5] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] + +(5) BroadcastExchange +Input [2]: [i_item_sk#1, i_manufact_id#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] + +(6) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#11, ss_item_sk#12, ss_store_sk#13, ss_sales_price#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(7) ColumnarToRow +Input [4]: [ss_sold_date_sk#11, ss_item_sk#12, ss_store_sk#13, ss_sales_price#14] + +(8) Filter +Input [4]: [ss_sold_date_sk#11, ss_item_sk#12, ss_store_sk#13, ss_sales_price#14] +Condition : 
((isnotnull(ss_item_sk#12) AND isnotnull(ss_sold_date_sk#11)) AND isnotnull(ss_store_sk#13)) + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [ss_item_sk#12] +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [i_manufact_id#5, ss_sold_date_sk#11, ss_store_sk#13, ss_sales_price#14] +Input [6]: [i_item_sk#1, i_manufact_id#5, ss_sold_date_sk#11, ss_item_sk#12, ss_store_sk#13, ss_sales_price#14] + +(11) Scan parquet default.store +Output [1]: [s_store_sk#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [1]: [s_store_sk#15] + +(13) Filter [codegen id : 2] +Input [1]: [s_store_sk#15] +Condition : isnotnull(s_store_sk#15) + +(14) BroadcastExchange +Input [1]: [s_store_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#16] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#13] +Right keys [1]: [s_store_sk#15] +Join condition: None + +(16) Project [codegen id : 4] +Output [3]: [i_manufact_id#5, ss_sold_date_sk#11, ss_sales_price#14] +Input [5]: [i_manufact_id#5, ss_sold_date_sk#11, ss_store_sk#13, ss_sales_price#14, s_store_sk#15] + +(17) Scan parquet default.date_dim +Output [3]: [d_date_sk#17, d_month_seq#18, d_qoy#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [In(d_month_seq, [1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#17, d_month_seq#18, d_qoy#19] + +(19) Filter [codegen id : 3] +Input [3]: [d_date_sk#17, d_month_seq#18, d_qoy#19] +Condition : 
(d_month_seq#18 INSET (1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204) AND isnotnull(d_date_sk#17)) + +(20) Project [codegen id : 3] +Output [2]: [d_date_sk#17, d_qoy#19] +Input [3]: [d_date_sk#17, d_month_seq#18, d_qoy#19] + +(21) BroadcastExchange +Input [2]: [d_date_sk#17, d_qoy#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(22) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#11] +Right keys [1]: [d_date_sk#17] +Join condition: None + +(23) Project [codegen id : 4] +Output [3]: [i_manufact_id#5, ss_sales_price#14, d_qoy#19] +Input [5]: [i_manufact_id#5, ss_sold_date_sk#11, ss_sales_price#14, d_date_sk#17, d_qoy#19] + +(24) HashAggregate [codegen id : 4] +Input [3]: [i_manufact_id#5, ss_sales_price#14, d_qoy#19] +Keys [2]: [i_manufact_id#5, d_qoy#19] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#14))] +Aggregate Attributes [1]: [sum#21] +Results [3]: [i_manufact_id#5, d_qoy#19, sum#22] + +(25) Exchange +Input [3]: [i_manufact_id#5, d_qoy#19, sum#22] +Arguments: hashpartitioning(i_manufact_id#5, d_qoy#19, 5), true, [id=#23] + +(26) HashAggregate [codegen id : 5] +Input [3]: [i_manufact_id#5, d_qoy#19, sum#22] +Keys [2]: [i_manufact_id#5, d_qoy#19] +Functions [1]: [sum(UnscaledValue(ss_sales_price#14))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#14))#24] +Results [3]: [i_manufact_id#5, MakeDecimal(sum(UnscaledValue(ss_sales_price#14))#24,17,2) AS sum_sales#25, MakeDecimal(sum(UnscaledValue(ss_sales_price#14))#24,17,2) AS _w0#26] + +(27) Exchange +Input [3]: [i_manufact_id#5, sum_sales#25, _w0#26] +Arguments: hashpartitioning(i_manufact_id#5, 5), true, [id=#27] + +(28) Sort [codegen id : 6] +Input [3]: [i_manufact_id#5, sum_sales#25, _w0#26] +Arguments: [i_manufact_id#5 ASC NULLS FIRST], false, 0 + +(29) Window +Input [3]: [i_manufact_id#5, sum_sales#25, _w0#26] +Arguments: [avg(_w0#26) windowspecdefinition(i_manufact_id#5, 
specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_quarterly_sales#28], [i_manufact_id#5] + +(30) Filter [codegen id : 7] +Input [4]: [i_manufact_id#5, sum_sales#25, _w0#26, avg_quarterly_sales#28] +Condition : (CASE WHEN (avg_quarterly_sales#28 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#25 as decimal(22,6))) - promote_precision(cast(avg_quarterly_sales#28 as decimal(22,6)))), DecimalType(22,6), true))) / promote_precision(cast(avg_quarterly_sales#28 as decimal(22,6)))), DecimalType(38,16), true) ELSE null END > 0.1000000000000000) + +(31) Project [codegen id : 7] +Output [3]: [i_manufact_id#5, sum_sales#25, avg_quarterly_sales#28] +Input [4]: [i_manufact_id#5, sum_sales#25, _w0#26, avg_quarterly_sales#28] + +(32) TakeOrderedAndProject +Input [3]: [i_manufact_id#5, sum_sales#25, avg_quarterly_sales#28] +Arguments: 100, [avg_quarterly_sales#28 ASC NULLS FIRST, sum_sales#25 ASC NULLS FIRST, i_manufact_id#5 ASC NULLS FIRST], [i_manufact_id#5, sum_sales#25, avg_quarterly_sales#28] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53.sf100/simplified.txt new file mode 100644 index 0000000000000..98f126325517b --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53.sf100/simplified.txt @@ -0,0 +1,49 @@ +TakeOrderedAndProject [avg_quarterly_sales,i_manufact_id,sum_sales] + WholeStageCodegen (7) + Project [avg_quarterly_sales,i_manufact_id,sum_sales] + Filter [avg_quarterly_sales,sum_sales] + InputAdapter + Window [_w0,i_manufact_id] + WholeStageCodegen (6) + Sort [i_manufact_id] + InputAdapter + Exchange [i_manufact_id] #1 + WholeStageCodegen (5) + HashAggregate [d_qoy,i_manufact_id,sum] [_w0,sum,sum(UnscaledValue(ss_sales_price)),sum_sales] + InputAdapter + Exchange [d_qoy,i_manufact_id] #2 + WholeStageCodegen (4) 
+ HashAggregate [d_qoy,i_manufact_id,ss_sales_price] [sum,sum] + Project [d_qoy,i_manufact_id,ss_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_manufact_id,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [i_manufact_id,ss_sales_price,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [i_item_sk,i_manufact_id] + Filter [i_brand,i_category,i_class,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_category,i_class,i_item_sk,i_manufact_id] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [d_date_sk,d_qoy] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq,d_qoy] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/explain.txt new file mode 100644 index 0000000000000..f230f2f140edf --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/explain.txt @@ -0,0 +1,180 @@ +== Physical Plan == +TakeOrderedAndProject (32) ++- * Project (31) + +- * Filter (30) + +- Window (29) + +- * Sort (28) + +- Exchange (27) + +- * HashAggregate (26) + +- Exchange (25) + +- * HashAggregate (24) + +- * Project (23) + +- * BroadcastHashJoin Inner BuildRight (22) + :- * Project (17) + : +- * BroadcastHashJoin Inner BuildRight (16) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Project (4) + : : : +- * 
Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.item (1) + : : +- BroadcastExchange (8) + : : +- * Filter (7) + : : +- * ColumnarToRow (6) + : : +- Scan parquet default.store_sales (5) + : +- BroadcastExchange (15) + : +- * Project (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet default.date_dim (11) + +- BroadcastExchange (21) + +- * Filter (20) + +- * ColumnarToRow (19) + +- Scan parquet default.store (18) + + +(1) Scan parquet default.item +Output [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [Or(And(And(In(i_category, [Books,Children,Electronics]),In(i_class, [personal,portable,reference,self-help])),In(i_brand, [scholaramalgamalg #6,scholaramalgamalg #7,exportiunivamalg #8,scholaramalgamalg #8])),And(And(In(i_category, [Women,Music,Men]),In(i_class, [accessories,classical,fragrances,pants])),In(i_brand, [amalgimporto #9,edu packscholar #9,exportiimporto #9,importoamalg #9]))), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] + +(3) Filter [codegen id : 4] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] +Condition : ((((i_category#4 IN (Books,Children,Electronics) AND i_class#3 IN (personal,portable,reference,self-help)) AND i_brand#2 IN (scholaramalgamalg #6,scholaramalgamalg #7,exportiunivamalg #8,scholaramalgamalg #8)) OR ((i_category#4 IN (Women,Music,Men) AND i_class#3 IN (accessories,classical,fragrances,pants)) AND i_brand#2 IN (amalgimporto #9,edu packscholar #9,exportiimporto #9,importoamalg #9))) AND isnotnull(i_item_sk#1)) + +(4) Project [codegen id : 4] +Output [2]: [i_item_sk#1, i_manufact_id#5] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, 
i_manufact_id#5] + +(5) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [4]: [ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] + +(7) Filter [codegen id : 1] +Input [4]: [ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] +Condition : ((isnotnull(ss_item_sk#11) AND isnotnull(ss_sold_date_sk#10)) AND isnotnull(ss_store_sk#12)) + +(8) BroadcastExchange +Input [4]: [ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#14] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [ss_item_sk#11] +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [i_manufact_id#5, ss_sold_date_sk#10, ss_store_sk#12, ss_sales_price#13] +Input [6]: [i_item_sk#1, i_manufact_id#5, ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] + +(11) Scan parquet default.date_dim +Output [3]: [d_date_sk#15, d_month_seq#16, d_qoy#17] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [In(d_month_seq, [1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#15, d_month_seq#16, d_qoy#17] + +(13) Filter [codegen id : 2] +Input [3]: [d_date_sk#15, d_month_seq#16, d_qoy#17] +Condition : (d_month_seq#16 INSET 
(1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204) AND isnotnull(d_date_sk#15)) + +(14) Project [codegen id : 2] +Output [2]: [d_date_sk#15, d_qoy#17] +Input [3]: [d_date_sk#15, d_month_seq#16, d_qoy#17] + +(15) BroadcastExchange +Input [2]: [d_date_sk#15, d_qoy#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#10] +Right keys [1]: [d_date_sk#15] +Join condition: None + +(17) Project [codegen id : 4] +Output [4]: [i_manufact_id#5, ss_store_sk#12, ss_sales_price#13, d_qoy#17] +Input [6]: [i_manufact_id#5, ss_sold_date_sk#10, ss_store_sk#12, ss_sales_price#13, d_date_sk#15, d_qoy#17] + +(18) Scan parquet default.store +Output [1]: [s_store_sk#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [1]: [s_store_sk#19] + +(20) Filter [codegen id : 3] +Input [1]: [s_store_sk#19] +Condition : isnotnull(s_store_sk#19) + +(21) BroadcastExchange +Input [1]: [s_store_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#20] + +(22) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#12] +Right keys [1]: [s_store_sk#19] +Join condition: None + +(23) Project [codegen id : 4] +Output [3]: [i_manufact_id#5, ss_sales_price#13, d_qoy#17] +Input [5]: [i_manufact_id#5, ss_store_sk#12, ss_sales_price#13, d_qoy#17, s_store_sk#19] + +(24) HashAggregate [codegen id : 4] +Input [3]: [i_manufact_id#5, ss_sales_price#13, d_qoy#17] +Keys [2]: [i_manufact_id#5, d_qoy#17] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#13))] +Aggregate Attributes [1]: [sum#21] +Results [3]: [i_manufact_id#5, d_qoy#17, sum#22] + +(25) Exchange +Input [3]: [i_manufact_id#5, d_qoy#17, 
sum#22] +Arguments: hashpartitioning(i_manufact_id#5, d_qoy#17, 5), true, [id=#23] + +(26) HashAggregate [codegen id : 5] +Input [3]: [i_manufact_id#5, d_qoy#17, sum#22] +Keys [2]: [i_manufact_id#5, d_qoy#17] +Functions [1]: [sum(UnscaledValue(ss_sales_price#13))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#13))#24] +Results [3]: [i_manufact_id#5, MakeDecimal(sum(UnscaledValue(ss_sales_price#13))#24,17,2) AS sum_sales#25, MakeDecimal(sum(UnscaledValue(ss_sales_price#13))#24,17,2) AS _w0#26] + +(27) Exchange +Input [3]: [i_manufact_id#5, sum_sales#25, _w0#26] +Arguments: hashpartitioning(i_manufact_id#5, 5), true, [id=#27] + +(28) Sort [codegen id : 6] +Input [3]: [i_manufact_id#5, sum_sales#25, _w0#26] +Arguments: [i_manufact_id#5 ASC NULLS FIRST], false, 0 + +(29) Window +Input [3]: [i_manufact_id#5, sum_sales#25, _w0#26] +Arguments: [avg(_w0#26) windowspecdefinition(i_manufact_id#5, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_quarterly_sales#28], [i_manufact_id#5] + +(30) Filter [codegen id : 7] +Input [4]: [i_manufact_id#5, sum_sales#25, _w0#26, avg_quarterly_sales#28] +Condition : (CASE WHEN (avg_quarterly_sales#28 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#25 as decimal(22,6))) - promote_precision(cast(avg_quarterly_sales#28 as decimal(22,6)))), DecimalType(22,6), true))) / promote_precision(cast(avg_quarterly_sales#28 as decimal(22,6)))), DecimalType(38,16), true) ELSE null END > 0.1000000000000000) + +(31) Project [codegen id : 7] +Output [3]: [i_manufact_id#5, sum_sales#25, avg_quarterly_sales#28] +Input [4]: [i_manufact_id#5, sum_sales#25, _w0#26, avg_quarterly_sales#28] + +(32) TakeOrderedAndProject +Input [3]: [i_manufact_id#5, sum_sales#25, avg_quarterly_sales#28] +Arguments: 100, [avg_quarterly_sales#28 ASC NULLS FIRST, sum_sales#25 ASC NULLS FIRST, i_manufact_id#5 ASC NULLS FIRST], [i_manufact_id#5, sum_sales#25, avg_quarterly_sales#28] 
+ diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/simplified.txt new file mode 100644 index 0000000000000..c8070c46c9c80 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/simplified.txt @@ -0,0 +1,49 @@ +TakeOrderedAndProject [avg_quarterly_sales,i_manufact_id,sum_sales] + WholeStageCodegen (7) + Project [avg_quarterly_sales,i_manufact_id,sum_sales] + Filter [avg_quarterly_sales,sum_sales] + InputAdapter + Window [_w0,i_manufact_id] + WholeStageCodegen (6) + Sort [i_manufact_id] + InputAdapter + Exchange [i_manufact_id] #1 + WholeStageCodegen (5) + HashAggregate [d_qoy,i_manufact_id,sum] [_w0,sum,sum(UnscaledValue(ss_sales_price)),sum_sales] + InputAdapter + Exchange [d_qoy,i_manufact_id] #2 + WholeStageCodegen (4) + HashAggregate [d_qoy,i_manufact_id,ss_sales_price] [sum,sum] + Project [d_qoy,i_manufact_id,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_qoy,i_manufact_id,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_manufact_id,ss_sales_price,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [i_item_sk,i_manufact_id] + Filter [i_brand,i_category,i_class,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_category,i_class,i_item_sk,i_manufact_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk,d_qoy] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq,d_qoy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [s_store_sk] 
+ ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/explain.txt new file mode 100644 index 0000000000000..9ccda17a031c3 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/explain.txt @@ -0,0 +1,494 @@ +== Physical Plan == +TakeOrderedAndProject (71) ++- * HashAggregate (70) + +- Exchange (69) + +- * HashAggregate (68) + +- * HashAggregate (67) + +- Exchange (66) + +- * HashAggregate (65) + +- * Project (64) + +- * BroadcastHashJoin Inner BuildRight (63) + :- * Project (57) + : +- * BroadcastHashJoin Inner BuildRight (56) + : :- * Project (51) + : : +- * SortMergeJoin Inner (50) + : : :- * Sort (44) + : : : +- Exchange (43) + : : : +- * Project (42) + : : : +- * SortMergeJoin Inner (41) + : : : :- * Sort (35) + : : : : +- * HashAggregate (34) + : : : : +- * HashAggregate (33) + : : : : +- * Project (32) + : : : : +- * SortMergeJoin Inner (31) + : : : : :- * Sort (25) + : : : : : +- Exchange (24) + : : : : : +- * Project (23) + : : : : : +- * BroadcastHashJoin Inner BuildRight (22) + : : : : : :- * Project (16) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : : : : :- Union (9) + : : : : : : : :- * Project (4) + : : : : : : : : +- * Filter (3) + : : : : : : : : +- * ColumnarToRow (2) + : : : : : : : : +- Scan parquet default.catalog_sales (1) + : : : : : : : +- * Project (8) + : : : : : : : +- * Filter (7) + : : : : : : : +- * ColumnarToRow (6) + : : : : : : : +- Scan parquet default.web_sales (5) + : : : : : : +- BroadcastExchange (14) + : : : : : : +- * Project (13) + : : : : : : +- * Filter (12) + : : : : : : +- * ColumnarToRow (11) + : : : : : : +- Scan parquet default.item (10) + : : : : : +- BroadcastExchange (21) + : : : : : +- * Project (20) + : : : : : +- * Filter (19) + : : : : : +- * 
ColumnarToRow (18) + : : : : : +- Scan parquet default.date_dim (17) + : : : : +- * Sort (30) + : : : : +- Exchange (29) + : : : : +- * Filter (28) + : : : : +- * ColumnarToRow (27) + : : : : +- Scan parquet default.customer (26) + : : : +- * Sort (40) + : : : +- Exchange (39) + : : : +- * Filter (38) + : : : +- * ColumnarToRow (37) + : : : +- Scan parquet default.store_sales (36) + : : +- * Sort (49) + : : +- Exchange (48) + : : +- * Filter (47) + : : +- * ColumnarToRow (46) + : : +- Scan parquet default.customer_address (45) + : +- BroadcastExchange (55) + : +- * Filter (54) + : +- * ColumnarToRow (53) + : +- Scan parquet default.store (52) + +- BroadcastExchange (62) + +- * Project (61) + +- * Filter (60) + +- * ColumnarToRow (59) + +- Scan parquet default.date_dim (58) + + +(1) Scan parquet default.catalog_sales +Output [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3] + +(3) Filter [codegen id : 1] +Input [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3] +Condition : ((isnotnull(cs_item_sk#3) AND isnotnull(cs_sold_date_sk#1)) AND isnotnull(cs_bill_customer_sk#2)) + +(4) Project [codegen id : 1] +Output [3]: [cs_sold_date_sk#1 AS sold_date_sk#4, cs_bill_customer_sk#2 AS customer_sk#5, cs_item_sk#3 AS item_sk#6] +Input [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3] + +(5) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#7, ws_item_sk#8, ws_bill_customer_sk#9] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 2] +Input [3]: [ws_sold_date_sk#7, ws_item_sk#8, ws_bill_customer_sk#9] + +(7) Filter [codegen id : 2] +Input [3]: [ws_sold_date_sk#7, ws_item_sk#8, ws_bill_customer_sk#9] +Condition : ((isnotnull(ws_item_sk#8) AND isnotnull(ws_sold_date_sk#7)) AND isnotnull(ws_bill_customer_sk#9)) + +(8) Project [codegen id : 2] +Output [3]: [ws_sold_date_sk#7 AS sold_date_sk#10, ws_bill_customer_sk#9 AS customer_sk#11, ws_item_sk#8 AS item_sk#12] +Input [3]: [ws_sold_date_sk#7, ws_item_sk#8, ws_bill_customer_sk#9] + +(9) Union + +(10) Scan parquet default.item +Output [3]: [i_item_sk#13, i_class#14, i_category#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_category), IsNotNull(i_class), EqualTo(i_category,Women), EqualTo(i_class,maternity), IsNotNull(i_item_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 3] +Input [3]: [i_item_sk#13, i_class#14, i_category#15] + +(12) Filter [codegen id : 3] +Input [3]: [i_item_sk#13, i_class#14, i_category#15] +Condition : ((((isnotnull(i_category#15) AND isnotnull(i_class#14)) AND (i_category#15 = Women)) AND (i_class#14 = maternity)) AND isnotnull(i_item_sk#13)) + +(13) Project [codegen id : 3] +Output [1]: [i_item_sk#13] +Input [3]: [i_item_sk#13, i_class#14, i_category#15] + +(14) BroadcastExchange +Input [1]: [i_item_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(15) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [item_sk#6] +Right keys [1]: [i_item_sk#13] +Join condition: None + +(16) Project [codegen id : 5] +Output [2]: 
[sold_date_sk#4, customer_sk#5] +Input [4]: [sold_date_sk#4, customer_sk#5, item_sk#6, i_item_sk#13] + +(17) Scan parquet default.date_dim +Output [3]: [d_date_sk#17, d_year#18, d_moy#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,12), EqualTo(d_year,1998), IsNotNull(d_date_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 4] +Input [3]: [d_date_sk#17, d_year#18, d_moy#19] + +(19) Filter [codegen id : 4] +Input [3]: [d_date_sk#17, d_year#18, d_moy#19] +Condition : ((((isnotnull(d_moy#19) AND isnotnull(d_year#18)) AND (d_moy#19 = 12)) AND (d_year#18 = 1998)) AND isnotnull(d_date_sk#17)) + +(20) Project [codegen id : 4] +Output [1]: [d_date_sk#17] +Input [3]: [d_date_sk#17, d_year#18, d_moy#19] + +(21) BroadcastExchange +Input [1]: [d_date_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(22) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [sold_date_sk#4] +Right keys [1]: [d_date_sk#17] +Join condition: None + +(23) Project [codegen id : 5] +Output [1]: [customer_sk#5] +Input [3]: [sold_date_sk#4, customer_sk#5, d_date_sk#17] + +(24) Exchange +Input [1]: [customer_sk#5] +Arguments: hashpartitioning(customer_sk#5, 5), true, [id=#21] + +(25) Sort [codegen id : 6] +Input [1]: [customer_sk#5] +Arguments: [customer_sk#5 ASC NULLS FIRST], false, 0 + +(26) Scan parquet default.customer +Output [2]: [c_customer_sk#22, c_current_addr_sk#23] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 7] +Input [2]: [c_customer_sk#22, c_current_addr_sk#23] + +(28) 
Filter [codegen id : 7] +Input [2]: [c_customer_sk#22, c_current_addr_sk#23] +Condition : (isnotnull(c_customer_sk#22) AND isnotnull(c_current_addr_sk#23)) + +(29) Exchange +Input [2]: [c_customer_sk#22, c_current_addr_sk#23] +Arguments: hashpartitioning(c_customer_sk#22, 5), true, [id=#24] + +(30) Sort [codegen id : 8] +Input [2]: [c_customer_sk#22, c_current_addr_sk#23] +Arguments: [c_customer_sk#22 ASC NULLS FIRST], false, 0 + +(31) SortMergeJoin [codegen id : 9] +Left keys [1]: [customer_sk#5] +Right keys [1]: [c_customer_sk#22] +Join condition: None + +(32) Project [codegen id : 9] +Output [2]: [c_customer_sk#22, c_current_addr_sk#23] +Input [3]: [customer_sk#5, c_customer_sk#22, c_current_addr_sk#23] + +(33) HashAggregate [codegen id : 9] +Input [2]: [c_customer_sk#22, c_current_addr_sk#23] +Keys [2]: [c_customer_sk#22, c_current_addr_sk#23] +Functions: [] +Aggregate Attributes: [] +Results [2]: [c_customer_sk#22, c_current_addr_sk#23] + +(34) HashAggregate [codegen id : 9] +Input [2]: [c_customer_sk#22, c_current_addr_sk#23] +Keys [2]: [c_customer_sk#22, c_current_addr_sk#23] +Functions: [] +Aggregate Attributes: [] +Results [2]: [c_customer_sk#22, c_current_addr_sk#23] + +(35) Sort [codegen id : 9] +Input [2]: [c_customer_sk#22, c_current_addr_sk#23] +Arguments: [c_customer_sk#22 ASC NULLS FIRST], false, 0 + +(36) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(37) ColumnarToRow [codegen id : 10] +Input [3]: [ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] + +(38) Filter [codegen id : 10] +Input [3]: [ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] +Condition : (isnotnull(ss_customer_sk#26) AND 
isnotnull(ss_sold_date_sk#25)) + +(39) Exchange +Input [3]: [ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] +Arguments: hashpartitioning(ss_customer_sk#26, 5), true, [id=#28] + +(40) Sort [codegen id : 11] +Input [3]: [ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] +Arguments: [ss_customer_sk#26 ASC NULLS FIRST], false, 0 + +(41) SortMergeJoin [codegen id : 12] +Left keys [1]: [c_customer_sk#22] +Right keys [1]: [ss_customer_sk#26] +Join condition: None + +(42) Project [codegen id : 12] +Output [4]: [c_customer_sk#22, c_current_addr_sk#23, ss_sold_date_sk#25, ss_ext_sales_price#27] +Input [5]: [c_customer_sk#22, c_current_addr_sk#23, ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] + +(43) Exchange +Input [4]: [c_customer_sk#22, c_current_addr_sk#23, ss_sold_date_sk#25, ss_ext_sales_price#27] +Arguments: hashpartitioning(c_current_addr_sk#23, 5), true, [id=#29] + +(44) Sort [codegen id : 13] +Input [4]: [c_customer_sk#22, c_current_addr_sk#23, ss_sold_date_sk#25, ss_ext_sales_price#27] +Arguments: [c_current_addr_sk#23 ASC NULLS FIRST], false, 0 + +(45) Scan parquet default.customer_address +Output [3]: [ca_address_sk#30, ca_county#31, ca_state#32] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state), IsNotNull(ca_county)] +ReadSchema: struct + +(46) ColumnarToRow [codegen id : 14] +Input [3]: [ca_address_sk#30, ca_county#31, ca_state#32] + +(47) Filter [codegen id : 14] +Input [3]: [ca_address_sk#30, ca_county#31, ca_state#32] +Condition : ((isnotnull(ca_address_sk#30) AND isnotnull(ca_state#32)) AND isnotnull(ca_county#31)) + +(48) Exchange +Input [3]: [ca_address_sk#30, ca_county#31, ca_state#32] +Arguments: hashpartitioning(ca_address_sk#30, 5), true, [id=#33] + +(49) Sort [codegen id : 15] +Input [3]: [ca_address_sk#30, 
ca_county#31, ca_state#32] +Arguments: [ca_address_sk#30 ASC NULLS FIRST], false, 0 + +(50) SortMergeJoin [codegen id : 18] +Left keys [1]: [c_current_addr_sk#23] +Right keys [1]: [ca_address_sk#30] +Join condition: None + +(51) Project [codegen id : 18] +Output [5]: [c_customer_sk#22, ss_sold_date_sk#25, ss_ext_sales_price#27, ca_county#31, ca_state#32] +Input [7]: [c_customer_sk#22, c_current_addr_sk#23, ss_sold_date_sk#25, ss_ext_sales_price#27, ca_address_sk#30, ca_county#31, ca_state#32] + +(52) Scan parquet default.store +Output [2]: [s_county#34, s_state#35] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_state), IsNotNull(s_county)] +ReadSchema: struct + +(53) ColumnarToRow [codegen id : 16] +Input [2]: [s_county#34, s_state#35] + +(54) Filter [codegen id : 16] +Input [2]: [s_county#34, s_state#35] +Condition : (isnotnull(s_state#35) AND isnotnull(s_county#34)) + +(55) BroadcastExchange +Input [2]: [s_county#34, s_state#35] +Arguments: HashedRelationBroadcastMode(List(input[0, string, false], input[1, string, false]),false), [id=#36] + +(56) BroadcastHashJoin [codegen id : 18] +Left keys [2]: [ca_county#31, ca_state#32] +Right keys [2]: [s_county#34, s_state#35] +Join condition: None + +(57) Project [codegen id : 18] +Output [3]: [c_customer_sk#22, ss_sold_date_sk#25, ss_ext_sales_price#27] +Input [7]: [c_customer_sk#22, ss_sold_date_sk#25, ss_ext_sales_price#27, ca_county#31, ca_state#32, s_county#34, s_state#35] + +(58) Scan parquet default.date_dim +Output [2]: [d_date_sk#17, d_month_seq#37] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(59) ColumnarToRow [codegen id : 17] +Input [2]: 
[d_date_sk#17, d_month_seq#37] + +(60) Filter [codegen id : 17] +Input [2]: [d_date_sk#17, d_month_seq#37] +Condition : (((isnotnull(d_month_seq#37) AND (d_month_seq#37 >= Subquery scalar-subquery#38, [id=#39])) AND (d_month_seq#37 <= Subquery scalar-subquery#40, [id=#41])) AND isnotnull(d_date_sk#17)) + +(61) Project [codegen id : 17] +Output [1]: [d_date_sk#17] +Input [2]: [d_date_sk#17, d_month_seq#37] + +(62) BroadcastExchange +Input [1]: [d_date_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#42] + +(63) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [ss_sold_date_sk#25] +Right keys [1]: [d_date_sk#17] +Join condition: None + +(64) Project [codegen id : 18] +Output [2]: [c_customer_sk#22, ss_ext_sales_price#27] +Input [4]: [c_customer_sk#22, ss_sold_date_sk#25, ss_ext_sales_price#27, d_date_sk#17] + +(65) HashAggregate [codegen id : 18] +Input [2]: [c_customer_sk#22, ss_ext_sales_price#27] +Keys [1]: [c_customer_sk#22] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#27))] +Aggregate Attributes [1]: [sum#43] +Results [2]: [c_customer_sk#22, sum#44] + +(66) Exchange +Input [2]: [c_customer_sk#22, sum#44] +Arguments: hashpartitioning(c_customer_sk#22, 5), true, [id=#45] + +(67) HashAggregate [codegen id : 19] +Input [2]: [c_customer_sk#22, sum#44] +Keys [1]: [c_customer_sk#22] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#27))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#27))#46] +Results [1]: [cast(CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#27))#46,17,2)) / 50.00), DecimalType(21,6), true) as int) AS segment#47] + +(68) HashAggregate [codegen id : 19] +Input [1]: [segment#47] +Keys [1]: [segment#47] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#48] +Results [2]: [segment#47, count#49] + +(69) Exchange +Input [2]: [segment#47, count#49] +Arguments: hashpartitioning(segment#47, 5), true, [id=#50] + 
+(70) HashAggregate [codegen id : 20] +Input [2]: [segment#47, count#49] +Keys [1]: [segment#47] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#51] +Results [3]: [segment#47, count(1)#51 AS num_customers#52, (segment#47 * 50) AS segment_base#53] + +(71) TakeOrderedAndProject +Input [3]: [segment#47, num_customers#52, segment_base#53] +Arguments: 100, [segment#47 ASC NULLS FIRST, num_customers#52 ASC NULLS FIRST], [segment#47, num_customers#52, segment_base#53] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 60 Hosting Expression = Subquery scalar-subquery#38, [id=#39] +* HashAggregate (78) ++- Exchange (77) + +- * HashAggregate (76) + +- * Project (75) + +- * Filter (74) + +- * ColumnarToRow (73) + +- Scan parquet default.date_dim (72) + + +(72) Scan parquet default.date_dim +Output [3]: [d_month_seq#37, d_year#18, d_moy#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)] +ReadSchema: struct + +(73) ColumnarToRow [codegen id : 1] +Input [3]: [d_month_seq#37, d_year#18, d_moy#19] + +(74) Filter [codegen id : 1] +Input [3]: [d_month_seq#37, d_year#18, d_moy#19] +Condition : (((isnotnull(d_year#18) AND isnotnull(d_moy#19)) AND (d_year#18 = 1998)) AND (d_moy#19 = 12)) + +(75) Project [codegen id : 1] +Output [1]: [(d_month_seq#37 + 1) AS (d_month_seq + 1)#54] +Input [3]: [d_month_seq#37, d_year#18, d_moy#19] + +(76) HashAggregate [codegen id : 1] +Input [1]: [(d_month_seq + 1)#54] +Keys [1]: [(d_month_seq + 1)#54] +Functions: [] +Aggregate Attributes: [] +Results [1]: [(d_month_seq + 1)#54] + +(77) Exchange +Input [1]: [(d_month_seq + 1)#54] +Arguments: hashpartitioning((d_month_seq + 1)#54, 5), true, [id=#55] + +(78) HashAggregate [codegen id : 2] +Input [1]: [(d_month_seq + 1)#54] +Keys [1]: [(d_month_seq + 1)#54] 
+Functions: [] +Aggregate Attributes: [] +Results [1]: [(d_month_seq + 1)#54] + +Subquery:2 Hosting operator id = 60 Hosting Expression = Subquery scalar-subquery#40, [id=#41] +* HashAggregate (85) ++- Exchange (84) + +- * HashAggregate (83) + +- * Project (82) + +- * Filter (81) + +- * ColumnarToRow (80) + +- Scan parquet default.date_dim (79) + + +(79) Scan parquet default.date_dim +Output [3]: [d_month_seq#37, d_year#18, d_moy#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)] +ReadSchema: struct + +(80) ColumnarToRow [codegen id : 1] +Input [3]: [d_month_seq#37, d_year#18, d_moy#19] + +(81) Filter [codegen id : 1] +Input [3]: [d_month_seq#37, d_year#18, d_moy#19] +Condition : (((isnotnull(d_year#18) AND isnotnull(d_moy#19)) AND (d_year#18 = 1998)) AND (d_moy#19 = 12)) + +(82) Project [codegen id : 1] +Output [1]: [(d_month_seq#37 + 3) AS (d_month_seq + 3)#56] +Input [3]: [d_month_seq#37, d_year#18, d_moy#19] + +(83) HashAggregate [codegen id : 1] +Input [1]: [(d_month_seq + 3)#56] +Keys [1]: [(d_month_seq + 3)#56] +Functions: [] +Aggregate Attributes: [] +Results [1]: [(d_month_seq + 3)#56] + +(84) Exchange +Input [1]: [(d_month_seq + 3)#56] +Arguments: hashpartitioning((d_month_seq + 3)#56, 5), true, [id=#57] + +(85) HashAggregate [codegen id : 2] +Input [1]: [(d_month_seq + 3)#56] +Keys [1]: [(d_month_seq + 3)#56] +Functions: [] +Aggregate Attributes: [] +Results [1]: [(d_month_seq + 3)#56] + + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/simplified.txt new file mode 100644 index 0000000000000..1f0920d4cbbd6 --- /dev/null +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/simplified.txt @@ -0,0 +1,142 @@ +TakeOrderedAndProject [num_customers,segment,segment_base] + WholeStageCodegen (20) + HashAggregate [count,segment] [count,count(1),num_customers,segment_base] + InputAdapter + Exchange [segment] #1 + WholeStageCodegen (19) + HashAggregate [segment] [count,count] + HashAggregate [c_customer_sk,sum] [segment,sum,sum(UnscaledValue(ss_ext_sales_price))] + InputAdapter + Exchange [c_customer_sk] #2 + WholeStageCodegen (18) + HashAggregate [c_customer_sk,ss_ext_sales_price] [sum,sum] + Project [c_customer_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [c_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + BroadcastHashJoin [ca_county,ca_state,s_county,s_state] + Project [c_customer_sk,ca_county,ca_state,ss_ext_sales_price,ss_sold_date_sk] + SortMergeJoin [c_current_addr_sk,ca_address_sk] + InputAdapter + WholeStageCodegen (13) + Sort [c_current_addr_sk] + InputAdapter + Exchange [c_current_addr_sk] #3 + WholeStageCodegen (12) + Project [c_current_addr_sk,c_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (9) + Sort [c_customer_sk] + HashAggregate [c_current_addr_sk,c_customer_sk] + HashAggregate [c_current_addr_sk,c_customer_sk] + Project [c_current_addr_sk,c_customer_sk] + SortMergeJoin [c_customer_sk,customer_sk] + InputAdapter + WholeStageCodegen (6) + Sort [customer_sk] + InputAdapter + Exchange [customer_sk] #4 + WholeStageCodegen (5) + Project [customer_sk] + BroadcastHashJoin [d_date_sk,sold_date_sk] + Project [customer_sk,sold_date_sk] + BroadcastHashJoin [i_item_sk,item_sk] + InputAdapter + Union + WholeStageCodegen (1) + Project [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales 
[cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + WholeStageCodegen (2) + Project [ws_bill_customer_sk,ws_item_sk,ws_sold_date_sk] + Filter [ws_bill_customer_sk,ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_item_sk,ws_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [i_item_sk] + Filter [i_category,i_class,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_class,i_item_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + WholeStageCodegen (8) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #7 + WholeStageCodegen (7) + Filter [c_current_addr_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (11) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #8 + WholeStageCodegen (10) + Filter [ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + WholeStageCodegen (15) + Sort [ca_address_sk] + InputAdapter + Exchange [ca_address_sk] #9 + WholeStageCodegen (14) + Filter [ca_address_sk,ca_county,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_county,ca_state] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (16) + Filter [s_county,s_state] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_county,s_state] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (17) + Project [d_date_sk] + Filter [d_date_sk,d_month_seq] + Subquery #1 + WholeStageCodegen (2) + HashAggregate [(d_month_seq + 1)] + InputAdapter + Exchange [(d_month_seq + 1)] #12 + 
WholeStageCodegen (1) + HashAggregate [(d_month_seq + 1)] + Project [d_month_seq] + Filter [d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_month_seq,d_moy,d_year] + Subquery #2 + WholeStageCodegen (2) + HashAggregate [(d_month_seq + 3)] + InputAdapter + Exchange [(d_month_seq + 3)] #13 + WholeStageCodegen (1) + HashAggregate [(d_month_seq + 3)] + Project [d_month_seq] + Filter [d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_month_seq,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/explain.txt new file mode 100644 index 0000000000000..f1c0861f61508 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/explain.txt @@ -0,0 +1,459 @@ +== Physical Plan == +TakeOrderedAndProject (64) ++- * HashAggregate (63) + +- Exchange (62) + +- * HashAggregate (61) + +- * HashAggregate (60) + +- Exchange (59) + +- * HashAggregate (58) + +- * Project (57) + +- * BroadcastHashJoin Inner BuildRight (56) + :- * Project (50) + : +- * BroadcastHashJoin Inner BuildRight (49) + : :- * Project (44) + : : +- * BroadcastHashJoin Inner BuildRight (43) + : : :- * Project (38) + : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : :- * HashAggregate (32) + : : : : +- Exchange (31) + : : : : +- * HashAggregate (30) + : : : : +- * Project (29) + : : : : +- * BroadcastHashJoin Inner BuildRight (28) + : : : : :- * Project (23) + : : : : : +- * BroadcastHashJoin Inner BuildRight (22) + : : : : : :- * Project (16) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : : : : :- Union (9) + : : : : : : : :- * Project (4) + : : : : : : : : +- * Filter (3) + : : : : : : : : +- * ColumnarToRow (2) + : : : : : : : : +- Scan parquet default.catalog_sales (1) + : : : : 
: : : +- * Project (8) + : : : : : : : +- * Filter (7) + : : : : : : : +- * ColumnarToRow (6) + : : : : : : : +- Scan parquet default.web_sales (5) + : : : : : : +- BroadcastExchange (14) + : : : : : : +- * Project (13) + : : : : : : +- * Filter (12) + : : : : : : +- * ColumnarToRow (11) + : : : : : : +- Scan parquet default.item (10) + : : : : : +- BroadcastExchange (21) + : : : : : +- * Project (20) + : : : : : +- * Filter (19) + : : : : : +- * ColumnarToRow (18) + : : : : : +- Scan parquet default.date_dim (17) + : : : : +- BroadcastExchange (27) + : : : : +- * Filter (26) + : : : : +- * ColumnarToRow (25) + : : : : +- Scan parquet default.customer (24) + : : : +- BroadcastExchange (36) + : : : +- * Filter (35) + : : : +- * ColumnarToRow (34) + : : : +- Scan parquet default.store_sales (33) + : : +- BroadcastExchange (42) + : : +- * Filter (41) + : : +- * ColumnarToRow (40) + : : +- Scan parquet default.customer_address (39) + : +- BroadcastExchange (48) + : +- * Filter (47) + : +- * ColumnarToRow (46) + : +- Scan parquet default.store (45) + +- BroadcastExchange (55) + +- * Project (54) + +- * Filter (53) + +- * ColumnarToRow (52) + +- Scan parquet default.date_dim (51) + + +(1) Scan parquet default.catalog_sales +Output [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3] + +(3) Filter [codegen id : 1] +Input [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3] +Condition : ((isnotnull(cs_item_sk#3) AND isnotnull(cs_sold_date_sk#1)) AND isnotnull(cs_bill_customer_sk#2)) + +(4) Project [codegen id : 1] +Output [3]: [cs_sold_date_sk#1 AS sold_date_sk#4, 
cs_bill_customer_sk#2 AS customer_sk#5, cs_item_sk#3 AS item_sk#6] +Input [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3] + +(5) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#7, ws_item_sk#8, ws_bill_customer_sk#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 2] +Input [3]: [ws_sold_date_sk#7, ws_item_sk#8, ws_bill_customer_sk#9] + +(7) Filter [codegen id : 2] +Input [3]: [ws_sold_date_sk#7, ws_item_sk#8, ws_bill_customer_sk#9] +Condition : ((isnotnull(ws_item_sk#8) AND isnotnull(ws_sold_date_sk#7)) AND isnotnull(ws_bill_customer_sk#9)) + +(8) Project [codegen id : 2] +Output [3]: [ws_sold_date_sk#7 AS sold_date_sk#10, ws_bill_customer_sk#9 AS customer_sk#11, ws_item_sk#8 AS item_sk#12] +Input [3]: [ws_sold_date_sk#7, ws_item_sk#8, ws_bill_customer_sk#9] + +(9) Union + +(10) Scan parquet default.item +Output [3]: [i_item_sk#13, i_class#14, i_category#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_category), IsNotNull(i_class), EqualTo(i_category,Women), EqualTo(i_class,maternity), IsNotNull(i_item_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 3] +Input [3]: [i_item_sk#13, i_class#14, i_category#15] + +(12) Filter [codegen id : 3] +Input [3]: [i_item_sk#13, i_class#14, i_category#15] +Condition : ((((isnotnull(i_category#15) AND isnotnull(i_class#14)) AND (i_category#15 = Women)) AND (i_class#14 = maternity)) AND isnotnull(i_item_sk#13)) + +(13) Project [codegen id : 3] +Output [1]: [i_item_sk#13] +Input [3]: [i_item_sk#13, i_class#14, i_category#15] + +(14) BroadcastExchange +Input [1]: [i_item_sk#13] 
+Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(15) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [item_sk#6] +Right keys [1]: [i_item_sk#13] +Join condition: None + +(16) Project [codegen id : 6] +Output [2]: [sold_date_sk#4, customer_sk#5] +Input [4]: [sold_date_sk#4, customer_sk#5, item_sk#6, i_item_sk#13] + +(17) Scan parquet default.date_dim +Output [3]: [d_date_sk#17, d_year#18, d_moy#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,12), EqualTo(d_year,1998), IsNotNull(d_date_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 4] +Input [3]: [d_date_sk#17, d_year#18, d_moy#19] + +(19) Filter [codegen id : 4] +Input [3]: [d_date_sk#17, d_year#18, d_moy#19] +Condition : ((((isnotnull(d_moy#19) AND isnotnull(d_year#18)) AND (d_moy#19 = 12)) AND (d_year#18 = 1998)) AND isnotnull(d_date_sk#17)) + +(20) Project [codegen id : 4] +Output [1]: [d_date_sk#17] +Input [3]: [d_date_sk#17, d_year#18, d_moy#19] + +(21) BroadcastExchange +Input [1]: [d_date_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(22) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [sold_date_sk#4] +Right keys [1]: [d_date_sk#17] +Join condition: None + +(23) Project [codegen id : 6] +Output [1]: [customer_sk#5] +Input [3]: [sold_date_sk#4, customer_sk#5, d_date_sk#17] + +(24) Scan parquet default.customer +Output [2]: [c_customer_sk#21, c_current_addr_sk#22] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 5] +Input [2]: [c_customer_sk#21, 
c_current_addr_sk#22] + +(26) Filter [codegen id : 5] +Input [2]: [c_customer_sk#21, c_current_addr_sk#22] +Condition : (isnotnull(c_customer_sk#21) AND isnotnull(c_current_addr_sk#22)) + +(27) BroadcastExchange +Input [2]: [c_customer_sk#21, c_current_addr_sk#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] + +(28) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [customer_sk#5] +Right keys [1]: [c_customer_sk#21] +Join condition: None + +(29) Project [codegen id : 6] +Output [2]: [c_customer_sk#21, c_current_addr_sk#22] +Input [3]: [customer_sk#5, c_customer_sk#21, c_current_addr_sk#22] + +(30) HashAggregate [codegen id : 6] +Input [2]: [c_customer_sk#21, c_current_addr_sk#22] +Keys [2]: [c_customer_sk#21, c_current_addr_sk#22] +Functions: [] +Aggregate Attributes: [] +Results [2]: [c_customer_sk#21, c_current_addr_sk#22] + +(31) Exchange +Input [2]: [c_customer_sk#21, c_current_addr_sk#22] +Arguments: hashpartitioning(c_customer_sk#21, c_current_addr_sk#22, 5), true, [id=#24] + +(32) HashAggregate [codegen id : 11] +Input [2]: [c_customer_sk#21, c_current_addr_sk#22] +Keys [2]: [c_customer_sk#21, c_current_addr_sk#22] +Functions: [] +Aggregate Attributes: [] +Results [2]: [c_customer_sk#21, c_current_addr_sk#22] + +(33) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 7] +Input [3]: [ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] + +(35) Filter [codegen id : 7] +Input [3]: [ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] +Condition : (isnotnull(ss_customer_sk#26) AND isnotnull(ss_sold_date_sk#25)) + +(36) BroadcastExchange 
+Input [3]: [ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#28] + +(37) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [c_customer_sk#21] +Right keys [1]: [ss_customer_sk#26] +Join condition: None + +(38) Project [codegen id : 11] +Output [4]: [c_customer_sk#21, c_current_addr_sk#22, ss_sold_date_sk#25, ss_ext_sales_price#27] +Input [5]: [c_customer_sk#21, c_current_addr_sk#22, ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] + +(39) Scan parquet default.customer_address +Output [3]: [ca_address_sk#29, ca_county#30, ca_state#31] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_county), IsNotNull(ca_state)] +ReadSchema: struct + +(40) ColumnarToRow [codegen id : 8] +Input [3]: [ca_address_sk#29, ca_county#30, ca_state#31] + +(41) Filter [codegen id : 8] +Input [3]: [ca_address_sk#29, ca_county#30, ca_state#31] +Condition : ((isnotnull(ca_address_sk#29) AND isnotnull(ca_county#30)) AND isnotnull(ca_state#31)) + +(42) BroadcastExchange +Input [3]: [ca_address_sk#29, ca_county#30, ca_state#31] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#32] + +(43) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [c_current_addr_sk#22] +Right keys [1]: [ca_address_sk#29] +Join condition: None + +(44) Project [codegen id : 11] +Output [5]: [c_customer_sk#21, ss_sold_date_sk#25, ss_ext_sales_price#27, ca_county#30, ca_state#31] +Input [7]: [c_customer_sk#21, c_current_addr_sk#22, ss_sold_date_sk#25, ss_ext_sales_price#27, ca_address_sk#29, ca_county#30, ca_state#31] + +(45) Scan parquet default.store +Output [2]: [s_county#33, s_state#34] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_state), IsNotNull(s_county)] +ReadSchema: struct + +(46) ColumnarToRow [codegen id : 9] +Input [2]: [s_county#33, s_state#34] + +(47) Filter [codegen id : 9] +Input [2]: [s_county#33, s_state#34] +Condition : (isnotnull(s_state#34) AND isnotnull(s_county#33)) + +(48) BroadcastExchange +Input [2]: [s_county#33, s_state#34] +Arguments: HashedRelationBroadcastMode(List(input[0, string, false], input[1, string, false]),false), [id=#35] + +(49) BroadcastHashJoin [codegen id : 11] +Left keys [2]: [ca_county#30, ca_state#31] +Right keys [2]: [s_county#33, s_state#34] +Join condition: None + +(50) Project [codegen id : 11] +Output [3]: [c_customer_sk#21, ss_sold_date_sk#25, ss_ext_sales_price#27] +Input [7]: [c_customer_sk#21, ss_sold_date_sk#25, ss_ext_sales_price#27, ca_county#30, ca_state#31, s_county#33, s_state#34] + +(51) Scan parquet default.date_dim +Output [2]: [d_date_sk#17, d_month_seq#36] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(52) ColumnarToRow [codegen id : 10] +Input [2]: [d_date_sk#17, d_month_seq#36] + +(53) Filter [codegen id : 10] +Input [2]: [d_date_sk#17, d_month_seq#36] +Condition : (((isnotnull(d_month_seq#36) AND (d_month_seq#36 >= Subquery scalar-subquery#37, [id=#38])) AND (d_month_seq#36 <= Subquery scalar-subquery#39, [id=#40])) AND isnotnull(d_date_sk#17)) + +(54) Project [codegen id : 10] +Output [1]: [d_date_sk#17] +Input [2]: [d_date_sk#17, d_month_seq#36] + +(55) BroadcastExchange +Input [1]: [d_date_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#41] + +(56) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ss_sold_date_sk#25] 
+Right keys [1]: [d_date_sk#17] +Join condition: None + +(57) Project [codegen id : 11] +Output [2]: [c_customer_sk#21, ss_ext_sales_price#27] +Input [4]: [c_customer_sk#21, ss_sold_date_sk#25, ss_ext_sales_price#27, d_date_sk#17] + +(58) HashAggregate [codegen id : 11] +Input [2]: [c_customer_sk#21, ss_ext_sales_price#27] +Keys [1]: [c_customer_sk#21] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#27))] +Aggregate Attributes [1]: [sum#42] +Results [2]: [c_customer_sk#21, sum#43] + +(59) Exchange +Input [2]: [c_customer_sk#21, sum#43] +Arguments: hashpartitioning(c_customer_sk#21, 5), true, [id=#44] + +(60) HashAggregate [codegen id : 12] +Input [2]: [c_customer_sk#21, sum#43] +Keys [1]: [c_customer_sk#21] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#27))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#27))#45] +Results [1]: [cast(CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#27))#45,17,2)) / 50.00), DecimalType(21,6), true) as int) AS segment#46] + +(61) HashAggregate [codegen id : 12] +Input [1]: [segment#46] +Keys [1]: [segment#46] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#47] +Results [2]: [segment#46, count#48] + +(62) Exchange +Input [2]: [segment#46, count#48] +Arguments: hashpartitioning(segment#46, 5), true, [id=#49] + +(63) HashAggregate [codegen id : 13] +Input [2]: [segment#46, count#48] +Keys [1]: [segment#46] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#50] +Results [3]: [segment#46, count(1)#50 AS num_customers#51, (segment#46 * 50) AS segment_base#52] + +(64) TakeOrderedAndProject +Input [3]: [segment#46, num_customers#51, segment_base#52] +Arguments: 100, [segment#46 ASC NULLS FIRST, num_customers#51 ASC NULLS FIRST], [segment#46, num_customers#51, segment_base#52] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 53 Hosting Expression = Subquery scalar-subquery#37, [id=#38] +* HashAggregate (71) ++- Exchange (70) 
+ +- * HashAggregate (69) + +- * Project (68) + +- * Filter (67) + +- * ColumnarToRow (66) + +- Scan parquet default.date_dim (65) + + +(65) Scan parquet default.date_dim +Output [3]: [d_month_seq#36, d_year#18, d_moy#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)] +ReadSchema: struct + +(66) ColumnarToRow [codegen id : 1] +Input [3]: [d_month_seq#36, d_year#18, d_moy#19] + +(67) Filter [codegen id : 1] +Input [3]: [d_month_seq#36, d_year#18, d_moy#19] +Condition : (((isnotnull(d_year#18) AND isnotnull(d_moy#19)) AND (d_year#18 = 1998)) AND (d_moy#19 = 12)) + +(68) Project [codegen id : 1] +Output [1]: [(d_month_seq#36 + 1) AS (d_month_seq + 1)#53] +Input [3]: [d_month_seq#36, d_year#18, d_moy#19] + +(69) HashAggregate [codegen id : 1] +Input [1]: [(d_month_seq + 1)#53] +Keys [1]: [(d_month_seq + 1)#53] +Functions: [] +Aggregate Attributes: [] +Results [1]: [(d_month_seq + 1)#53] + +(70) Exchange +Input [1]: [(d_month_seq + 1)#53] +Arguments: hashpartitioning((d_month_seq + 1)#53, 5), true, [id=#54] + +(71) HashAggregate [codegen id : 2] +Input [1]: [(d_month_seq + 1)#53] +Keys [1]: [(d_month_seq + 1)#53] +Functions: [] +Aggregate Attributes: [] +Results [1]: [(d_month_seq + 1)#53] + +Subquery:2 Hosting operator id = 53 Hosting Expression = Subquery scalar-subquery#39, [id=#40] +* HashAggregate (78) ++- Exchange (77) + +- * HashAggregate (76) + +- * Project (75) + +- * Filter (74) + +- * ColumnarToRow (73) + +- Scan parquet default.date_dim (72) + + +(72) Scan parquet default.date_dim +Output [3]: [d_month_seq#36, d_year#18, d_moy#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), 
EqualTo(d_year,1998), EqualTo(d_moy,12)] +ReadSchema: struct + +(73) ColumnarToRow [codegen id : 1] +Input [3]: [d_month_seq#36, d_year#18, d_moy#19] + +(74) Filter [codegen id : 1] +Input [3]: [d_month_seq#36, d_year#18, d_moy#19] +Condition : (((isnotnull(d_year#18) AND isnotnull(d_moy#19)) AND (d_year#18 = 1998)) AND (d_moy#19 = 12)) + +(75) Project [codegen id : 1] +Output [1]: [(d_month_seq#36 + 3) AS (d_month_seq + 3)#55] +Input [3]: [d_month_seq#36, d_year#18, d_moy#19] + +(76) HashAggregate [codegen id : 1] +Input [1]: [(d_month_seq + 3)#55] +Keys [1]: [(d_month_seq + 3)#55] +Functions: [] +Aggregate Attributes: [] +Results [1]: [(d_month_seq + 3)#55] + +(77) Exchange +Input [1]: [(d_month_seq + 3)#55] +Arguments: hashpartitioning((d_month_seq + 3)#55, 5), true, [id=#56] + +(78) HashAggregate [codegen id : 2] +Input [1]: [(d_month_seq + 3)#55] +Keys [1]: [(d_month_seq + 3)#55] +Functions: [] +Aggregate Attributes: [] +Results [1]: [(d_month_seq + 3)#55] + + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/simplified.txt new file mode 100644 index 0000000000000..fafabe1ef0515 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/simplified.txt @@ -0,0 +1,121 @@ +TakeOrderedAndProject [num_customers,segment,segment_base] + WholeStageCodegen (13) + HashAggregate [count,segment] [count,count(1),num_customers,segment_base] + InputAdapter + Exchange [segment] #1 + WholeStageCodegen (12) + HashAggregate [segment] [count,count] + HashAggregate [c_customer_sk,sum] [segment,sum,sum(UnscaledValue(ss_ext_sales_price))] + InputAdapter + Exchange [c_customer_sk] #2 + WholeStageCodegen (11) + HashAggregate [c_customer_sk,ss_ext_sales_price] [sum,sum] + Project [c_customer_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [c_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + 
BroadcastHashJoin [ca_county,ca_state,s_county,s_state] + Project [c_customer_sk,ca_county,ca_state,ss_ext_sales_price,ss_sold_date_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_addr_sk,c_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + HashAggregate [c_current_addr_sk,c_customer_sk] + InputAdapter + Exchange [c_current_addr_sk,c_customer_sk] #3 + WholeStageCodegen (6) + HashAggregate [c_current_addr_sk,c_customer_sk] + Project [c_current_addr_sk,c_customer_sk] + BroadcastHashJoin [c_customer_sk,customer_sk] + Project [customer_sk] + BroadcastHashJoin [d_date_sk,sold_date_sk] + Project [customer_sk,sold_date_sk] + BroadcastHashJoin [i_item_sk,item_sk] + InputAdapter + Union + WholeStageCodegen (1) + Project [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + WholeStageCodegen (2) + Project [ws_bill_customer_sk,ws_item_sk,ws_sold_date_sk] + Filter [ws_bill_customer_sk,ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_item_sk,ws_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [i_item_sk] + Filter [i_category,i_class,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_class,i_item_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [c_current_addr_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_customer_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (7) + Filter 
[ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (8) + Filter [ca_address_sk,ca_county,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_county,ca_state] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (9) + Filter [s_county,s_state] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_county,s_state] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (10) + Project [d_date_sk] + Filter [d_date_sk,d_month_seq] + Subquery #1 + WholeStageCodegen (2) + HashAggregate [(d_month_seq + 1)] + InputAdapter + Exchange [(d_month_seq + 1)] #11 + WholeStageCodegen (1) + HashAggregate [(d_month_seq + 1)] + Project [d_month_seq] + Filter [d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_month_seq,d_moy,d_year] + Subquery #2 + WholeStageCodegen (2) + HashAggregate [(d_month_seq + 3)] + InputAdapter + Exchange [(d_month_seq + 3)] #12 + WholeStageCodegen (1) + HashAggregate [(d_month_seq + 3)] + Project [d_month_seq] + Filter [d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_month_seq,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.sf100/explain.txt new file mode 100644 index 0000000000000..4a964a54cc37f --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.sf100/explain.txt @@ -0,0 +1,122 @@ +== Physical Plan == +TakeOrderedAndProject (21) ++- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Project (17) + +- * BroadcastHashJoin Inner BuildRight (16) + :- * Project (10) + : +- * BroadcastHashJoin 
Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.store_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.item (4) + +- BroadcastExchange (15) + +- * Project (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet default.date_dim (11) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] + +(3) Filter [codegen id : 3] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_item_sk#2)) + +(4) Scan parquet default.item +Output [4]: [i_item_sk#4, i_brand_id#5, i_brand#6, i_manager_id#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,28), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [i_item_sk#4, i_brand_id#5, i_brand#6, i_manager_id#7] + +(6) Filter [codegen id : 1] +Input [4]: [i_item_sk#4, i_brand_id#5, i_brand#6, i_manager_id#7] +Condition : ((isnotnull(i_manager_id#7) AND (i_manager_id#7 = 28)) AND isnotnull(i_item_sk#4)) + +(7) Project [codegen id : 1] +Output [3]: [i_item_sk#4, i_brand_id#5, i_brand#6] +Input [4]: [i_item_sk#4, i_brand_id#5, i_brand#6, i_manager_id#7] + +(8) BroadcastExchange +Input [3]: [i_item_sk#4, i_brand_id#5, i_brand#6] +Arguments: 
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#4] +Join condition: None + +(10) Project [codegen id : 3] +Output [4]: [ss_sold_date_sk#1, ss_ext_sales_price#3, i_brand_id#5, i_brand#6] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3, i_item_sk#4, i_brand_id#5, i_brand#6] + +(11) Scan parquet default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_moy#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] + +(13) Filter [codegen id : 2] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Condition : ((((isnotnull(d_moy#11) AND isnotnull(d_year#10)) AND (d_moy#11 = 11)) AND (d_year#10 = 1999)) AND isnotnull(d_date_sk#9)) + +(14) Project [codegen id : 2] +Output [1]: [d_date_sk#9] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] + +(15) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(17) Project [codegen id : 3] +Output [3]: [ss_ext_sales_price#3, i_brand_id#5, i_brand#6] +Input [5]: [ss_sold_date_sk#1, ss_ext_sales_price#3, i_brand_id#5, i_brand#6, d_date_sk#9] + +(18) HashAggregate [codegen id : 3] +Input [3]: [ss_ext_sales_price#3, i_brand_id#5, i_brand#6] +Keys [2]: [i_brand#6, i_brand_id#5] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#13] +Results [3]: [i_brand#6, i_brand_id#5, sum#14] + 
+(19) Exchange +Input [3]: [i_brand#6, i_brand_id#5, sum#14] +Arguments: hashpartitioning(i_brand#6, i_brand_id#5, 5), true, [id=#15] + +(20) HashAggregate [codegen id : 4] +Input [3]: [i_brand#6, i_brand_id#5, sum#14] +Keys [2]: [i_brand#6, i_brand_id#5] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#16] +Results [3]: [i_brand_id#5 AS brand_id#17, i_brand#6 AS brand#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#16,17,2) AS ext_price#19] + +(21) TakeOrderedAndProject +Input [3]: [brand_id#17, brand#18, ext_price#19] +Arguments: 100, [ext_price#19 DESC NULLS LAST, brand_id#17 ASC NULLS FIRST], [brand_id#17, brand#18, ext_price#19] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.sf100/simplified.txt new file mode 100644 index 0000000000000..d3ab9692efb60 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.sf100/simplified.txt @@ -0,0 +1,31 @@ +TakeOrderedAndProject [brand,brand_id,ext_price] + WholeStageCodegen (4) + HashAggregate [i_brand,i_brand_id,sum] [brand,brand_id,ext_price,sum,sum(UnscaledValue(ss_ext_sales_price))] + InputAdapter + Exchange [i_brand,i_brand_id] #1 + WholeStageCodegen (3) + HashAggregate [i_brand,i_brand_id,ss_ext_sales_price] [sum,sum] + Project [i_brand,i_brand_id,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand,i_brand_id,ss_ext_sales_price,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [i_brand,i_brand_id,i_item_sk] + Filter [i_item_sk,i_manager_id] + ColumnarToRow + InputAdapter + Scan parquet 
default.item [i_brand,i_brand_id,i_item_sk,i_manager_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/explain.txt new file mode 100644 index 0000000000000..8662e923d9af5 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/explain.txt @@ -0,0 +1,122 @@ +== Physical Plan == +TakeOrderedAndProject (21) ++- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Project (17) + +- * BroadcastHashJoin Inner BuildRight (16) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Project (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.date_dim (1) + : +- BroadcastExchange (8) + : +- * Filter (7) + : +- * ColumnarToRow (6) + : +- Scan parquet default.store_sales (5) + +- BroadcastExchange (15) + +- * Project (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet default.item (11) + + +(1) Scan parquet default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(3) Filter [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 1999)) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 3] +Output [1]: [d_date_sk#1] +Input [3]: 
[d_date_sk#1, d_year#2, d_moy#3] + +(5) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] + +(7) Filter [codegen id : 1] +Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] +Condition : (isnotnull(ss_sold_date_sk#4) AND isnotnull(ss_item_sk#5)) + +(8) BroadcastExchange +Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#4] +Join condition: None + +(10) Project [codegen id : 3] +Output [2]: [ss_item_sk#5, ss_ext_sales_price#6] +Input [4]: [d_date_sk#1, ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] + +(11) Scan parquet default.item +Output [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,28), IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] + +(13) Filter [codegen id : 2] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] +Condition : ((isnotnull(i_manager_id#11) AND (i_manager_id#11 = 28)) AND isnotnull(i_item_sk#8)) + +(14) Project [codegen id : 2] +Output [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, 
i_manager_id#11] + +(15) BroadcastExchange +Input [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#5] +Right keys [1]: [i_item_sk#8] +Join condition: None + +(17) Project [codegen id : 3] +Output [3]: [ss_ext_sales_price#6, i_brand_id#9, i_brand#10] +Input [5]: [ss_item_sk#5, ss_ext_sales_price#6, i_item_sk#8, i_brand_id#9, i_brand#10] + +(18) HashAggregate [codegen id : 3] +Input [3]: [ss_ext_sales_price#6, i_brand_id#9, i_brand#10] +Keys [2]: [i_brand#10, i_brand_id#9] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#6))] +Aggregate Attributes [1]: [sum#13] +Results [3]: [i_brand#10, i_brand_id#9, sum#14] + +(19) Exchange +Input [3]: [i_brand#10, i_brand_id#9, sum#14] +Arguments: hashpartitioning(i_brand#10, i_brand_id#9, 5), true, [id=#15] + +(20) HashAggregate [codegen id : 4] +Input [3]: [i_brand#10, i_brand_id#9, sum#14] +Keys [2]: [i_brand#10, i_brand_id#9] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#6))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#6))#16] +Results [3]: [i_brand_id#9 AS brand_id#17, i_brand#10 AS brand#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#16,17,2) AS ext_price#19] + +(21) TakeOrderedAndProject +Input [3]: [brand_id#17, brand#18, ext_price#19] +Arguments: 100, [ext_price#19 DESC NULLS LAST, brand_id#17 ASC NULLS FIRST], [brand_id#17, brand#18, ext_price#19] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/simplified.txt new file mode 100644 index 0000000000000..b20fb74817e84 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/simplified.txt @@ -0,0 +1,31 @@ +TakeOrderedAndProject [brand,brand_id,ext_price] + WholeStageCodegen (4) + HashAggregate 
[i_brand,i_brand_id,sum] [brand,brand_id,ext_price,sum,sum(UnscaledValue(ss_ext_sales_price))] + InputAdapter + Exchange [i_brand,i_brand_id] #1 + WholeStageCodegen (3) + HashAggregate [i_brand,i_brand_id,ss_ext_sales_price] [sum,sum] + Project [i_brand,i_brand_id,ss_ext_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [i_brand,i_brand_id,i_item_sk] + Filter [i_item_sk,i_manager_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_brand_id,i_item_sk,i_manager_id] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56.sf100/explain.txt new file mode 100644 index 0000000000000..480c3ecda13f3 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56.sf100/explain.txt @@ -0,0 +1,378 @@ +== Physical Plan == +TakeOrderedAndProject (67) ++- * HashAggregate (66) + +- Exchange (65) + +- * HashAggregate (64) + +- Union (63) + :- * HashAggregate (32) + : +- Exchange (31) + : +- * HashAggregate (30) + : +- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : 
: +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.customer_address (11) + : +- BroadcastExchange (27) + : +- * BroadcastHashJoin LeftSemi BuildRight (26) + : :- * Filter (20) + : : +- * ColumnarToRow (19) + : : +- Scan parquet default.item (18) + : +- BroadcastExchange (25) + : +- * Project (24) + : +- * Filter (23) + : +- * ColumnarToRow (22) + : +- Scan parquet default.item (21) + :- * HashAggregate (47) + : +- Exchange (46) + : +- * HashAggregate (45) + : +- * Project (44) + : +- * BroadcastHashJoin Inner BuildRight (43) + : :- * Project (41) + : : +- * BroadcastHashJoin Inner BuildRight (40) + : : :- * Project (38) + : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : :- * Filter (35) + : : : : +- * ColumnarToRow (34) + : : : : +- Scan parquet default.catalog_sales (33) + : : : +- ReusedExchange (36) + : : +- ReusedExchange (39) + : +- ReusedExchange (42) + +- * HashAggregate (62) + +- Exchange (61) + +- * HashAggregate (60) + +- * Project (59) + +- * BroadcastHashJoin Inner BuildRight (58) + :- * Project (56) + : +- * BroadcastHashJoin Inner BuildRight (55) + : :- * Project (53) + : : +- * BroadcastHashJoin Inner BuildRight (52) + : : :- * Filter (50) + : : : +- * ColumnarToRow (49) + : : : +- Scan parquet default.web_sales (48) + : : +- ReusedExchange (51) + : +- ReusedExchange (54) + +- ReusedExchange (57) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [4]: 
[ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] + +(3) Filter [codegen id : 5] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] +Condition : ((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_addr_sk#3)) AND isnotnull(ss_item_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#5, d_year#6, d_moy#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Condition : ((((isnotnull(d_year#6) AND isnotnull(d_moy#7)) AND (d_year#6 = 2001)) AND (d_moy#7 = 2)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(10) Project [codegen id : 5] +Output [3]: [ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4, d_date_sk#5] + +(11) Scan parquet default.customer_address +Output [2]: [ca_address_sk#9, ca_gmt_offset#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: 
[ca_address_sk#9, ca_gmt_offset#10] + +(13) Filter [codegen id : 2] +Input [2]: [ca_address_sk#9, ca_gmt_offset#10] +Condition : ((isnotnull(ca_gmt_offset#10) AND (ca_gmt_offset#10 = -5.00)) AND isnotnull(ca_address_sk#9)) + +(14) Project [codegen id : 2] +Output [1]: [ca_address_sk#9] +Input [2]: [ca_address_sk#9, ca_gmt_offset#10] + +(15) BroadcastExchange +Input [1]: [ca_address_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_addr_sk#3] +Right keys [1]: [ca_address_sk#9] +Join condition: None + +(17) Project [codegen id : 5] +Output [2]: [ss_item_sk#2, ss_ext_sales_price#4] +Input [4]: [ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4, ca_address_sk#9] + +(18) Scan parquet default.item +Output [2]: [i_item_sk#12, i_item_id#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#12, i_item_id#13] + +(20) Filter [codegen id : 4] +Input [2]: [i_item_sk#12, i_item_id#13] +Condition : isnotnull(i_item_sk#12) + +(21) Scan parquet default.item +Output [2]: [i_item_id#13, i_color#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [In(i_color, [slate,blanched,burnished])] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 3] +Input [2]: [i_item_id#13, i_color#14] + +(23) Filter [codegen id : 3] +Input [2]: [i_item_id#13, i_color#14] +Condition : i_color#14 IN (slate,blanched,burnished) + +(24) Project [codegen id : 3] +Output [1]: [i_item_id#13 AS i_item_id#13#15] +Input [2]: [i_item_id#13, i_color#14] + +(25) BroadcastExchange +Input [1]: [i_item_id#13#15] +Arguments: 
HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#16] + +(26) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_id#13] +Right keys [1]: [i_item_id#13#15] +Join condition: None + +(27) BroadcastExchange +Input [2]: [i_item_sk#12, i_item_id#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#12] +Join condition: None + +(29) Project [codegen id : 5] +Output [2]: [ss_ext_sales_price#4, i_item_id#13] +Input [4]: [ss_item_sk#2, ss_ext_sales_price#4, i_item_sk#12, i_item_id#13] + +(30) HashAggregate [codegen id : 5] +Input [2]: [ss_ext_sales_price#4, i_item_id#13] +Keys [1]: [i_item_id#13] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#4))] +Aggregate Attributes [1]: [sum#18] +Results [2]: [i_item_id#13, sum#19] + +(31) Exchange +Input [2]: [i_item_id#13, sum#19] +Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#20] + +(32) HashAggregate [codegen id : 6] +Input [2]: [i_item_id#13, sum#19] +Keys [1]: [i_item_id#13] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#4))#21] +Results [2]: [i_item_id#13, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#21,17,2) AS total_sales#22] + +(33) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 11] +Input [4]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] + +(35) Filter [codegen id : 11] +Input [4]: [cs_sold_date_sk#23, 
cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] +Condition : ((isnotnull(cs_sold_date_sk#23) AND isnotnull(cs_bill_addr_sk#24)) AND isnotnull(cs_item_sk#25)) + +(36) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(37) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_sold_date_sk#23] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(38) Project [codegen id : 11] +Output [3]: [cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] +Input [5]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26, d_date_sk#5] + +(39) ReusedExchange [Reuses operator id: 15] +Output [1]: [ca_address_sk#9] + +(40) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_bill_addr_sk#24] +Right keys [1]: [ca_address_sk#9] +Join condition: None + +(41) Project [codegen id : 11] +Output [2]: [cs_item_sk#25, cs_ext_sales_price#26] +Input [4]: [cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26, ca_address_sk#9] + +(42) ReusedExchange [Reuses operator id: 27] +Output [2]: [i_item_sk#12, i_item_id#13] + +(43) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_item_sk#25] +Right keys [1]: [i_item_sk#12] +Join condition: None + +(44) Project [codegen id : 11] +Output [2]: [cs_ext_sales_price#26, i_item_id#13] +Input [4]: [cs_item_sk#25, cs_ext_sales_price#26, i_item_sk#12, i_item_id#13] + +(45) HashAggregate [codegen id : 11] +Input [2]: [cs_ext_sales_price#26, i_item_id#13] +Keys [1]: [i_item_id#13] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#26))] +Aggregate Attributes [1]: [sum#27] +Results [2]: [i_item_id#13, sum#28] + +(46) Exchange +Input [2]: [i_item_id#13, sum#28] +Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#29] + +(47) HashAggregate [codegen id : 12] +Input [2]: [i_item_id#13, sum#28] +Keys [1]: [i_item_id#13] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#26))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#26))#30] +Results [2]: [i_item_id#13, 
MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#26))#30,17,2) AS total_sales#31] + +(48) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(49) ColumnarToRow [codegen id : 17] +Input [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] + +(50) Filter [codegen id : 17] +Input [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] +Condition : ((isnotnull(ws_sold_date_sk#32) AND isnotnull(ws_bill_addr_sk#34)) AND isnotnull(ws_item_sk#33)) + +(51) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(52) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_sold_date_sk#32] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(53) Project [codegen id : 17] +Output [3]: [ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] +Input [5]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35, d_date_sk#5] + +(54) ReusedExchange [Reuses operator id: 15] +Output [1]: [ca_address_sk#9] + +(55) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_bill_addr_sk#34] +Right keys [1]: [ca_address_sk#9] +Join condition: None + +(56) Project [codegen id : 17] +Output [2]: [ws_item_sk#33, ws_ext_sales_price#35] +Input [4]: [ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35, ca_address_sk#9] + +(57) ReusedExchange [Reuses operator id: 27] +Output [2]: [i_item_sk#12, i_item_id#13] + +(58) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_item_sk#33] +Right keys [1]: [i_item_sk#12] +Join condition: None + +(59) Project [codegen id : 17] +Output [2]: [ws_ext_sales_price#35, i_item_id#13] +Input [4]: 
[ws_item_sk#33, ws_ext_sales_price#35, i_item_sk#12, i_item_id#13] + +(60) HashAggregate [codegen id : 17] +Input [2]: [ws_ext_sales_price#35, i_item_id#13] +Keys [1]: [i_item_id#13] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#35))] +Aggregate Attributes [1]: [sum#36] +Results [2]: [i_item_id#13, sum#37] + +(61) Exchange +Input [2]: [i_item_id#13, sum#37] +Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#38] + +(62) HashAggregate [codegen id : 18] +Input [2]: [i_item_id#13, sum#37] +Keys [1]: [i_item_id#13] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#35))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#35))#39] +Results [2]: [i_item_id#13, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#35))#39,17,2) AS total_sales#40] + +(63) Union + +(64) HashAggregate [codegen id : 19] +Input [2]: [i_item_id#13, total_sales#22] +Keys [1]: [i_item_id#13] +Functions [1]: [partial_sum(total_sales#22)] +Aggregate Attributes [2]: [sum#41, isEmpty#42] +Results [3]: [i_item_id#13, sum#43, isEmpty#44] + +(65) Exchange +Input [3]: [i_item_id#13, sum#43, isEmpty#44] +Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#45] + +(66) HashAggregate [codegen id : 20] +Input [3]: [i_item_id#13, sum#43, isEmpty#44] +Keys [1]: [i_item_id#13] +Functions [1]: [sum(total_sales#22)] +Aggregate Attributes [1]: [sum(total_sales#22)#46] +Results [2]: [i_item_id#13, sum(total_sales#22)#46 AS total_sales#47] + +(67) TakeOrderedAndProject +Input [2]: [i_item_id#13, total_sales#47] +Arguments: 100, [total_sales#47 ASC NULLS FIRST], [i_item_id#13, total_sales#47] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56.sf100/simplified.txt new file mode 100644 index 0000000000000..f8bc60fffb502 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56.sf100/simplified.txt @@ -0,0 +1,101 @@ 
+TakeOrderedAndProject [i_item_id,total_sales] + WholeStageCodegen (20) + HashAggregate [i_item_id,isEmpty,sum] [isEmpty,sum,sum(total_sales),total_sales] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen (19) + HashAggregate [i_item_id,total_sales] [isEmpty,isEmpty,sum,sum] + InputAdapter + Union + WholeStageCodegen (6) + HashAggregate [i_item_id,sum] [sum,sum(UnscaledValue(ss_ext_sales_price)),total_sales] + InputAdapter + Exchange [i_item_id] #2 + WholeStageCodegen (5) + HashAggregate [i_item_id,ss_ext_sales_price] [sum,sum] + Project [i_item_id,ss_ext_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] + Project [ss_addr_sk,ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_addr_sk,ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [ca_address_sk] + Filter [ca_address_sk,ca_gmt_offset] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + BroadcastHashJoin [i_item_id,i_item_id] + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_id,i_item_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Project [i_item_id] + Filter [i_color] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_color,i_item_id] + WholeStageCodegen (12) + HashAggregate [i_item_id,sum] [sum,sum(UnscaledValue(cs_ext_sales_price)),total_sales] + InputAdapter + Exchange [i_item_id] #7 + WholeStageCodegen (11) + 
HashAggregate [cs_ext_sales_price,i_item_id] [sum,sum] + Project [cs_ext_sales_price,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_ext_sales_price,cs_item_sk] + BroadcastHashJoin [ca_address_sk,cs_bill_addr_sk] + Project [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_bill_addr_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [ca_address_sk] #4 + InputAdapter + ReusedExchange [i_item_id,i_item_sk] #5 + WholeStageCodegen (18) + HashAggregate [i_item_id,sum] [sum,sum(UnscaledValue(ws_ext_sales_price)),total_sales] + InputAdapter + Exchange [i_item_id] #8 + WholeStageCodegen (17) + HashAggregate [i_item_id,ws_ext_sales_price] [sum,sum] + Project [i_item_id,ws_ext_sales_price] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_ext_sales_price,ws_item_sk] + BroadcastHashJoin [ca_address_sk,ws_bill_addr_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_bill_addr_sk,ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [ca_address_sk] #4 + InputAdapter + ReusedExchange [i_item_id,i_item_sk] #5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56/explain.txt new file mode 100644 index 0000000000000..ba02fc0bfcfd8 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56/explain.txt @@ -0,0 +1,378 @@ +== Physical Plan == +TakeOrderedAndProject (67) ++- * HashAggregate (66) + +- Exchange (65) + +- 
* HashAggregate (64) + +- Union (63) + :- * HashAggregate (32) + : +- Exchange (31) + : +- * HashAggregate (30) + : +- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.customer_address (11) + : +- BroadcastExchange (27) + : +- * BroadcastHashJoin LeftSemi BuildRight (26) + : :- * Filter (20) + : : +- * ColumnarToRow (19) + : : +- Scan parquet default.item (18) + : +- BroadcastExchange (25) + : +- * Project (24) + : +- * Filter (23) + : +- * ColumnarToRow (22) + : +- Scan parquet default.item (21) + :- * HashAggregate (47) + : +- Exchange (46) + : +- * HashAggregate (45) + : +- * Project (44) + : +- * BroadcastHashJoin Inner BuildRight (43) + : :- * Project (41) + : : +- * BroadcastHashJoin Inner BuildRight (40) + : : :- * Project (38) + : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : :- * Filter (35) + : : : : +- * ColumnarToRow (34) + : : : : +- Scan parquet default.catalog_sales (33) + : : : +- ReusedExchange (36) + : : +- ReusedExchange (39) + : +- ReusedExchange (42) + +- * HashAggregate (62) + +- Exchange (61) + +- * HashAggregate (60) + +- * Project (59) + +- * BroadcastHashJoin Inner BuildRight (58) + :- * Project (56) + : +- * BroadcastHashJoin Inner BuildRight (55) + : :- * Project (53) + : : +- * BroadcastHashJoin Inner BuildRight (52) + : : :- * Filter (50) + : : : +- * ColumnarToRow (49) + : : : +- Scan parquet default.web_sales (48) + : : +- ReusedExchange (51) + : +- ReusedExchange 
(54) + +- ReusedExchange (57) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] + +(3) Filter [codegen id : 5] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] +Condition : ((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_addr_sk#3)) AND isnotnull(ss_item_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#5, d_year#6, d_moy#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Condition : ((((isnotnull(d_year#6) AND isnotnull(d_moy#7)) AND (d_year#6 = 2001)) AND (d_moy#7 = 2)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(10) Project [codegen id : 5] +Output [3]: [ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, 
ss_ext_sales_price#4, d_date_sk#5] + +(11) Scan parquet default.customer_address +Output [2]: [ca_address_sk#9, ca_gmt_offset#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [ca_address_sk#9, ca_gmt_offset#10] + +(13) Filter [codegen id : 2] +Input [2]: [ca_address_sk#9, ca_gmt_offset#10] +Condition : ((isnotnull(ca_gmt_offset#10) AND (ca_gmt_offset#10 = -5.00)) AND isnotnull(ca_address_sk#9)) + +(14) Project [codegen id : 2] +Output [1]: [ca_address_sk#9] +Input [2]: [ca_address_sk#9, ca_gmt_offset#10] + +(15) BroadcastExchange +Input [1]: [ca_address_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_addr_sk#3] +Right keys [1]: [ca_address_sk#9] +Join condition: None + +(17) Project [codegen id : 5] +Output [2]: [ss_item_sk#2, ss_ext_sales_price#4] +Input [4]: [ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4, ca_address_sk#9] + +(18) Scan parquet default.item +Output [2]: [i_item_sk#12, i_item_id#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#12, i_item_id#13] + +(20) Filter [codegen id : 4] +Input [2]: [i_item_sk#12, i_item_id#13] +Condition : isnotnull(i_item_sk#12) + +(21) Scan parquet default.item +Output [2]: [i_item_id#13, i_color#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: 
[In(i_color, [slate,blanched,burnished])] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 3] +Input [2]: [i_item_id#13, i_color#14] + +(23) Filter [codegen id : 3] +Input [2]: [i_item_id#13, i_color#14] +Condition : i_color#14 IN (slate,blanched,burnished) + +(24) Project [codegen id : 3] +Output [1]: [i_item_id#13 AS i_item_id#13#15] +Input [2]: [i_item_id#13, i_color#14] + +(25) BroadcastExchange +Input [1]: [i_item_id#13#15] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#16] + +(26) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_id#13] +Right keys [1]: [i_item_id#13#15] +Join condition: None + +(27) BroadcastExchange +Input [2]: [i_item_sk#12, i_item_id#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#12] +Join condition: None + +(29) Project [codegen id : 5] +Output [2]: [ss_ext_sales_price#4, i_item_id#13] +Input [4]: [ss_item_sk#2, ss_ext_sales_price#4, i_item_sk#12, i_item_id#13] + +(30) HashAggregate [codegen id : 5] +Input [2]: [ss_ext_sales_price#4, i_item_id#13] +Keys [1]: [i_item_id#13] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#4))] +Aggregate Attributes [1]: [sum#18] +Results [2]: [i_item_id#13, sum#19] + +(31) Exchange +Input [2]: [i_item_id#13, sum#19] +Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#20] + +(32) HashAggregate [codegen id : 6] +Input [2]: [i_item_id#13, sum#19] +Keys [1]: [i_item_id#13] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#4))#21] +Results [2]: [i_item_id#13, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#21,17,2) AS total_sales#22] + +(33) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] +Batched: true +Location: 
InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 11] +Input [4]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] + +(35) Filter [codegen id : 11] +Input [4]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] +Condition : ((isnotnull(cs_sold_date_sk#23) AND isnotnull(cs_bill_addr_sk#24)) AND isnotnull(cs_item_sk#25)) + +(36) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(37) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_sold_date_sk#23] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(38) Project [codegen id : 11] +Output [3]: [cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] +Input [5]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26, d_date_sk#5] + +(39) ReusedExchange [Reuses operator id: 15] +Output [1]: [ca_address_sk#9] + +(40) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_bill_addr_sk#24] +Right keys [1]: [ca_address_sk#9] +Join condition: None + +(41) Project [codegen id : 11] +Output [2]: [cs_item_sk#25, cs_ext_sales_price#26] +Input [4]: [cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26, ca_address_sk#9] + +(42) ReusedExchange [Reuses operator id: 27] +Output [2]: [i_item_sk#12, i_item_id#13] + +(43) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_item_sk#25] +Right keys [1]: [i_item_sk#12] +Join condition: None + +(44) Project [codegen id : 11] +Output [2]: [cs_ext_sales_price#26, i_item_id#13] +Input [4]: [cs_item_sk#25, cs_ext_sales_price#26, i_item_sk#12, i_item_id#13] + +(45) HashAggregate [codegen id : 11] +Input [2]: [cs_ext_sales_price#26, i_item_id#13] +Keys [1]: [i_item_id#13] +Functions [1]: 
[partial_sum(UnscaledValue(cs_ext_sales_price#26))] +Aggregate Attributes [1]: [sum#27] +Results [2]: [i_item_id#13, sum#28] + +(46) Exchange +Input [2]: [i_item_id#13, sum#28] +Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#29] + +(47) HashAggregate [codegen id : 12] +Input [2]: [i_item_id#13, sum#28] +Keys [1]: [i_item_id#13] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#26))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#26))#30] +Results [2]: [i_item_id#13, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#26))#30,17,2) AS total_sales#31] + +(48) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(49) ColumnarToRow [codegen id : 17] +Input [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] + +(50) Filter [codegen id : 17] +Input [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] +Condition : ((isnotnull(ws_sold_date_sk#32) AND isnotnull(ws_bill_addr_sk#34)) AND isnotnull(ws_item_sk#33)) + +(51) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(52) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_sold_date_sk#32] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(53) Project [codegen id : 17] +Output [3]: [ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] +Input [5]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35, d_date_sk#5] + +(54) ReusedExchange [Reuses operator id: 15] +Output [1]: [ca_address_sk#9] + +(55) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_bill_addr_sk#34] +Right keys [1]: [ca_address_sk#9] +Join condition: None 
+ +(56) Project [codegen id : 17] +Output [2]: [ws_item_sk#33, ws_ext_sales_price#35] +Input [4]: [ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35, ca_address_sk#9] + +(57) ReusedExchange [Reuses operator id: 27] +Output [2]: [i_item_sk#12, i_item_id#13] + +(58) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_item_sk#33] +Right keys [1]: [i_item_sk#12] +Join condition: None + +(59) Project [codegen id : 17] +Output [2]: [ws_ext_sales_price#35, i_item_id#13] +Input [4]: [ws_item_sk#33, ws_ext_sales_price#35, i_item_sk#12, i_item_id#13] + +(60) HashAggregate [codegen id : 17] +Input [2]: [ws_ext_sales_price#35, i_item_id#13] +Keys [1]: [i_item_id#13] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#35))] +Aggregate Attributes [1]: [sum#36] +Results [2]: [i_item_id#13, sum#37] + +(61) Exchange +Input [2]: [i_item_id#13, sum#37] +Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#38] + +(62) HashAggregate [codegen id : 18] +Input [2]: [i_item_id#13, sum#37] +Keys [1]: [i_item_id#13] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#35))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#35))#39] +Results [2]: [i_item_id#13, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#35))#39,17,2) AS total_sales#40] + +(63) Union + +(64) HashAggregate [codegen id : 19] +Input [2]: [i_item_id#13, total_sales#22] +Keys [1]: [i_item_id#13] +Functions [1]: [partial_sum(total_sales#22)] +Aggregate Attributes [2]: [sum#41, isEmpty#42] +Results [3]: [i_item_id#13, sum#43, isEmpty#44] + +(65) Exchange +Input [3]: [i_item_id#13, sum#43, isEmpty#44] +Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#45] + +(66) HashAggregate [codegen id : 20] +Input [3]: [i_item_id#13, sum#43, isEmpty#44] +Keys [1]: [i_item_id#13] +Functions [1]: [sum(total_sales#22)] +Aggregate Attributes [1]: [sum(total_sales#22)#46] +Results [2]: [i_item_id#13, sum(total_sales#22)#46 AS total_sales#47] + +(67) TakeOrderedAndProject +Input [2]: [i_item_id#13, 
total_sales#47] +Arguments: 100, [total_sales#47 ASC NULLS FIRST], [i_item_id#13, total_sales#47] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56/simplified.txt new file mode 100644 index 0000000000000..f8bc60fffb502 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56/simplified.txt @@ -0,0 +1,101 @@ +TakeOrderedAndProject [i_item_id,total_sales] + WholeStageCodegen (20) + HashAggregate [i_item_id,isEmpty,sum] [isEmpty,sum,sum(total_sales),total_sales] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen (19) + HashAggregate [i_item_id,total_sales] [isEmpty,isEmpty,sum,sum] + InputAdapter + Union + WholeStageCodegen (6) + HashAggregate [i_item_id,sum] [sum,sum(UnscaledValue(ss_ext_sales_price)),total_sales] + InputAdapter + Exchange [i_item_id] #2 + WholeStageCodegen (5) + HashAggregate [i_item_id,ss_ext_sales_price] [sum,sum] + Project [i_item_id,ss_ext_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] + Project [ss_addr_sk,ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_addr_sk,ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [ca_address_sk] + Filter [ca_address_sk,ca_gmt_offset] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + BroadcastHashJoin 
[i_item_id,i_item_id] + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_id,i_item_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Project [i_item_id] + Filter [i_color] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_color,i_item_id] + WholeStageCodegen (12) + HashAggregate [i_item_id,sum] [sum,sum(UnscaledValue(cs_ext_sales_price)),total_sales] + InputAdapter + Exchange [i_item_id] #7 + WholeStageCodegen (11) + HashAggregate [cs_ext_sales_price,i_item_id] [sum,sum] + Project [cs_ext_sales_price,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_ext_sales_price,cs_item_sk] + BroadcastHashJoin [ca_address_sk,cs_bill_addr_sk] + Project [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_bill_addr_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [ca_address_sk] #4 + InputAdapter + ReusedExchange [i_item_id,i_item_sk] #5 + WholeStageCodegen (18) + HashAggregate [i_item_id,sum] [sum,sum(UnscaledValue(ws_ext_sales_price)),total_sales] + InputAdapter + Exchange [i_item_id] #8 + WholeStageCodegen (17) + HashAggregate [i_item_id,ws_ext_sales_price] [sum,sum] + Project [i_item_id,ws_ext_sales_price] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_ext_sales_price,ws_item_sk] + BroadcastHashJoin [ca_address_sk,ws_bill_addr_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_bill_addr_sk,ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [ca_address_sk] #4 + InputAdapter + 
ReusedExchange [i_item_id,i_item_sk] #5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.sf100/explain.txt new file mode 100644 index 0000000000000..a5bd18b0d822f --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.sf100/explain.txt @@ -0,0 +1,313 @@ +== Physical Plan == +TakeOrderedAndProject (58) ++- * Project (57) + +- * SortMergeJoin Inner (56) + :- * Project (48) + : +- * SortMergeJoin Inner (47) + : :- * Sort (37) + : : +- Exchange (36) + : : +- * Filter (35) + : : +- Window (34) + : : +- * Sort (33) + : : +- Exchange (32) + : : +- * Project (31) + : : +- Window (30) + : : +- * Sort (29) + : : +- Exchange (28) + : : +- * HashAggregate (27) + : : +- Exchange (26) + : : +- * HashAggregate (25) + : : +- * Project (24) + : : +- * SortMergeJoin Inner (23) + : : :- * Sort (17) + : : : +- Exchange (16) + : : : +- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.catalog_sales (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.date_dim (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.call_center (10) + : : +- * Sort (22) + : : +- Exchange (21) + : : +- * Filter (20) + : : +- * ColumnarToRow (19) + : : +- Scan parquet default.item (18) + : +- * Sort (46) + : +- Exchange (45) + : +- * Project (44) + : +- * Filter (43) + : +- Window (42) + : +- * Sort (41) + : +- Exchange (40) + : +- * HashAggregate (39) + : +- ReusedExchange (38) + +- * Sort (55) + +- Exchange (54) + +- * Project (53) + +- * Filter (52) + +- Window (51) + +- * Sort (50) + +- 
ReusedExchange (49) + + +(1) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#1, cs_call_center_sk#2, cs_item_sk#3, cs_sales_price#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_call_center_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [4]: [cs_sold_date_sk#1, cs_call_center_sk#2, cs_item_sk#3, cs_sales_price#4] + +(3) Filter [codegen id : 3] +Input [4]: [cs_sold_date_sk#1, cs_call_center_sk#2, cs_item_sk#3, cs_sales_price#4] +Condition : ((isnotnull(cs_item_sk#3) AND isnotnull(cs_sold_date_sk#1)) AND isnotnull(cs_call_center_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#5, d_year#6, d_moy#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Condition : ((((d_year#6 = 1999) OR ((d_year#6 = 1998) AND (d_moy#7 = 12))) OR ((d_year#6 = 2000) AND (d_moy#7 = 1))) AND isnotnull(d_date_sk#5)) + +(7) BroadcastExchange +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#8] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(9) Project [codegen id : 3] +Output [5]: [cs_call_center_sk#2, cs_item_sk#3, cs_sales_price#4, d_year#6, d_moy#7] +Input [7]: [cs_sold_date_sk#1, 
cs_call_center_sk#2, cs_item_sk#3, cs_sales_price#4, d_date_sk#5, d_year#6, d_moy#7] + +(10) Scan parquet default.call_center +Output [2]: [cc_call_center_sk#9, cc_name#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/call_center] +PushedFilters: [IsNotNull(cc_call_center_sk), IsNotNull(cc_name)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [cc_call_center_sk#9, cc_name#10] + +(12) Filter [codegen id : 2] +Input [2]: [cc_call_center_sk#9, cc_name#10] +Condition : (isnotnull(cc_call_center_sk#9) AND isnotnull(cc_name#10)) + +(13) BroadcastExchange +Input [2]: [cc_call_center_sk#9, cc_name#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_call_center_sk#2] +Right keys [1]: [cc_call_center_sk#9] +Join condition: None + +(15) Project [codegen id : 3] +Output [5]: [cs_item_sk#3, cs_sales_price#4, d_year#6, d_moy#7, cc_name#10] +Input [7]: [cs_call_center_sk#2, cs_item_sk#3, cs_sales_price#4, d_year#6, d_moy#7, cc_call_center_sk#9, cc_name#10] + +(16) Exchange +Input [5]: [cs_item_sk#3, cs_sales_price#4, d_year#6, d_moy#7, cc_name#10] +Arguments: hashpartitioning(cs_item_sk#3, 5), true, [id=#12] + +(17) Sort [codegen id : 4] +Input [5]: [cs_item_sk#3, cs_sales_price#4, d_year#6, d_moy#7, cc_name#10] +Arguments: [cs_item_sk#3 ASC NULLS FIRST], false, 0 + +(18) Scan parquet default.item +Output [3]: [i_item_sk#13, i_brand#14, i_category#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category), IsNotNull(i_brand)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 5] +Input [3]: [i_item_sk#13, i_brand#14, i_category#15] + +(20) 
Filter [codegen id : 5] +Input [3]: [i_item_sk#13, i_brand#14, i_category#15] +Condition : ((isnotnull(i_item_sk#13) AND isnotnull(i_category#15)) AND isnotnull(i_brand#14)) + +(21) Exchange +Input [3]: [i_item_sk#13, i_brand#14, i_category#15] +Arguments: hashpartitioning(i_item_sk#13, 5), true, [id=#16] + +(22) Sort [codegen id : 6] +Input [3]: [i_item_sk#13, i_brand#14, i_category#15] +Arguments: [i_item_sk#13 ASC NULLS FIRST], false, 0 + +(23) SortMergeJoin [codegen id : 7] +Left keys [1]: [cs_item_sk#3] +Right keys [1]: [i_item_sk#13] +Join condition: None + +(24) Project [codegen id : 7] +Output [6]: [i_brand#14, i_category#15, cs_sales_price#4, d_year#6, d_moy#7, cc_name#10] +Input [8]: [cs_item_sk#3, cs_sales_price#4, d_year#6, d_moy#7, cc_name#10, i_item_sk#13, i_brand#14, i_category#15] + +(25) HashAggregate [codegen id : 7] +Input [6]: [i_brand#14, i_category#15, cs_sales_price#4, d_year#6, d_moy#7, cc_name#10] +Keys [5]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7] +Functions [1]: [partial_sum(UnscaledValue(cs_sales_price#4))] +Aggregate Attributes [1]: [sum#17] +Results [6]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, sum#18] + +(26) Exchange +Input [6]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, sum#18] +Arguments: hashpartitioning(i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, 5), true, [id=#19] + +(27) HashAggregate [codegen id : 8] +Input [6]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, sum#18] +Keys [5]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7] +Functions [1]: [sum(UnscaledValue(cs_sales_price#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#4))#20] +Results [7]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, MakeDecimal(sum(UnscaledValue(cs_sales_price#4))#20,17,2) AS sum_sales#21, MakeDecimal(sum(UnscaledValue(cs_sales_price#4))#20,17,2) AS _w0#22] + +(28) Exchange +Input [7]: [i_category#15, i_brand#14, cc_name#10, d_year#6, 
d_moy#7, sum_sales#21, _w0#22] +Arguments: hashpartitioning(i_category#15, i_brand#14, cc_name#10, d_year#6, 5), true, [id=#23] + +(29) Sort [codegen id : 9] +Input [7]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, sum_sales#21, _w0#22] +Arguments: [i_category#15 ASC NULLS FIRST, i_brand#14 ASC NULLS FIRST, cc_name#10 ASC NULLS FIRST, d_year#6 ASC NULLS FIRST], false, 0 + +(30) Window +Input [7]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, sum_sales#21, _w0#22] +Arguments: [avg(_w0#22) windowspecdefinition(i_category#15, i_brand#14, cc_name#10, d_year#6, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#24], [i_category#15, i_brand#14, cc_name#10, d_year#6] + +(31) Project [codegen id : 10] +Output [7]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, sum_sales#21, avg_monthly_sales#24] +Input [8]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, sum_sales#21, _w0#22, avg_monthly_sales#24] + +(32) Exchange +Input [7]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, sum_sales#21, avg_monthly_sales#24] +Arguments: hashpartitioning(i_category#15, i_brand#14, cc_name#10, 5), true, [id=#25] + +(33) Sort [codegen id : 11] +Input [7]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, sum_sales#21, avg_monthly_sales#24] +Arguments: [i_category#15 ASC NULLS FIRST, i_brand#14 ASC NULLS FIRST, cc_name#10 ASC NULLS FIRST, d_year#6 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST], false, 0 + +(34) Window +Input [7]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, sum_sales#21, avg_monthly_sales#24] +Arguments: [rank(d_year#6, d_moy#7) windowspecdefinition(i_category#15, i_brand#14, cc_name#10, d_year#6 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#26], [i_category#15, i_brand#14, cc_name#10], [d_year#6 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST] + +(35) Filter [codegen id : 12] +Input [8]: 
[i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, sum_sales#21, avg_monthly_sales#24, rn#26] +Condition : (((((isnotnull(avg_monthly_sales#24) AND isnotnull(d_year#6)) AND (d_year#6 = 1999)) AND (avg_monthly_sales#24 > 0.000000)) AND (CASE WHEN (avg_monthly_sales#24 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6), true))) / promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(38,16), true) ELSE null END > 0.1000000000000000)) AND isnotnull(rn#26)) + +(36) Exchange +Input [8]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, sum_sales#21, avg_monthly_sales#24, rn#26] +Arguments: hashpartitioning(i_category#15, i_brand#14, cc_name#10, rn#26, 5), true, [id=#27] + +(37) Sort [codegen id : 13] +Input [8]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, sum_sales#21, avg_monthly_sales#24, rn#26] +Arguments: [i_category#15 ASC NULLS FIRST, i_brand#14 ASC NULLS FIRST, cc_name#10 ASC NULLS FIRST, rn#26 ASC NULLS FIRST], false, 0 + +(38) ReusedExchange [Reuses operator id: 26] +Output [6]: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, sum#33] + +(39) HashAggregate [codegen id : 21] +Input [6]: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, sum#33] +Keys [5]: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32] +Functions [1]: [sum(UnscaledValue(cs_sales_price#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#4))#34] +Results [6]: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, MakeDecimal(sum(UnscaledValue(cs_sales_price#4))#34,17,2) AS sum_sales#35] + +(40) Exchange +Input [6]: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, sum_sales#35] +Arguments: hashpartitioning(i_category#28, i_brand#29, cc_name#30, 5), true, [id=#36] + +(41) Sort [codegen id : 22] +Input [6]: [i_category#28, 
i_brand#29, cc_name#30, d_year#31, d_moy#32, sum_sales#35] +Arguments: [i_category#28 ASC NULLS FIRST, i_brand#29 ASC NULLS FIRST, cc_name#30 ASC NULLS FIRST, d_year#31 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST], false, 0 + +(42) Window +Input [6]: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, sum_sales#35] +Arguments: [rank(d_year#31, d_moy#32) windowspecdefinition(i_category#28, i_brand#29, cc_name#30, d_year#31 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#37], [i_category#28, i_brand#29, cc_name#30], [d_year#31 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST] + +(43) Filter [codegen id : 23] +Input [7]: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, sum_sales#35, rn#37] +Condition : isnotnull(rn#37) + +(44) Project [codegen id : 23] +Output [5]: [i_category#28, i_brand#29, cc_name#30, sum_sales#35, rn#37] +Input [7]: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, sum_sales#35, rn#37] + +(45) Exchange +Input [5]: [i_category#28, i_brand#29, cc_name#30, sum_sales#35, rn#37] +Arguments: hashpartitioning(i_category#28, i_brand#29, cc_name#30, (rn#37 + 1), 5), true, [id=#38] + +(46) Sort [codegen id : 24] +Input [5]: [i_category#28, i_brand#29, cc_name#30, sum_sales#35, rn#37] +Arguments: [i_category#28 ASC NULLS FIRST, i_brand#29 ASC NULLS FIRST, cc_name#30 ASC NULLS FIRST, (rn#37 + 1) ASC NULLS FIRST], false, 0 + +(47) SortMergeJoin [codegen id : 25] +Left keys [4]: [i_category#15, i_brand#14, cc_name#10, rn#26] +Right keys [4]: [i_category#28, i_brand#29, cc_name#30, (rn#37 + 1)] +Join condition: None + +(48) Project [codegen id : 25] +Output [9]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, sum_sales#21, avg_monthly_sales#24, rn#26, sum_sales#35] +Input [13]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, sum_sales#21, avg_monthly_sales#24, rn#26, i_category#28, i_brand#29, cc_name#30, sum_sales#35, rn#37] + +(49) ReusedExchange 
[Reuses operator id: 40] +Output [6]: [i_category#39, i_brand#40, cc_name#41, d_year#42, d_moy#43, sum_sales#44] + +(50) Sort [codegen id : 34] +Input [6]: [i_category#39, i_brand#40, cc_name#41, d_year#42, d_moy#43, sum_sales#44] +Arguments: [i_category#39 ASC NULLS FIRST, i_brand#40 ASC NULLS FIRST, cc_name#41 ASC NULLS FIRST, d_year#42 ASC NULLS FIRST, d_moy#43 ASC NULLS FIRST], false, 0 + +(51) Window +Input [6]: [i_category#39, i_brand#40, cc_name#41, d_year#42, d_moy#43, sum_sales#44] +Arguments: [rank(d_year#42, d_moy#43) windowspecdefinition(i_category#39, i_brand#40, cc_name#41, d_year#42 ASC NULLS FIRST, d_moy#43 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#45], [i_category#39, i_brand#40, cc_name#41], [d_year#42 ASC NULLS FIRST, d_moy#43 ASC NULLS FIRST] + +(52) Filter [codegen id : 35] +Input [7]: [i_category#39, i_brand#40, cc_name#41, d_year#42, d_moy#43, sum_sales#44, rn#45] +Condition : isnotnull(rn#45) + +(53) Project [codegen id : 35] +Output [5]: [i_category#39, i_brand#40, cc_name#41, sum_sales#44, rn#45] +Input [7]: [i_category#39, i_brand#40, cc_name#41, d_year#42, d_moy#43, sum_sales#44, rn#45] + +(54) Exchange +Input [5]: [i_category#39, i_brand#40, cc_name#41, sum_sales#44, rn#45] +Arguments: hashpartitioning(i_category#39, i_brand#40, cc_name#41, (rn#45 - 1), 5), true, [id=#46] + +(55) Sort [codegen id : 36] +Input [5]: [i_category#39, i_brand#40, cc_name#41, sum_sales#44, rn#45] +Arguments: [i_category#39 ASC NULLS FIRST, i_brand#40 ASC NULLS FIRST, cc_name#41 ASC NULLS FIRST, (rn#45 - 1) ASC NULLS FIRST], false, 0 + +(56) SortMergeJoin [codegen id : 37] +Left keys [4]: [i_category#15, i_brand#14, cc_name#10, rn#26] +Right keys [4]: [i_category#39, i_brand#40, cc_name#41, (rn#45 - 1)] +Join condition: None + +(57) Project [codegen id : 37] +Output [9]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, avg_monthly_sales#24, sum_sales#21, sum_sales#35 AS psum#47, sum_sales#44 AS 
nsum#48] +Input [14]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, sum_sales#21, avg_monthly_sales#24, rn#26, sum_sales#35, i_category#39, i_brand#40, cc_name#41, sum_sales#44, rn#45] + +(58) TakeOrderedAndProject +Input [9]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, avg_monthly_sales#24, sum_sales#21, psum#47, nsum#48] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6), true) ASC NULLS FIRST, cc_name#10 ASC NULLS FIRST], [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, avg_monthly_sales#24, sum_sales#21, psum#47, nsum#48] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.sf100/simplified.txt new file mode 100644 index 0000000000000..ba6b65472229a --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.sf100/simplified.txt @@ -0,0 +1,107 @@ +TakeOrderedAndProject [avg_monthly_sales,cc_name,d_moy,d_year,i_brand,i_category,nsum,psum,sum_sales] + WholeStageCodegen (37) + Project [avg_monthly_sales,cc_name,d_moy,d_year,i_brand,i_category,sum_sales,sum_sales,sum_sales] + SortMergeJoin [cc_name,cc_name,i_brand,i_brand,i_category,i_category,rn,rn] + InputAdapter + WholeStageCodegen (25) + Project [avg_monthly_sales,cc_name,d_moy,d_year,i_brand,i_category,rn,sum_sales,sum_sales] + SortMergeJoin [cc_name,cc_name,i_brand,i_brand,i_category,i_category,rn,rn] + InputAdapter + WholeStageCodegen (13) + Sort [cc_name,i_brand,i_category,rn] + InputAdapter + Exchange [cc_name,i_brand,i_category,rn] #1 + WholeStageCodegen (12) + Filter [avg_monthly_sales,d_year,rn,sum_sales] + InputAdapter + Window [cc_name,d_moy,d_year,i_brand,i_category] + WholeStageCodegen (11) + Sort [cc_name,d_moy,d_year,i_brand,i_category] + InputAdapter + Exchange 
[cc_name,i_brand,i_category] #2 + WholeStageCodegen (10) + Project [avg_monthly_sales,cc_name,d_moy,d_year,i_brand,i_category,sum_sales] + InputAdapter + Window [_w0,cc_name,d_year,i_brand,i_category] + WholeStageCodegen (9) + Sort [cc_name,d_year,i_brand,i_category] + InputAdapter + Exchange [cc_name,d_year,i_brand,i_category] #3 + WholeStageCodegen (8) + HashAggregate [cc_name,d_moy,d_year,i_brand,i_category,sum] [_w0,sum,sum(UnscaledValue(cs_sales_price)),sum_sales] + InputAdapter + Exchange [cc_name,d_moy,d_year,i_brand,i_category] #4 + WholeStageCodegen (7) + HashAggregate [cc_name,cs_sales_price,d_moy,d_year,i_brand,i_category] [sum,sum] + Project [cc_name,cs_sales_price,d_moy,d_year,i_brand,i_category] + SortMergeJoin [cs_item_sk,i_item_sk] + InputAdapter + WholeStageCodegen (4) + Sort [cs_item_sk] + InputAdapter + Exchange [cs_item_sk] #5 + WholeStageCodegen (3) + Project [cc_name,cs_item_sk,cs_sales_price,d_moy,d_year] + BroadcastHashJoin [cc_call_center_sk,cs_call_center_sk] + Project [cs_call_center_sk,cs_item_sk,cs_sales_price,d_moy,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_call_center_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (1) + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (2) + Filter [cc_call_center_sk,cc_name] + ColumnarToRow + InputAdapter + Scan parquet default.call_center [cc_call_center_sk,cc_name] + InputAdapter + WholeStageCodegen (6) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #8 + WholeStageCodegen (5) + Filter [i_brand,i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_category,i_item_sk] + InputAdapter + WholeStageCodegen (24) + Sort 
[cc_name,i_brand,i_category,rn] + InputAdapter + Exchange [cc_name,i_brand,i_category,rn] #9 + WholeStageCodegen (23) + Project [cc_name,i_brand,i_category,rn,sum_sales] + Filter [rn] + InputAdapter + Window [cc_name,d_moy,d_year,i_brand,i_category] + WholeStageCodegen (22) + Sort [cc_name,d_moy,d_year,i_brand,i_category] + InputAdapter + Exchange [cc_name,i_brand,i_category] #10 + WholeStageCodegen (21) + HashAggregate [cc_name,d_moy,d_year,i_brand,i_category,sum] [sum,sum(UnscaledValue(cs_sales_price)),sum_sales] + InputAdapter + ReusedExchange [cc_name,d_moy,d_year,i_brand,i_category,sum] #4 + InputAdapter + WholeStageCodegen (36) + Sort [cc_name,i_brand,i_category,rn] + InputAdapter + Exchange [cc_name,i_brand,i_category,rn] #11 + WholeStageCodegen (35) + Project [cc_name,i_brand,i_category,rn,sum_sales] + Filter [rn] + InputAdapter + Window [cc_name,d_moy,d_year,i_brand,i_category] + WholeStageCodegen (34) + Sort [cc_name,d_moy,d_year,i_brand,i_category] + InputAdapter + ReusedExchange [cc_name,d_moy,d_year,i_brand,i_category,sum_sales] #10 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/explain.txt new file mode 100644 index 0000000000000..5c8c93cee8107 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/explain.txt @@ -0,0 +1,278 @@ +== Physical Plan == +TakeOrderedAndProject (51) ++- * Project (50) + +- * BroadcastHashJoin Inner BuildRight (49) + :- * Project (42) + : +- * BroadcastHashJoin Inner BuildRight (41) + : :- * Filter (32) + : : +- Window (31) + : : +- * Sort (30) + : : +- Exchange (29) + : : +- * Project (28) + : : +- Window (27) + : : +- * Sort (26) + : : +- Exchange (25) + : : +- * HashAggregate (24) + : : +- Exchange (23) + : : +- * HashAggregate (22) + : : +- * Project (21) + : : +- * BroadcastHashJoin Inner BuildRight (20) + : : :- * Project (15) + : : : +- * BroadcastHashJoin 
Inner BuildRight (14) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.item (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.catalog_sales (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.date_dim (10) + : : +- BroadcastExchange (19) + : : +- * Filter (18) + : : +- * ColumnarToRow (17) + : : +- Scan parquet default.call_center (16) + : +- BroadcastExchange (40) + : +- * Project (39) + : +- * Filter (38) + : +- Window (37) + : +- * Sort (36) + : +- Exchange (35) + : +- * HashAggregate (34) + : +- ReusedExchange (33) + +- BroadcastExchange (48) + +- * Project (47) + +- * Filter (46) + +- Window (45) + +- * Sort (44) + +- ReusedExchange (43) + + +(1) Scan parquet default.item +Output [3]: [i_item_sk#1, i_brand#2, i_category#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category), IsNotNull(i_brand)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] + +(3) Filter [codegen id : 4] +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] +Condition : ((isnotnull(i_item_sk#1) AND isnotnull(i_category#3)) AND isnotnull(i_brand#2)) + +(4) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#4, cs_call_center_sk#5, cs_item_sk#6, cs_sales_price#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_call_center_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen 
id : 1] +Input [4]: [cs_sold_date_sk#4, cs_call_center_sk#5, cs_item_sk#6, cs_sales_price#7] + +(6) Filter [codegen id : 1] +Input [4]: [cs_sold_date_sk#4, cs_call_center_sk#5, cs_item_sk#6, cs_sales_price#7] +Condition : ((isnotnull(cs_item_sk#6) AND isnotnull(cs_sold_date_sk#4)) AND isnotnull(cs_call_center_sk#5)) + +(7) BroadcastExchange +Input [4]: [cs_sold_date_sk#4, cs_call_center_sk#5, cs_item_sk#6, cs_sales_price#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, false] as bigint)),false), [id=#8] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [cs_item_sk#6] +Join condition: None + +(9) Project [codegen id : 4] +Output [5]: [i_brand#2, i_category#3, cs_sold_date_sk#4, cs_call_center_sk#5, cs_sales_price#7] +Input [7]: [i_item_sk#1, i_brand#2, i_category#3, cs_sold_date_sk#4, cs_call_center_sk#5, cs_item_sk#6, cs_sales_price#7] + +(10) Scan parquet default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_moy#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] + +(12) Filter [codegen id : 2] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Condition : ((((d_year#10 = 1999) OR ((d_year#10 = 1998) AND (d_moy#11 = 12))) OR ((d_year#10 = 2000) AND (d_moy#11 = 1))) AND isnotnull(d_date_sk#9)) + +(13) BroadcastExchange +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#4] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(15) Project [codegen id : 4] +Output 
[6]: [i_brand#2, i_category#3, cs_call_center_sk#5, cs_sales_price#7, d_year#10, d_moy#11] +Input [8]: [i_brand#2, i_category#3, cs_sold_date_sk#4, cs_call_center_sk#5, cs_sales_price#7, d_date_sk#9, d_year#10, d_moy#11] + +(16) Scan parquet default.call_center +Output [2]: [cc_call_center_sk#13, cc_name#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/call_center] +PushedFilters: [IsNotNull(cc_call_center_sk), IsNotNull(cc_name)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [2]: [cc_call_center_sk#13, cc_name#14] + +(18) Filter [codegen id : 3] +Input [2]: [cc_call_center_sk#13, cc_name#14] +Condition : (isnotnull(cc_call_center_sk#13) AND isnotnull(cc_name#14)) + +(19) BroadcastExchange +Input [2]: [cc_call_center_sk#13, cc_name#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#15] + +(20) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_call_center_sk#5] +Right keys [1]: [cc_call_center_sk#13] +Join condition: None + +(21) Project [codegen id : 4] +Output [6]: [i_brand#2, i_category#3, cs_sales_price#7, d_year#10, d_moy#11, cc_name#14] +Input [8]: [i_brand#2, i_category#3, cs_call_center_sk#5, cs_sales_price#7, d_year#10, d_moy#11, cc_call_center_sk#13, cc_name#14] + +(22) HashAggregate [codegen id : 4] +Input [6]: [i_brand#2, i_category#3, cs_sales_price#7, d_year#10, d_moy#11, cc_name#14] +Keys [5]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11] +Functions [1]: [partial_sum(UnscaledValue(cs_sales_price#7))] +Aggregate Attributes [1]: [sum#16] +Results [6]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum#17] + +(23) Exchange +Input [6]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum#17] +Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, 5), true, [id=#18] + +(24) HashAggregate [codegen id : 5] 
+Input [6]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum#17] +Keys [5]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11] +Functions [1]: [sum(UnscaledValue(cs_sales_price#7))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#7))#19] +Results [7]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, MakeDecimal(sum(UnscaledValue(cs_sales_price#7))#19,17,2) AS sum_sales#20, MakeDecimal(sum(UnscaledValue(cs_sales_price#7))#19,17,2) AS _w0#21] + +(25) Exchange +Input [7]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, _w0#21] +Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#14, d_year#10, 5), true, [id=#22] + +(26) Sort [codegen id : 6] +Input [7]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, _w0#21] +Arguments: [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, cc_name#14 ASC NULLS FIRST, d_year#10 ASC NULLS FIRST], false, 0 + +(27) Window +Input [7]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, _w0#21] +Arguments: [avg(_w0#21) windowspecdefinition(i_category#3, i_brand#2, cc_name#14, d_year#10, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#23], [i_category#3, i_brand#2, cc_name#14, d_year#10] + +(28) Project [codegen id : 7] +Output [7]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, avg_monthly_sales#23] +Input [8]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, _w0#21, avg_monthly_sales#23] + +(29) Exchange +Input [7]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, avg_monthly_sales#23] +Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#14, 5), true, [id=#24] + +(30) Sort [codegen id : 8] +Input [7]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, avg_monthly_sales#23] +Arguments: [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, cc_name#14 ASC 
NULLS FIRST, d_year#10 ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST], false, 0 + +(31) Window +Input [7]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, avg_monthly_sales#23] +Arguments: [rank(d_year#10, d_moy#11) windowspecdefinition(i_category#3, i_brand#2, cc_name#14, d_year#10 ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#25], [i_category#3, i_brand#2, cc_name#14], [d_year#10 ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST] + +(32) Filter [codegen id : 23] +Input [8]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, avg_monthly_sales#23, rn#25] +Condition : (((((isnotnull(d_year#10) AND isnotnull(avg_monthly_sales#23)) AND (d_year#10 = 1999)) AND (avg_monthly_sales#23 > 0.000000)) AND (CASE WHEN (avg_monthly_sales#23 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#20 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#23 as decimal(22,6)))), DecimalType(22,6), true))) / promote_precision(cast(avg_monthly_sales#23 as decimal(22,6)))), DecimalType(38,16), true) ELSE null END > 0.1000000000000000)) AND isnotnull(rn#25)) + +(33) ReusedExchange [Reuses operator id: 23] +Output [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum#31] + +(34) HashAggregate [codegen id : 13] +Input [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum#31] +Keys [5]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30] +Functions [1]: [sum(UnscaledValue(cs_sales_price#7))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#7))#32] +Results [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, MakeDecimal(sum(UnscaledValue(cs_sales_price#7))#32,17,2) AS sum_sales#33] + +(35) Exchange +Input [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#33] +Arguments: hashpartitioning(i_category#26, i_brand#27, cc_name#28, 5), true, 
[id=#34] + +(36) Sort [codegen id : 14] +Input [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#33] +Arguments: [i_category#26 ASC NULLS FIRST, i_brand#27 ASC NULLS FIRST, cc_name#28 ASC NULLS FIRST, d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST], false, 0 + +(37) Window +Input [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#33] +Arguments: [rank(d_year#29, d_moy#30) windowspecdefinition(i_category#26, i_brand#27, cc_name#28, d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#35], [i_category#26, i_brand#27, cc_name#28], [d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST] + +(38) Filter [codegen id : 15] +Input [7]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#33, rn#35] +Condition : isnotnull(rn#35) + +(39) Project [codegen id : 15] +Output [5]: [i_category#26, i_brand#27, cc_name#28, sum_sales#33, rn#35] +Input [7]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#33, rn#35] + +(40) BroadcastExchange +Input [5]: [i_category#26, i_brand#27, cc_name#28, sum_sales#33, rn#35] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, true] + 1)),false), [id=#36] + +(41) BroadcastHashJoin [codegen id : 23] +Left keys [4]: [i_category#3, i_brand#2, cc_name#14, rn#25] +Right keys [4]: [i_category#26, i_brand#27, cc_name#28, (rn#35 + 1)] +Join condition: None + +(42) Project [codegen id : 23] +Output [9]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, avg_monthly_sales#23, rn#25, sum_sales#33] +Input [13]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, avg_monthly_sales#23, rn#25, i_category#26, i_brand#27, cc_name#28, sum_sales#33, rn#35] + +(43) ReusedExchange [Reuses operator id: 35] +Output [6]: [i_category#37, i_brand#38, cc_name#39, d_year#40, d_moy#41, 
sum_sales#42] + +(44) Sort [codegen id : 21] +Input [6]: [i_category#37, i_brand#38, cc_name#39, d_year#40, d_moy#41, sum_sales#42] +Arguments: [i_category#37 ASC NULLS FIRST, i_brand#38 ASC NULLS FIRST, cc_name#39 ASC NULLS FIRST, d_year#40 ASC NULLS FIRST, d_moy#41 ASC NULLS FIRST], false, 0 + +(45) Window +Input [6]: [i_category#37, i_brand#38, cc_name#39, d_year#40, d_moy#41, sum_sales#42] +Arguments: [rank(d_year#40, d_moy#41) windowspecdefinition(i_category#37, i_brand#38, cc_name#39, d_year#40 ASC NULLS FIRST, d_moy#41 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#43], [i_category#37, i_brand#38, cc_name#39], [d_year#40 ASC NULLS FIRST, d_moy#41 ASC NULLS FIRST] + +(46) Filter [codegen id : 22] +Input [7]: [i_category#37, i_brand#38, cc_name#39, d_year#40, d_moy#41, sum_sales#42, rn#43] +Condition : isnotnull(rn#43) + +(47) Project [codegen id : 22] +Output [5]: [i_category#37, i_brand#38, cc_name#39, sum_sales#42, rn#43] +Input [7]: [i_category#37, i_brand#38, cc_name#39, d_year#40, d_moy#41, sum_sales#42, rn#43] + +(48) BroadcastExchange +Input [5]: [i_category#37, i_brand#38, cc_name#39, sum_sales#42, rn#43] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, true] - 1)),false), [id=#44] + +(49) BroadcastHashJoin [codegen id : 23] +Left keys [4]: [i_category#3, i_brand#2, cc_name#14, rn#25] +Right keys [4]: [i_category#37, i_brand#38, cc_name#39, (rn#43 - 1)] +Join condition: None + +(50) Project [codegen id : 23] +Output [9]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, avg_monthly_sales#23, sum_sales#20, sum_sales#33 AS psum#45, sum_sales#42 AS nsum#46] +Input [14]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, avg_monthly_sales#23, rn#25, sum_sales#33, i_category#37, i_brand#38, cc_name#39, sum_sales#42, rn#43] + +(51) TakeOrderedAndProject +Input [9]: [i_category#3, i_brand#2, 
cc_name#14, d_year#10, d_moy#11, avg_monthly_sales#23, sum_sales#20, psum#45, nsum#46] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#20 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#23 as decimal(22,6)))), DecimalType(22,6), true) ASC NULLS FIRST, cc_name#14 ASC NULLS FIRST], [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, avg_monthly_sales#23, sum_sales#20, psum#45, nsum#46] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/simplified.txt new file mode 100644 index 0000000000000..94e8f21d46a8a --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/simplified.txt @@ -0,0 +1,84 @@ +TakeOrderedAndProject [avg_monthly_sales,cc_name,d_moy,d_year,i_brand,i_category,nsum,psum,sum_sales] + WholeStageCodegen (23) + Project [avg_monthly_sales,cc_name,d_moy,d_year,i_brand,i_category,sum_sales,sum_sales,sum_sales] + BroadcastHashJoin [cc_name,cc_name,i_brand,i_brand,i_category,i_category,rn,rn] + Project [avg_monthly_sales,cc_name,d_moy,d_year,i_brand,i_category,rn,sum_sales,sum_sales] + BroadcastHashJoin [cc_name,cc_name,i_brand,i_brand,i_category,i_category,rn,rn] + Filter [avg_monthly_sales,d_year,rn,sum_sales] + InputAdapter + Window [cc_name,d_moy,d_year,i_brand,i_category] + WholeStageCodegen (8) + Sort [cc_name,d_moy,d_year,i_brand,i_category] + InputAdapter + Exchange [cc_name,i_brand,i_category] #1 + WholeStageCodegen (7) + Project [avg_monthly_sales,cc_name,d_moy,d_year,i_brand,i_category,sum_sales] + InputAdapter + Window [_w0,cc_name,d_year,i_brand,i_category] + WholeStageCodegen (6) + Sort [cc_name,d_year,i_brand,i_category] + InputAdapter + Exchange [cc_name,d_year,i_brand,i_category] #2 + WholeStageCodegen (5) + HashAggregate [cc_name,d_moy,d_year,i_brand,i_category,sum] [_w0,sum,sum(UnscaledValue(cs_sales_price)),sum_sales] + InputAdapter + Exchange 
[cc_name,d_moy,d_year,i_brand,i_category] #3 + WholeStageCodegen (4) + HashAggregate [cc_name,cs_sales_price,d_moy,d_year,i_brand,i_category] [sum,sum] + Project [cc_name,cs_sales_price,d_moy,d_year,i_brand,i_category] + BroadcastHashJoin [cc_call_center_sk,cs_call_center_sk] + Project [cs_call_center_sk,cs_sales_price,d_moy,d_year,i_brand,i_category] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_call_center_sk,cs_sales_price,cs_sold_date_sk,i_brand,i_category] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [i_brand,i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_category,i_item_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Filter [cs_call_center_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Filter [cc_call_center_sk,cc_name] + ColumnarToRow + InputAdapter + Scan parquet default.call_center [cc_call_center_sk,cc_name] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (15) + Project [cc_name,i_brand,i_category,rn,sum_sales] + Filter [rn] + InputAdapter + Window [cc_name,d_moy,d_year,i_brand,i_category] + WholeStageCodegen (14) + Sort [cc_name,d_moy,d_year,i_brand,i_category] + InputAdapter + Exchange [cc_name,i_brand,i_category] #8 + WholeStageCodegen (13) + HashAggregate [cc_name,d_moy,d_year,i_brand,i_category,sum] [sum,sum(UnscaledValue(cs_sales_price)),sum_sales] + InputAdapter + ReusedExchange [cc_name,d_moy,d_year,i_brand,i_category,sum] #3 + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (22) + Project [cc_name,i_brand,i_category,rn,sum_sales] + Filter [rn] + InputAdapter + Window 
[cc_name,d_moy,d_year,i_brand,i_category] + WholeStageCodegen (21) + Sort [cc_name,d_moy,d_year,i_brand,i_category] + InputAdapter + ReusedExchange [cc_name,d_moy,d_year,i_brand,i_category,sum_sales] #8 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58.sf100/explain.txt new file mode 100644 index 0000000000000..467aa26dac4e5 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58.sf100/explain.txt @@ -0,0 +1,477 @@ +== Physical Plan == +TakeOrderedAndProject (79) ++- * Project (78) + +- * BroadcastHashJoin Inner BuildRight (77) + :- * Project (52) + : +- * BroadcastHashJoin Inner BuildRight (51) + : :- * Filter (26) + : : +- * HashAggregate (25) + : : +- Exchange (24) + : : +- * HashAggregate (23) + : : +- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (14) + : : : +- * Project (13) + : : : +- * BroadcastHashJoin LeftSemi BuildRight (12) + : : : :- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.date_dim (4) + : : : +- BroadcastExchange (11) + : : : +- * Project (10) + : : : +- * Filter (9) + : : : +- * ColumnarToRow (8) + : : : +- Scan parquet default.date_dim (7) + : : +- BroadcastExchange (20) + : : +- * Filter (19) + : : +- * ColumnarToRow (18) + : : +- Scan parquet default.item (17) + : +- BroadcastExchange (50) + : +- * Filter (49) + : +- * HashAggregate (48) + : +- Exchange (47) + : +- * HashAggregate (46) + : +- * Project (45) + : +- * BroadcastHashJoin Inner BuildRight (44) + : :- * Project (42) + : : +- * BroadcastHashJoin Inner BuildRight (41) + : : :- * Filter (29) + : : : +- * ColumnarToRow (28) + : : : +- Scan parquet 
default.catalog_sales (27) + : : +- BroadcastExchange (40) + : : +- * Project (39) + : : +- * BroadcastHashJoin LeftSemi BuildRight (38) + : : :- * Filter (32) + : : : +- * ColumnarToRow (31) + : : : +- Scan parquet default.date_dim (30) + : : +- BroadcastExchange (37) + : : +- * Project (36) + : : +- * Filter (35) + : : +- * ColumnarToRow (34) + : : +- Scan parquet default.date_dim (33) + : +- ReusedExchange (43) + +- BroadcastExchange (76) + +- * Filter (75) + +- * HashAggregate (74) + +- Exchange (73) + +- * HashAggregate (72) + +- * Project (71) + +- * BroadcastHashJoin Inner BuildRight (70) + :- * Project (68) + : +- * BroadcastHashJoin Inner BuildRight (67) + : :- * Filter (55) + : : +- * ColumnarToRow (54) + : : +- Scan parquet default.web_sales (53) + : +- BroadcastExchange (66) + : +- * Project (65) + : +- * BroadcastHashJoin LeftSemi BuildRight (64) + : :- * Filter (58) + : : +- * ColumnarToRow (57) + : : +- Scan parquet default.date_dim (56) + : +- BroadcastExchange (63) + : +- * Project (62) + : +- * Filter (61) + : +- * ColumnarToRow (60) + : +- Scan parquet default.date_dim (59) + +- ReusedExchange (69) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] + +(3) Filter [codegen id : 4] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] +Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#4, d_date#5] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#4, d_date#5] + +(6) Filter [codegen id : 2] +Input [2]: [d_date_sk#4, d_date#5] +Condition : isnotnull(d_date_sk#4) + +(7) Scan parquet default.date_dim +Output [2]: [d_date#5, d_week_seq#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_week_seq)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 1] +Input [2]: [d_date#5, d_week_seq#6] + +(9) Filter [codegen id : 1] +Input [2]: [d_date#5, d_week_seq#6] +Condition : (isnotnull(d_week_seq#6) AND (d_week_seq#6 = Subquery scalar-subquery#7, [id=#8])) + +(10) Project [codegen id : 1] +Output [1]: [d_date#5 AS d_date#5#9] +Input [2]: [d_date#5, d_week_seq#6] + +(11) BroadcastExchange +Input [1]: [d_date#5#9] +Arguments: HashedRelationBroadcastMode(List(input[0, date, true]),false), [id=#10] + +(12) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [d_date#5] +Right keys [1]: [d_date#5#9] +Join condition: None + +(13) Project [codegen id : 2] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_date#5] + +(14) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(16) Project [codegen id : 4] +Output [2]: [ss_item_sk#2, ss_ext_sales_price#3] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3, d_date_sk#4] + +(17) Scan parquet default.item +Output [2]: [i_item_sk#12, i_item_id#13] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [2]: [i_item_sk#12, i_item_id#13] + +(19) Filter [codegen id : 3] +Input [2]: [i_item_sk#12, i_item_id#13] +Condition : (isnotnull(i_item_sk#12) AND isnotnull(i_item_id#13)) + +(20) BroadcastExchange +Input [2]: [i_item_sk#12, i_item_id#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#12] +Join condition: None + +(22) Project [codegen id : 4] +Output [2]: [ss_ext_sales_price#3, i_item_id#13] +Input [4]: [ss_item_sk#2, ss_ext_sales_price#3, i_item_sk#12, i_item_id#13] + +(23) HashAggregate [codegen id : 4] +Input [2]: [ss_ext_sales_price#3, i_item_id#13] +Keys [1]: [i_item_id#13] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#15] +Results [2]: [i_item_id#13, sum#16] + +(24) Exchange +Input [2]: [i_item_id#13, sum#16] +Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#17] + +(25) HashAggregate [codegen id : 15] +Input [2]: [i_item_id#13, sum#16] +Keys [1]: [i_item_id#13] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#18] +Results [2]: [i_item_id#13 AS item_id#19, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#18,17,2) AS ss_item_rev#20] + +(26) Filter [codegen id : 15] +Input [2]: [item_id#19, ss_item_rev#20] +Condition : isnotnull(ss_item_rev#20) + +(27) Scan parquet default.catalog_sales +Output [3]: [cs_sold_date_sk#21, cs_item_sk#22, cs_ext_sales_price#23] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 8] +Input [3]: [cs_sold_date_sk#21, cs_item_sk#22, cs_ext_sales_price#23] + +(29) Filter [codegen id : 8] +Input [3]: [cs_sold_date_sk#21, cs_item_sk#22, cs_ext_sales_price#23] +Condition : (isnotnull(cs_item_sk#22) AND isnotnull(cs_sold_date_sk#21)) + +(30) Scan parquet default.date_dim +Output [2]: [d_date_sk#4, d_date#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 6] +Input [2]: [d_date_sk#4, d_date#5] + +(32) Filter [codegen id : 6] +Input [2]: [d_date_sk#4, d_date#5] +Condition : isnotnull(d_date_sk#4) + +(33) Scan parquet default.date_dim +Output [2]: [d_date#5, d_week_seq#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_week_seq)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 5] +Input [2]: [d_date#5, d_week_seq#6] + +(35) Filter [codegen id : 5] +Input [2]: [d_date#5, d_week_seq#6] +Condition : (isnotnull(d_week_seq#6) AND (d_week_seq#6 = ReusedSubquery Subquery scalar-subquery#7, [id=#8])) + +(36) Project [codegen id : 5] +Output [1]: [d_date#5 AS d_date#5#24] +Input [2]: [d_date#5, d_week_seq#6] + +(37) BroadcastExchange +Input [1]: [d_date#5#24] +Arguments: HashedRelationBroadcastMode(List(input[0, date, true]),false), [id=#25] + +(38) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [d_date#5] +Right keys [1]: [d_date#5#24] +Join condition: None + +(39) Project [codegen id : 6] +Output [1]: [d_date_sk#4] 
+Input [2]: [d_date_sk#4, d_date#5] + +(40) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] + +(41) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [cs_sold_date_sk#21] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(42) Project [codegen id : 8] +Output [2]: [cs_item_sk#22, cs_ext_sales_price#23] +Input [4]: [cs_sold_date_sk#21, cs_item_sk#22, cs_ext_sales_price#23, d_date_sk#4] + +(43) ReusedExchange [Reuses operator id: 20] +Output [2]: [i_item_sk#12, i_item_id#13] + +(44) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [cs_item_sk#22] +Right keys [1]: [i_item_sk#12] +Join condition: None + +(45) Project [codegen id : 8] +Output [2]: [cs_ext_sales_price#23, i_item_id#13] +Input [4]: [cs_item_sk#22, cs_ext_sales_price#23, i_item_sk#12, i_item_id#13] + +(46) HashAggregate [codegen id : 8] +Input [2]: [cs_ext_sales_price#23, i_item_id#13] +Keys [1]: [i_item_id#13] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#23))] +Aggregate Attributes [1]: [sum#27] +Results [2]: [i_item_id#13, sum#28] + +(47) Exchange +Input [2]: [i_item_id#13, sum#28] +Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#29] + +(48) HashAggregate [codegen id : 9] +Input [2]: [i_item_id#13, sum#28] +Keys [1]: [i_item_id#13] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#23))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#23))#30] +Results [2]: [i_item_id#13 AS item_id#31, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#23))#30,17,2) AS cs_item_rev#32] + +(49) Filter [codegen id : 9] +Input [2]: [item_id#31, cs_item_rev#32] +Condition : isnotnull(cs_item_rev#32) + +(50) BroadcastExchange +Input [2]: [item_id#31, cs_item_rev#32] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#33] + +(51) BroadcastHashJoin [codegen id : 15] +Left keys [1]: [item_id#19] +Right keys [1]: [item_id#31] +Join condition: 
((((cast(ss_item_rev#20 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(cs_item_rev#32)), DecimalType(20,3), true)) AND (cast(cs_item_rev#32 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ss_item_rev#20)), DecimalType(20,3), true))) AND (cast(ss_item_rev#20 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(cs_item_rev#32)), DecimalType(19,3), true))) AND (cast(cs_item_rev#32 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ss_item_rev#20)), DecimalType(19,3), true))) + +(52) Project [codegen id : 15] +Output [3]: [item_id#19, ss_item_rev#20, cs_item_rev#32] +Input [4]: [item_id#19, ss_item_rev#20, item_id#31, cs_item_rev#32] + +(53) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#34, ws_item_sk#35, ws_ext_sales_price#36] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(54) ColumnarToRow [codegen id : 13] +Input [3]: [ws_sold_date_sk#34, ws_item_sk#35, ws_ext_sales_price#36] + +(55) Filter [codegen id : 13] +Input [3]: [ws_sold_date_sk#34, ws_item_sk#35, ws_ext_sales_price#36] +Condition : (isnotnull(ws_item_sk#35) AND isnotnull(ws_sold_date_sk#34)) + +(56) Scan parquet default.date_dim +Output [2]: [d_date_sk#4, d_date#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(57) ColumnarToRow [codegen id : 11] +Input [2]: [d_date_sk#4, d_date#5] + +(58) Filter [codegen id : 11] +Input [2]: [d_date_sk#4, d_date#5] +Condition : isnotnull(d_date_sk#4) + +(59) Scan parquet default.date_dim +Output [2]: [d_date#5, d_week_seq#6] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_week_seq)] +ReadSchema: struct + +(60) ColumnarToRow [codegen id : 10] +Input [2]: [d_date#5, d_week_seq#6] + +(61) Filter [codegen id : 10] +Input [2]: [d_date#5, d_week_seq#6] +Condition : (isnotnull(d_week_seq#6) AND (d_week_seq#6 = ReusedSubquery Subquery scalar-subquery#7, [id=#8])) + +(62) Project [codegen id : 10] +Output [1]: [d_date#5 AS d_date#5#37] +Input [2]: [d_date#5, d_week_seq#6] + +(63) BroadcastExchange +Input [1]: [d_date#5#37] +Arguments: HashedRelationBroadcastMode(List(input[0, date, true]),false), [id=#38] + +(64) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [d_date#5] +Right keys [1]: [d_date#5#37] +Join condition: None + +(65) Project [codegen id : 11] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_date#5] + +(66) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#39] + +(67) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ws_sold_date_sk#34] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(68) Project [codegen id : 13] +Output [2]: [ws_item_sk#35, ws_ext_sales_price#36] +Input [4]: [ws_sold_date_sk#34, ws_item_sk#35, ws_ext_sales_price#36, d_date_sk#4] + +(69) ReusedExchange [Reuses operator id: 20] +Output [2]: [i_item_sk#12, i_item_id#13] + +(70) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ws_item_sk#35] +Right keys [1]: [i_item_sk#12] +Join condition: None + +(71) Project [codegen id : 13] +Output [2]: [ws_ext_sales_price#36, i_item_id#13] +Input [4]: [ws_item_sk#35, ws_ext_sales_price#36, i_item_sk#12, i_item_id#13] + +(72) HashAggregate [codegen id : 13] +Input [2]: [ws_ext_sales_price#36, i_item_id#13] +Keys [1]: [i_item_id#13] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#36))] +Aggregate Attributes [1]: [sum#40] +Results [2]: 
[i_item_id#13, sum#41] + +(73) Exchange +Input [2]: [i_item_id#13, sum#41] +Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#42] + +(74) HashAggregate [codegen id : 14] +Input [2]: [i_item_id#13, sum#41] +Keys [1]: [i_item_id#13] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#36))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#36))#43] +Results [2]: [i_item_id#13 AS item_id#44, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#36))#43,17,2) AS ws_item_rev#45] + +(75) Filter [codegen id : 14] +Input [2]: [item_id#44, ws_item_rev#45] +Condition : isnotnull(ws_item_rev#45) + +(76) BroadcastExchange +Input [2]: [item_id#44, ws_item_rev#45] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#46] + +(77) BroadcastHashJoin [codegen id : 15] +Left keys [1]: [item_id#19] +Right keys [1]: [item_id#44] +Join condition: ((((((((cast(ws_item_rev#45 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(cs_item_rev#32)), DecimalType(20,3), true)) AND (cast(cs_item_rev#32 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ws_item_rev#45)), DecimalType(20,3), true))) AND (cast(ws_item_rev#45 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ss_item_rev#20)), DecimalType(19,3), true))) AND (cast(ws_item_rev#45 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(cs_item_rev#32)), DecimalType(19,3), true))) AND (cast(cs_item_rev#32 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ws_item_rev#45)), DecimalType(19,3), true))) AND (cast(ss_item_rev#20 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ws_item_rev#45)), DecimalType(20,3), true))) AND (cast(ws_item_rev#45 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ss_item_rev#20)), DecimalType(20,3), true))) AND (cast(ss_item_rev#20 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ws_item_rev#45)), DecimalType(19,3), true))) + +(78) Project [codegen id : 15] +Output [8]: [item_id#19, 
ss_item_rev#20, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(ss_item_rev#20 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#20 as decimal(18,2))) + promote_precision(cast(cs_item_rev#32 as decimal(18,2)))), DecimalType(18,2), true) as decimal(19,2))) + promote_precision(cast(ws_item_rev#45 as decimal(19,2)))), DecimalType(19,2), true))), DecimalType(38,21), true)) / 3.000000000000000000000), DecimalType(38,21), true)) * 100.000000000000000000000), DecimalType(38,17), true) AS ss_dev#47, cs_item_rev#32, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(cs_item_rev#32 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#20 as decimal(18,2))) + promote_precision(cast(cs_item_rev#32 as decimal(18,2)))), DecimalType(18,2), true) as decimal(19,2))) + promote_precision(cast(ws_item_rev#45 as decimal(19,2)))), DecimalType(19,2), true))), DecimalType(38,21), true)) / 3.000000000000000000000), DecimalType(38,21), true)) * 100.000000000000000000000), DecimalType(38,17), true) AS cs_dev#48, ws_item_rev#45, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(ws_item_rev#45 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#20 as decimal(18,2))) + promote_precision(cast(cs_item_rev#32 as decimal(18,2)))), DecimalType(18,2), true) as decimal(19,2))) + promote_precision(cast(ws_item_rev#45 as decimal(19,2)))), DecimalType(19,2), true))), DecimalType(38,21), true)) / 3.000000000000000000000), DecimalType(38,21), true)) * 100.000000000000000000000), DecimalType(38,17), true) AS ws_dev#49, 
CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#20 as decimal(18,2))) + promote_precision(cast(cs_item_rev#32 as decimal(18,2)))), DecimalType(18,2), true) as decimal(19,2))) + promote_precision(cast(ws_item_rev#45 as decimal(19,2)))), DecimalType(19,2), true)) / 3.00), DecimalType(23,6), true) AS average#50] +Input [5]: [item_id#19, ss_item_rev#20, cs_item_rev#32, item_id#44, ws_item_rev#45] + +(79) TakeOrderedAndProject +Input [8]: [item_id#19, ss_item_rev#20, ss_dev#47, cs_item_rev#32, cs_dev#48, ws_item_rev#45, ws_dev#49, average#50] +Arguments: 100, [item_id#19 ASC NULLS FIRST, ss_item_rev#20 ASC NULLS FIRST], [item_id#19, ss_item_rev#20, ss_dev#47, cs_item_rev#32, cs_dev#48, ws_item_rev#45, ws_dev#49, average#50] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 9 Hosting Expression = Subquery scalar-subquery#7, [id=#8] +* Project (83) ++- * Filter (82) + +- * ColumnarToRow (81) + +- Scan parquet default.date_dim (80) + + +(80) Scan parquet default.date_dim +Output [2]: [d_date#5, d_week_seq#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date), EqualTo(d_date,2000-01-03)] +ReadSchema: struct + +(81) ColumnarToRow [codegen id : 1] +Input [2]: [d_date#5, d_week_seq#6] + +(82) Filter [codegen id : 1] +Input [2]: [d_date#5, d_week_seq#6] +Condition : (isnotnull(d_date#5) AND (d_date#5 = 10959)) + +(83) Project [codegen id : 1] +Output [1]: [d_week_seq#6] +Input [2]: [d_date#5, d_week_seq#6] + +Subquery:2 Hosting operator id = 35 Hosting Expression = ReusedSubquery Subquery scalar-subquery#7, [id=#8] + +Subquery:3 Hosting operator id = 61 Hosting Expression = ReusedSubquery Subquery scalar-subquery#7, [id=#8] + + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58.sf100/simplified.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58.sf100/simplified.txt new file mode 100644 index 0000000000000..0b846774d3c97 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58.sf100/simplified.txt @@ -0,0 +1,125 @@ +TakeOrderedAndProject [average,cs_dev,cs_item_rev,item_id,ss_dev,ss_item_rev,ws_dev,ws_item_rev] + WholeStageCodegen (15) + Project [cs_item_rev,item_id,ss_item_rev,ws_item_rev] + BroadcastHashJoin [cs_item_rev,item_id,item_id,ss_item_rev,ws_item_rev] + Project [cs_item_rev,item_id,ss_item_rev] + BroadcastHashJoin [cs_item_rev,item_id,item_id,ss_item_rev] + Filter [ss_item_rev] + HashAggregate [i_item_id,sum] [item_id,ss_item_rev,sum,sum(UnscaledValue(ss_ext_sales_price))] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen (4) + HashAggregate [i_item_id,ss_ext_sales_price] [sum,sum] + Project [i_item_id,ss_ext_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (2) + Project [d_date_sk] + BroadcastHashJoin [d_date,d_date] + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date] + Filter [d_week_seq] + Subquery #1 + WholeStageCodegen (1) + Project [d_week_seq] + Filter [d_date] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_week_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [i_item_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_id,i_item_sk] + InputAdapter + BroadcastExchange #5 + 
WholeStageCodegen (9) + Filter [cs_item_rev] + HashAggregate [i_item_id,sum] [cs_item_rev,item_id,sum,sum(UnscaledValue(cs_ext_sales_price))] + InputAdapter + Exchange [i_item_id] #6 + WholeStageCodegen (8) + HashAggregate [cs_ext_sales_price,i_item_id] [sum,sum] + Project [cs_ext_sales_price,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_ext_sales_price,cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (6) + Project [d_date_sk] + BroadcastHashJoin [d_date,d_date] + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (5) + Project [d_date] + Filter [d_week_seq] + ReusedSubquery [d_week_seq] #1 + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_week_seq] + InputAdapter + ReusedExchange [i_item_id,i_item_sk] #4 + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (14) + Filter [ws_item_rev] + HashAggregate [i_item_id,sum] [item_id,sum,sum(UnscaledValue(ws_ext_sales_price)),ws_item_rev] + InputAdapter + Exchange [i_item_id] #10 + WholeStageCodegen (13) + HashAggregate [i_item_id,ws_ext_sales_price] [sum,sum] + Project [i_item_id,ws_ext_sales_price] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_ext_sales_price,ws_item_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (11) + Project [d_date_sk] + BroadcastHashJoin [d_date,d_date] + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + BroadcastExchange #12 + 
WholeStageCodegen (10) + Project [d_date] + Filter [d_week_seq] + ReusedSubquery [d_week_seq] #1 + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_week_seq] + InputAdapter + ReusedExchange [i_item_id,i_item_sk] #4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/explain.txt new file mode 100644 index 0000000000000..f587499d7d21a --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/explain.txt @@ -0,0 +1,477 @@ +== Physical Plan == +TakeOrderedAndProject (79) ++- * Project (78) + +- * BroadcastHashJoin Inner BuildRight (77) + :- * Project (52) + : +- * BroadcastHashJoin Inner BuildRight (51) + : :- * Filter (26) + : : +- * HashAggregate (25) + : : +- Exchange (24) + : : +- * HashAggregate (23) + : : +- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.item (4) + : : +- BroadcastExchange (20) + : : +- * Project (19) + : : +- * BroadcastHashJoin LeftSemi BuildRight (18) + : : :- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.date_dim (10) + : : +- BroadcastExchange (17) + : : +- * Project (16) + : : +- * Filter (15) + : : +- * ColumnarToRow (14) + : : +- Scan parquet default.date_dim (13) + : +- BroadcastExchange (50) + : +- * Filter (49) + : +- * HashAggregate (48) + : +- Exchange (47) + : +- * HashAggregate (46) + : +- * Project (45) + : +- * BroadcastHashJoin Inner BuildRight (44) + : :- * Project (32) + : : +- * BroadcastHashJoin Inner BuildRight (31) + : : :- * Filter (29) + : : : +- * ColumnarToRow (28) + : : : +- Scan parquet 
default.catalog_sales (27) + : : +- ReusedExchange (30) + : +- BroadcastExchange (43) + : +- * Project (42) + : +- * BroadcastHashJoin LeftSemi BuildRight (41) + : :- * Filter (35) + : : +- * ColumnarToRow (34) + : : +- Scan parquet default.date_dim (33) + : +- BroadcastExchange (40) + : +- * Project (39) + : +- * Filter (38) + : +- * ColumnarToRow (37) + : +- Scan parquet default.date_dim (36) + +- BroadcastExchange (76) + +- * Filter (75) + +- * HashAggregate (74) + +- Exchange (73) + +- * HashAggregate (72) + +- * Project (71) + +- * BroadcastHashJoin Inner BuildRight (70) + :- * Project (58) + : +- * BroadcastHashJoin Inner BuildRight (57) + : :- * Filter (55) + : : +- * ColumnarToRow (54) + : : +- Scan parquet default.web_sales (53) + : +- ReusedExchange (56) + +- BroadcastExchange (69) + +- * Project (68) + +- * BroadcastHashJoin LeftSemi BuildRight (67) + :- * Filter (61) + : +- * ColumnarToRow (60) + : +- Scan parquet default.date_dim (59) + +- BroadcastExchange (66) + +- * Project (65) + +- * Filter (64) + +- * ColumnarToRow (63) + +- Scan parquet default.date_dim (62) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] + +(3) Filter [codegen id : 4] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] +Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(4) Scan parquet default.item +Output [2]: [i_item_sk#4, i_item_id#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: 
[IsNotNull(i_item_sk), IsNotNull(i_item_id)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [i_item_sk#4, i_item_id#5] + +(6) Filter [codegen id : 1] +Input [2]: [i_item_sk#4, i_item_id#5] +Condition : (isnotnull(i_item_sk#4) AND isnotnull(i_item_id#5)) + +(7) BroadcastExchange +Input [2]: [i_item_sk#4, i_item_id#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#6] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#4] +Join condition: None + +(9) Project [codegen id : 4] +Output [3]: [ss_sold_date_sk#1, ss_ext_sales_price#3, i_item_id#5] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3, i_item_sk#4, i_item_id#5] + +(10) Scan parquet default.date_dim +Output [2]: [d_date_sk#7, d_date#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#7, d_date#8] + +(12) Filter [codegen id : 3] +Input [2]: [d_date_sk#7, d_date#8] +Condition : isnotnull(d_date_sk#7) + +(13) Scan parquet default.date_dim +Output [2]: [d_date#8, d_week_seq#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_week_seq)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 2] +Input [2]: [d_date#8, d_week_seq#9] + +(15) Filter [codegen id : 2] +Input [2]: [d_date#8, d_week_seq#9] +Condition : (isnotnull(d_week_seq#9) AND (d_week_seq#9 = Subquery scalar-subquery#10, [id=#11])) + +(16) Project [codegen id : 2] +Output [1]: [d_date#8 AS d_date#8#12] +Input [2]: [d_date#8, d_week_seq#9] + +(17) BroadcastExchange +Input [1]: [d_date#8#12] +Arguments: 
HashedRelationBroadcastMode(List(input[0, date, true]),false), [id=#13] + +(18) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date#8] +Right keys [1]: [d_date#8#12] +Join condition: None + +(19) Project [codegen id : 3] +Output [1]: [d_date_sk#7] +Input [2]: [d_date_sk#7, d_date#8] + +(20) BroadcastExchange +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(22) Project [codegen id : 4] +Output [2]: [ss_ext_sales_price#3, i_item_id#5] +Input [4]: [ss_sold_date_sk#1, ss_ext_sales_price#3, i_item_id#5, d_date_sk#7] + +(23) HashAggregate [codegen id : 4] +Input [2]: [ss_ext_sales_price#3, i_item_id#5] +Keys [1]: [i_item_id#5] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#15] +Results [2]: [i_item_id#5, sum#16] + +(24) Exchange +Input [2]: [i_item_id#5, sum#16] +Arguments: hashpartitioning(i_item_id#5, 5), true, [id=#17] + +(25) HashAggregate [codegen id : 15] +Input [2]: [i_item_id#5, sum#16] +Keys [1]: [i_item_id#5] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#18] +Results [2]: [i_item_id#5 AS item_id#19, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#18,17,2) AS ss_item_rev#20] + +(26) Filter [codegen id : 15] +Input [2]: [item_id#19, ss_item_rev#20] +Condition : isnotnull(ss_item_rev#20) + +(27) Scan parquet default.catalog_sales +Output [3]: [cs_sold_date_sk#21, cs_item_sk#22, cs_ext_sales_price#23] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 8] +Input [3]: 
[cs_sold_date_sk#21, cs_item_sk#22, cs_ext_sales_price#23] + +(29) Filter [codegen id : 8] +Input [3]: [cs_sold_date_sk#21, cs_item_sk#22, cs_ext_sales_price#23] +Condition : (isnotnull(cs_item_sk#22) AND isnotnull(cs_sold_date_sk#21)) + +(30) ReusedExchange [Reuses operator id: 7] +Output [2]: [i_item_sk#4, i_item_id#5] + +(31) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [cs_item_sk#22] +Right keys [1]: [i_item_sk#4] +Join condition: None + +(32) Project [codegen id : 8] +Output [3]: [cs_sold_date_sk#21, cs_ext_sales_price#23, i_item_id#5] +Input [5]: [cs_sold_date_sk#21, cs_item_sk#22, cs_ext_sales_price#23, i_item_sk#4, i_item_id#5] + +(33) Scan parquet default.date_dim +Output [2]: [d_date_sk#7, d_date#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 7] +Input [2]: [d_date_sk#7, d_date#8] + +(35) Filter [codegen id : 7] +Input [2]: [d_date_sk#7, d_date#8] +Condition : isnotnull(d_date_sk#7) + +(36) Scan parquet default.date_dim +Output [2]: [d_date#8, d_week_seq#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_week_seq)] +ReadSchema: struct + +(37) ColumnarToRow [codegen id : 6] +Input [2]: [d_date#8, d_week_seq#9] + +(38) Filter [codegen id : 6] +Input [2]: [d_date#8, d_week_seq#9] +Condition : (isnotnull(d_week_seq#9) AND (d_week_seq#9 = ReusedSubquery Subquery scalar-subquery#10, [id=#11])) + +(39) Project [codegen id : 6] +Output [1]: [d_date#8 AS d_date#8#24] +Input [2]: [d_date#8, d_week_seq#9] + +(40) BroadcastExchange +Input [1]: [d_date#8#24] +Arguments: HashedRelationBroadcastMode(List(input[0, date, true]),false), [id=#25] + +(41) BroadcastHashJoin [codegen id : 7] +Left keys [1]: 
[d_date#8] +Right keys [1]: [d_date#8#24] +Join condition: None + +(42) Project [codegen id : 7] +Output [1]: [d_date_sk#7] +Input [2]: [d_date_sk#7, d_date#8] + +(43) BroadcastExchange +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] + +(44) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [cs_sold_date_sk#21] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(45) Project [codegen id : 8] +Output [2]: [cs_ext_sales_price#23, i_item_id#5] +Input [4]: [cs_sold_date_sk#21, cs_ext_sales_price#23, i_item_id#5, d_date_sk#7] + +(46) HashAggregate [codegen id : 8] +Input [2]: [cs_ext_sales_price#23, i_item_id#5] +Keys [1]: [i_item_id#5] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#23))] +Aggregate Attributes [1]: [sum#27] +Results [2]: [i_item_id#5, sum#28] + +(47) Exchange +Input [2]: [i_item_id#5, sum#28] +Arguments: hashpartitioning(i_item_id#5, 5), true, [id=#29] + +(48) HashAggregate [codegen id : 9] +Input [2]: [i_item_id#5, sum#28] +Keys [1]: [i_item_id#5] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#23))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#23))#30] +Results [2]: [i_item_id#5 AS item_id#31, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#23))#30,17,2) AS cs_item_rev#32] + +(49) Filter [codegen id : 9] +Input [2]: [item_id#31, cs_item_rev#32] +Condition : isnotnull(cs_item_rev#32) + +(50) BroadcastExchange +Input [2]: [item_id#31, cs_item_rev#32] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#33] + +(51) BroadcastHashJoin [codegen id : 15] +Left keys [1]: [item_id#19] +Right keys [1]: [item_id#31] +Join condition: ((((cast(ss_item_rev#20 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(cs_item_rev#32)), DecimalType(19,3), true)) AND (cast(ss_item_rev#20 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(cs_item_rev#32)), DecimalType(20,3), true))) AND (cast(cs_item_rev#32 
as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ss_item_rev#20)), DecimalType(19,3), true))) AND (cast(cs_item_rev#32 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ss_item_rev#20)), DecimalType(20,3), true))) + +(52) Project [codegen id : 15] +Output [3]: [item_id#19, ss_item_rev#20, cs_item_rev#32] +Input [4]: [item_id#19, ss_item_rev#20, item_id#31, cs_item_rev#32] + +(53) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#34, ws_item_sk#35, ws_ext_sales_price#36] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(54) ColumnarToRow [codegen id : 13] +Input [3]: [ws_sold_date_sk#34, ws_item_sk#35, ws_ext_sales_price#36] + +(55) Filter [codegen id : 13] +Input [3]: [ws_sold_date_sk#34, ws_item_sk#35, ws_ext_sales_price#36] +Condition : (isnotnull(ws_item_sk#35) AND isnotnull(ws_sold_date_sk#34)) + +(56) ReusedExchange [Reuses operator id: 7] +Output [2]: [i_item_sk#4, i_item_id#5] + +(57) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ws_item_sk#35] +Right keys [1]: [i_item_sk#4] +Join condition: None + +(58) Project [codegen id : 13] +Output [3]: [ws_sold_date_sk#34, ws_ext_sales_price#36, i_item_id#5] +Input [5]: [ws_sold_date_sk#34, ws_item_sk#35, ws_ext_sales_price#36, i_item_sk#4, i_item_id#5] + +(59) Scan parquet default.date_dim +Output [2]: [d_date_sk#7, d_date#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(60) ColumnarToRow [codegen id : 12] +Input [2]: [d_date_sk#7, d_date#8] + +(61) Filter [codegen id : 12] +Input [2]: [d_date_sk#7, d_date#8] +Condition : isnotnull(d_date_sk#7) + +(62) Scan parquet default.date_dim +Output 
[2]: [d_date#8, d_week_seq#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_week_seq)] +ReadSchema: struct + +(63) ColumnarToRow [codegen id : 11] +Input [2]: [d_date#8, d_week_seq#9] + +(64) Filter [codegen id : 11] +Input [2]: [d_date#8, d_week_seq#9] +Condition : (isnotnull(d_week_seq#9) AND (d_week_seq#9 = ReusedSubquery Subquery scalar-subquery#10, [id=#11])) + +(65) Project [codegen id : 11] +Output [1]: [d_date#8 AS d_date#8#37] +Input [2]: [d_date#8, d_week_seq#9] + +(66) BroadcastExchange +Input [1]: [d_date#8#37] +Arguments: HashedRelationBroadcastMode(List(input[0, date, true]),false), [id=#38] + +(67) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [d_date#8] +Right keys [1]: [d_date#8#37] +Join condition: None + +(68) Project [codegen id : 12] +Output [1]: [d_date_sk#7] +Input [2]: [d_date_sk#7, d_date#8] + +(69) BroadcastExchange +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#39] + +(70) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ws_sold_date_sk#34] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(71) Project [codegen id : 13] +Output [2]: [ws_ext_sales_price#36, i_item_id#5] +Input [4]: [ws_sold_date_sk#34, ws_ext_sales_price#36, i_item_id#5, d_date_sk#7] + +(72) HashAggregate [codegen id : 13] +Input [2]: [ws_ext_sales_price#36, i_item_id#5] +Keys [1]: [i_item_id#5] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#36))] +Aggregate Attributes [1]: [sum#40] +Results [2]: [i_item_id#5, sum#41] + +(73) Exchange +Input [2]: [i_item_id#5, sum#41] +Arguments: hashpartitioning(i_item_id#5, 5), true, [id=#42] + +(74) HashAggregate [codegen id : 14] +Input [2]: [i_item_id#5, sum#41] +Keys [1]: [i_item_id#5] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#36))] +Aggregate Attributes [1]: 
[sum(UnscaledValue(ws_ext_sales_price#36))#43] +Results [2]: [i_item_id#5 AS item_id#44, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#36))#43,17,2) AS ws_item_rev#45] + +(75) Filter [codegen id : 14] +Input [2]: [item_id#44, ws_item_rev#45] +Condition : isnotnull(ws_item_rev#45) + +(76) BroadcastExchange +Input [2]: [item_id#44, ws_item_rev#45] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#46] + +(77) BroadcastHashJoin [codegen id : 15] +Left keys [1]: [item_id#19] +Right keys [1]: [item_id#44] +Join condition: ((((((((cast(ss_item_rev#20 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ws_item_rev#45)), DecimalType(19,3), true)) AND (cast(ss_item_rev#20 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ws_item_rev#45)), DecimalType(20,3), true))) AND (cast(cs_item_rev#32 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ws_item_rev#45)), DecimalType(19,3), true))) AND (cast(cs_item_rev#32 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ws_item_rev#45)), DecimalType(20,3), true))) AND (cast(ws_item_rev#45 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ss_item_rev#20)), DecimalType(19,3), true))) AND (cast(ws_item_rev#45 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ss_item_rev#20)), DecimalType(20,3), true))) AND (cast(ws_item_rev#45 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(cs_item_rev#32)), DecimalType(19,3), true))) AND (cast(ws_item_rev#45 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(cs_item_rev#32)), DecimalType(20,3), true))) + +(78) Project [codegen id : 15] +Output [8]: [item_id#19, ss_item_rev#20, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(ss_item_rev#20 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#20 as decimal(18,2))) + promote_precision(cast(cs_item_rev#32 as 
decimal(18,2)))), DecimalType(18,2), true) as decimal(19,2))) + promote_precision(cast(ws_item_rev#45 as decimal(19,2)))), DecimalType(19,2), true))), DecimalType(38,21), true)) / 3.000000000000000000000), DecimalType(38,21), true)) * 100.000000000000000000000), DecimalType(38,17), true) AS ss_dev#47, cs_item_rev#32, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(cs_item_rev#32 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#20 as decimal(18,2))) + promote_precision(cast(cs_item_rev#32 as decimal(18,2)))), DecimalType(18,2), true) as decimal(19,2))) + promote_precision(cast(ws_item_rev#45 as decimal(19,2)))), DecimalType(19,2), true))), DecimalType(38,21), true)) / 3.000000000000000000000), DecimalType(38,21), true)) * 100.000000000000000000000), DecimalType(38,17), true) AS cs_dev#48, ws_item_rev#45, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(ws_item_rev#45 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#20 as decimal(18,2))) + promote_precision(cast(cs_item_rev#32 as decimal(18,2)))), DecimalType(18,2), true) as decimal(19,2))) + promote_precision(cast(ws_item_rev#45 as decimal(19,2)))), DecimalType(19,2), true))), DecimalType(38,21), true)) / 3.000000000000000000000), DecimalType(38,21), true)) * 100.000000000000000000000), DecimalType(38,17), true) AS ws_dev#49, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#20 as decimal(18,2))) + promote_precision(cast(cs_item_rev#32 as decimal(18,2)))), DecimalType(18,2), true) as decimal(19,2))) + promote_precision(cast(ws_item_rev#45 as decimal(19,2)))), DecimalType(19,2), true)) / 3.00), DecimalType(23,6), true) AS average#50] +Input [5]: [item_id#19, ss_item_rev#20, 
cs_item_rev#32, item_id#44, ws_item_rev#45] + +(79) TakeOrderedAndProject +Input [8]: [item_id#19, ss_item_rev#20, ss_dev#47, cs_item_rev#32, cs_dev#48, ws_item_rev#45, ws_dev#49, average#50] +Arguments: 100, [item_id#19 ASC NULLS FIRST, ss_item_rev#20 ASC NULLS FIRST], [item_id#19, ss_item_rev#20, ss_dev#47, cs_item_rev#32, cs_dev#48, ws_item_rev#45, ws_dev#49, average#50] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 15 Hosting Expression = Subquery scalar-subquery#10, [id=#11] +* Project (83) ++- * Filter (82) + +- * ColumnarToRow (81) + +- Scan parquet default.date_dim (80) + + +(80) Scan parquet default.date_dim +Output [2]: [d_date#8, d_week_seq#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_date), EqualTo(d_date,2000-01-03)] +ReadSchema: struct + +(81) ColumnarToRow [codegen id : 1] +Input [2]: [d_date#8, d_week_seq#9] + +(82) Filter [codegen id : 1] +Input [2]: [d_date#8, d_week_seq#9] +Condition : (isnotnull(d_date#8) AND (d_date#8 = 10959)) + +(83) Project [codegen id : 1] +Output [1]: [d_week_seq#9] +Input [2]: [d_date#8, d_week_seq#9] + +Subquery:2 Hosting operator id = 38 Hosting Expression = ReusedSubquery Subquery scalar-subquery#10, [id=#11] + +Subquery:3 Hosting operator id = 64 Hosting Expression = ReusedSubquery Subquery scalar-subquery#10, [id=#11] + + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/simplified.txt new file mode 100644 index 0000000000000..11295424fc7e3 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/simplified.txt @@ -0,0 +1,125 @@ +TakeOrderedAndProject [average,cs_dev,cs_item_rev,item_id,ss_dev,ss_item_rev,ws_dev,ws_item_rev] + WholeStageCodegen (15) + Project 
[cs_item_rev,item_id,ss_item_rev,ws_item_rev] + BroadcastHashJoin [cs_item_rev,item_id,item_id,ss_item_rev,ws_item_rev] + Project [cs_item_rev,item_id,ss_item_rev] + BroadcastHashJoin [cs_item_rev,item_id,item_id,ss_item_rev] + Filter [ss_item_rev] + HashAggregate [i_item_id,sum] [item_id,ss_item_rev,sum,sum(UnscaledValue(ss_ext_sales_price))] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen (4) + HashAggregate [i_item_id,ss_ext_sales_price] [sum,sum] + Project [i_item_id,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_item_id,ss_ext_sales_price,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [i_item_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_id,i_item_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (3) + Project [d_date_sk] + BroadcastHashJoin [d_date,d_date] + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date] + Filter [d_week_seq] + Subquery #1 + WholeStageCodegen (1) + Project [d_week_seq] + Filter [d_date] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_week_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (9) + Filter [cs_item_rev] + HashAggregate [i_item_id,sum] [cs_item_rev,item_id,sum,sum(UnscaledValue(cs_ext_sales_price))] + InputAdapter + Exchange [i_item_id] #6 + WholeStageCodegen (8) + HashAggregate [cs_ext_sales_price,i_item_id] [sum,sum] + Project [cs_ext_sales_price,i_item_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project 
[cs_ext_sales_price,cs_sold_date_sk,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [i_item_id,i_item_sk] #2 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (7) + Project [d_date_sk] + BroadcastHashJoin [d_date,d_date] + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (6) + Project [d_date] + Filter [d_week_seq] + ReusedSubquery [d_week_seq] #1 + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_week_seq] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (14) + Filter [ws_item_rev] + HashAggregate [i_item_id,sum] [item_id,sum,sum(UnscaledValue(ws_ext_sales_price)),ws_item_rev] + InputAdapter + Exchange [i_item_id] #10 + WholeStageCodegen (13) + HashAggregate [i_item_id,ws_ext_sales_price] [sum,sum] + Project [i_item_id,ws_ext_sales_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [i_item_id,ws_ext_sales_price,ws_sold_date_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [i_item_id,i_item_sk] #2 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (12) + Project [d_date_sk] + BroadcastHashJoin [d_date,d_date] + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (11) + Project [d_date] + Filter [d_week_seq] + ReusedSubquery [d_week_seq] #1 + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_week_seq] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.sf100/explain.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.sf100/explain.txt new file mode 100644 index 0000000000000..84dfe09b7e67d --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.sf100/explain.txt @@ -0,0 +1,249 @@ +== Physical Plan == +TakeOrderedAndProject (44) ++- * Project (43) + +- * BroadcastHashJoin Inner BuildRight (42) + :- * Project (25) + : +- * BroadcastHashJoin Inner BuildRight (24) + : :- * Project (18) + : : +- * BroadcastHashJoin Inner BuildRight (17) + : : :- * HashAggregate (12) + : : : +- Exchange (11) + : : : +- * HashAggregate (10) + : : : +- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (16) + : : +- * Filter (15) + : : +- * ColumnarToRow (14) + : : +- Scan parquet default.store (13) + : +- BroadcastExchange (23) + : +- * Project (22) + : +- * Filter (21) + : +- * ColumnarToRow (20) + : +- Scan parquet default.date_dim (19) + +- BroadcastExchange (41) + +- * Project (40) + +- * BroadcastHashJoin Inner BuildRight (39) + :- * Project (33) + : +- * BroadcastHashJoin Inner BuildRight (32) + : :- * HashAggregate (27) + : : +- ReusedExchange (26) + : +- BroadcastExchange (31) + : +- * Filter (30) + : +- * ColumnarToRow (29) + : +- Scan parquet default.store (28) + +- BroadcastExchange (38) + +- * Project (37) + +- * Filter (36) + +- * ColumnarToRow (35) + +- Scan parquet default.date_dim (34) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_sales_price#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: 
[IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_sales_price#3] + +(3) Filter [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_sales_price#3] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_week_seq)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] +Condition : (isnotnull(d_date_sk#4) AND isnotnull(d_week_seq#5)) + +(7) BroadcastExchange +Input [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] + +(8) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(9) Project [codegen id : 2] +Output [4]: [ss_store_sk#2, ss_sales_price#3, d_week_seq#5, d_day_name#6] +Input [6]: [ss_sold_date_sk#1, ss_store_sk#2, ss_sales_price#3, d_date_sk#4, d_week_seq#5, d_day_name#6] + +(10) HashAggregate [codegen id : 2] +Input [4]: [ss_store_sk#2, ss_sales_price#3, d_week_seq#5, d_day_name#6] +Keys [2]: [d_week_seq#5, ss_store_sk#2] +Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN 
ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))] +Aggregate Attributes [7]: [sum#8, sum#9, sum#10, sum#11, sum#12, sum#13, sum#14] +Results [9]: [d_week_seq#5, ss_store_sk#2, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20, sum#21] + +(11) Exchange +Input [9]: [d_week_seq#5, ss_store_sk#2, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20, sum#21] +Arguments: hashpartitioning(d_week_seq#5, ss_store_sk#2, 5), true, [id=#22] + +(12) HashAggregate [codegen id : 10] +Input [9]: [d_week_seq#5, ss_store_sk#2, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20, sum#21] +Keys [2]: [d_week_seq#5, ss_store_sk#2] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END))#23, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END))#24, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday) THEN ss_sales_price#3 ELSE null END))#25, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#6 
= Thursday) THEN ss_sales_price#3 ELSE null END))#27, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))#29] +Results [9]: [d_week_seq#5, ss_store_sk#2, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END))#23,17,2) AS sun_sales#30, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END))#24,17,2) AS mon_sales#31, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday) THEN ss_sales_price#3 ELSE null END))#25,17,2) AS tue_sales#32, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END))#26,17,2) AS wed_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END))#27,17,2) AS thu_sales#34, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END))#28,17,2) AS fri_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))#29,17,2) AS sat_sales#36] + +(13) Scan parquet default.store +Output [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 3] +Input [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] + +(15) Filter [codegen id : 3] +Input [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] +Condition : (isnotnull(s_store_sk#37) AND isnotnull(s_store_id#38)) + +(16) BroadcastExchange +Input [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#40] + +(17) 
BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#37] +Join condition: None + +(18) Project [codegen id : 10] +Output [10]: [d_week_seq#5, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#38, s_store_name#39] +Input [12]: [d_week_seq#5, ss_store_sk#2, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_sk#37, s_store_id#38, s_store_name#39] + +(19) Scan parquet default.date_dim +Output [2]: [d_month_seq#41, d_week_seq#42] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_week_seq)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 4] +Input [2]: [d_month_seq#41, d_week_seq#42] + +(21) Filter [codegen id : 4] +Input [2]: [d_month_seq#41, d_week_seq#42] +Condition : (((isnotnull(d_month_seq#41) AND (d_month_seq#41 >= 1212)) AND (d_month_seq#41 <= 1223)) AND isnotnull(d_week_seq#42)) + +(22) Project [codegen id : 4] +Output [1]: [d_week_seq#42] +Input [2]: [d_month_seq#41, d_week_seq#42] + +(23) BroadcastExchange +Input [1]: [d_week_seq#42] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#43] + +(24) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [d_week_seq#5] +Right keys [1]: [d_week_seq#42] +Join condition: None + +(25) Project [codegen id : 10] +Output [10]: [s_store_name#39 AS s_store_name1#44, d_week_seq#5 AS d_week_seq1#45, s_store_id#38 AS s_store_id1#46, sun_sales#30 AS sun_sales1#47, mon_sales#31 AS mon_sales1#48, tue_sales#32 AS tue_sales1#49, wed_sales#33 AS wed_sales1#50, thu_sales#34 AS thu_sales1#51, fri_sales#35 AS fri_sales1#52, sat_sales#36 AS sat_sales1#53] +Input [11]: [d_week_seq#5, 
sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#38, s_store_name#39, d_week_seq#42] + +(26) ReusedExchange [Reuses operator id: 11] +Output [9]: [d_week_seq#5, ss_store_sk#2, sum#54, sum#55, sum#56, sum#57, sum#58, sum#59, sum#60] + +(27) HashAggregate [codegen id : 9] +Input [9]: [d_week_seq#5, ss_store_sk#2, sum#54, sum#55, sum#56, sum#57, sum#58, sum#59, sum#60] +Keys [2]: [d_week_seq#5, ss_store_sk#2] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END))#61, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END))#62, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday) THEN ss_sales_price#3 ELSE null END))#63, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END))#64, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END))#65, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END))#66, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))#67] +Results [9]: [d_week_seq#5, ss_store_sk#2, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END))#61,17,2) AS sun_sales#30, 
MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END))#62,17,2) AS mon_sales#31, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday) THEN ss_sales_price#3 ELSE null END))#63,17,2) AS tue_sales#32, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END))#64,17,2) AS wed_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END))#65,17,2) AS thu_sales#34, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END))#66,17,2) AS fri_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))#67,17,2) AS sat_sales#36] + +(28) Scan parquet default.store +Output [2]: [s_store_sk#37, s_store_id#38] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 7] +Input [2]: [s_store_sk#37, s_store_id#38] + +(30) Filter [codegen id : 7] +Input [2]: [s_store_sk#37, s_store_id#38] +Condition : (isnotnull(s_store_sk#37) AND isnotnull(s_store_id#38)) + +(31) BroadcastExchange +Input [2]: [s_store_sk#37, s_store_id#38] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#68] + +(32) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#37] +Join condition: None + +(33) Project [codegen id : 9] +Output [9]: [d_week_seq#5, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#38] +Input [11]: [d_week_seq#5, ss_store_sk#2, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_sk#37, s_store_id#38] + +(34) Scan 
parquet default.date_dim +Output [2]: [d_month_seq#69, d_week_seq#70] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1224), LessThanOrEqual(d_month_seq,1235), IsNotNull(d_week_seq)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 8] +Input [2]: [d_month_seq#69, d_week_seq#70] + +(36) Filter [codegen id : 8] +Input [2]: [d_month_seq#69, d_week_seq#70] +Condition : (((isnotnull(d_month_seq#69) AND (d_month_seq#69 >= 1224)) AND (d_month_seq#69 <= 1235)) AND isnotnull(d_week_seq#70)) + +(37) Project [codegen id : 8] +Output [1]: [d_week_seq#70] +Input [2]: [d_month_seq#69, d_week_seq#70] + +(38) BroadcastExchange +Input [1]: [d_week_seq#70] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#71] + +(39) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [d_week_seq#5] +Right keys [1]: [d_week_seq#70] +Join condition: None + +(40) Project [codegen id : 9] +Output [9]: [d_week_seq#5 AS d_week_seq2#72, s_store_id#38 AS s_store_id2#73, sun_sales#30 AS sun_sales2#74, mon_sales#31 AS mon_sales2#75, tue_sales#32 AS tue_sales2#76, wed_sales#33 AS wed_sales2#77, thu_sales#34 AS thu_sales2#78, fri_sales#35 AS fri_sales2#79, sat_sales#36 AS sat_sales2#80] +Input [10]: [d_week_seq#5, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#38, d_week_seq#70] + +(41) BroadcastExchange +Input [9]: [d_week_seq2#72, s_store_id2#73, sun_sales2#74, mon_sales2#75, tue_sales2#76, wed_sales2#77, thu_sales2#78, fri_sales2#79, sat_sales2#80] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true], (input[0, int, true] - 52)),false), [id=#81] + +(42) BroadcastHashJoin [codegen id : 10] +Left keys [2]: [s_store_id1#46, d_week_seq1#45] +Right keys [2]: [s_store_id2#73, (d_week_seq2#72 
- 52)] +Join condition: None + +(43) Project [codegen id : 10] +Output [10]: [s_store_name1#44, s_store_id1#46, d_week_seq1#45, CheckOverflow((promote_precision(sun_sales1#47) / promote_precision(sun_sales2#74)), DecimalType(37,20), true) AS (sun_sales1 / sun_sales2)#82, CheckOverflow((promote_precision(mon_sales1#48) / promote_precision(mon_sales2#75)), DecimalType(37,20), true) AS (mon_sales1 / mon_sales2)#83, CheckOverflow((promote_precision(tue_sales1#49) / promote_precision(tue_sales2#76)), DecimalType(37,20), true) AS (tue_sales1 / tue_sales2)#84, CheckOverflow((promote_precision(wed_sales1#50) / promote_precision(wed_sales2#77)), DecimalType(37,20), true) AS (wed_sales1 / wed_sales2)#85, CheckOverflow((promote_precision(thu_sales1#51) / promote_precision(thu_sales2#78)), DecimalType(37,20), true) AS (thu_sales1 / thu_sales2)#86, CheckOverflow((promote_precision(fri_sales1#52) / promote_precision(fri_sales2#79)), DecimalType(37,20), true) AS (fri_sales1 / fri_sales2)#87, CheckOverflow((promote_precision(sat_sales1#53) / promote_precision(sat_sales2#80)), DecimalType(37,20), true) AS (sat_sales1 / sat_sales2)#88] +Input [19]: [s_store_name1#44, d_week_seq1#45, s_store_id1#46, sun_sales1#47, mon_sales1#48, tue_sales1#49, wed_sales1#50, thu_sales1#51, fri_sales1#52, sat_sales1#53, d_week_seq2#72, s_store_id2#73, sun_sales2#74, mon_sales2#75, tue_sales2#76, wed_sales2#77, thu_sales2#78, fri_sales2#79, sat_sales2#80] + +(44) TakeOrderedAndProject +Input [10]: [s_store_name1#44, s_store_id1#46, d_week_seq1#45, (sun_sales1 / sun_sales2)#82, (mon_sales1 / mon_sales2)#83, (tue_sales1 / tue_sales2)#84, (wed_sales1 / wed_sales2)#85, (thu_sales1 / thu_sales2)#86, (fri_sales1 / fri_sales2)#87, (sat_sales1 / sat_sales2)#88] +Arguments: 100, [s_store_name1#44 ASC NULLS FIRST, s_store_id1#46 ASC NULLS FIRST, d_week_seq1#45 ASC NULLS FIRST], [s_store_name1#44, s_store_id1#46, d_week_seq1#45, (sun_sales1 / sun_sales2)#82, (mon_sales1 / mon_sales2)#83, (tue_sales1 / 
tue_sales2)#84, (wed_sales1 / wed_sales2)#85, (thu_sales1 / thu_sales2)#86, (fri_sales1 / fri_sales2)#87, (sat_sales1 / sat_sales2)#88] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.sf100/simplified.txt new file mode 100644 index 0000000000000..de4e9930a2d38 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.sf100/simplified.txt @@ -0,0 +1,66 @@ +TakeOrderedAndProject [(fri_sales1 / fri_sales2),(mon_sales1 / mon_sales2),(sat_sales1 / sat_sales2),(sun_sales1 / sun_sales2),(thu_sales1 / thu_sales2),(tue_sales1 / tue_sales2),(wed_sales1 / wed_sales2),d_week_seq1,s_store_id1,s_store_name1] + WholeStageCodegen (10) + Project [d_week_seq1,fri_sales1,fri_sales2,mon_sales1,mon_sales2,s_store_id1,s_store_name1,sat_sales1,sat_sales2,sun_sales1,sun_sales2,thu_sales1,thu_sales2,tue_sales1,tue_sales2,wed_sales1,wed_sales2] + BroadcastHashJoin [d_week_seq1,d_week_seq2,s_store_id1,s_store_id2] + Project [d_week_seq,fri_sales,mon_sales,s_store_id,s_store_name,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] + BroadcastHashJoin [d_week_seq,d_week_seq] + Project [d_week_seq,fri_sales,mon_sales,s_store_id,s_store_name,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] + BroadcastHashJoin [s_store_sk,ss_store_sk] + HashAggregate [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum] [fri_sales,mon_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = 
Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sun_sales,thu_sales,tue_sales,wed_sales] + InputAdapter + Exchange [d_week_seq,ss_store_sk] #1 + WholeStageCodegen (2) + HashAggregate [d_day_name,d_week_seq,ss_sales_price,ss_store_sk] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [d_day_name,d_week_seq,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [d_date_sk,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_day_name,d_week_seq] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (3) + Filter [s_store_id,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_id,s_store_name,s_store_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (4) + Project [d_week_seq] + Filter [d_month_seq,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_month_seq,d_week_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (9) + Project [d_week_seq,fri_sales,mon_sales,s_store_id,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] + BroadcastHashJoin [d_week_seq,d_week_seq] + Project [d_week_seq,fri_sales,mon_sales,s_store_id,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] + BroadcastHashJoin [s_store_sk,ss_store_sk] + HashAggregate [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum] [fri_sales,mon_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE 
WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sun_sales,thu_sales,tue_sales,wed_sales] + InputAdapter + ReusedExchange [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum] #1 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Filter [s_store_id,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_id,s_store_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (8) + Project [d_week_seq] + Filter [d_month_seq,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_month_seq,d_week_seq] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/explain.txt new file mode 100644 index 0000000000000..a7c8053ca4713 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/explain.txt @@ -0,0 +1,249 @@ +== Physical Plan == +TakeOrderedAndProject (44) ++- * Project (43) + +- * BroadcastHashJoin Inner BuildRight (42) + :- * Project (25) + : +- * BroadcastHashJoin Inner BuildRight (24) + : :- * Project (18) + : : +- * BroadcastHashJoin Inner BuildRight (17) + : : :- * HashAggregate (12) + : : : +- Exchange (11) + : : : +- * HashAggregate (10) + : : : +- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (16) + : : +- * Filter (15) + : : +- * ColumnarToRow (14) + : : +- Scan parquet default.store (13) + : +- 
BroadcastExchange (23) + : +- * Project (22) + : +- * Filter (21) + : +- * ColumnarToRow (20) + : +- Scan parquet default.date_dim (19) + +- BroadcastExchange (41) + +- * Project (40) + +- * BroadcastHashJoin Inner BuildRight (39) + :- * Project (33) + : +- * BroadcastHashJoin Inner BuildRight (32) + : :- * HashAggregate (27) + : : +- ReusedExchange (26) + : +- BroadcastExchange (31) + : +- * Filter (30) + : +- * ColumnarToRow (29) + : +- Scan parquet default.store (28) + +- BroadcastExchange (38) + +- * Project (37) + +- * Filter (36) + +- * ColumnarToRow (35) + +- Scan parquet default.date_dim (34) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_sales_price#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_sales_price#3] + +(3) Filter [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_sales_price#3] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_week_seq)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] +Condition : (isnotnull(d_date_sk#4) AND isnotnull(d_week_seq#5)) + +(7) BroadcastExchange +Input [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as 
bigint)),false), [id=#7] + +(8) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(9) Project [codegen id : 2] +Output [4]: [ss_store_sk#2, ss_sales_price#3, d_week_seq#5, d_day_name#6] +Input [6]: [ss_sold_date_sk#1, ss_store_sk#2, ss_sales_price#3, d_date_sk#4, d_week_seq#5, d_day_name#6] + +(10) HashAggregate [codegen id : 2] +Input [4]: [ss_store_sk#2, ss_sales_price#3, d_week_seq#5, d_day_name#6] +Keys [2]: [d_week_seq#5, ss_store_sk#2] +Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))] +Aggregate Attributes [7]: [sum#8, sum#9, sum#10, sum#11, sum#12, sum#13, sum#14] +Results [9]: [d_week_seq#5, ss_store_sk#2, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20, sum#21] + +(11) Exchange +Input [9]: [d_week_seq#5, ss_store_sk#2, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20, sum#21] +Arguments: hashpartitioning(d_week_seq#5, ss_store_sk#2, 5), true, [id=#22] + +(12) HashAggregate [codegen id : 10] +Input [9]: [d_week_seq#5, ss_store_sk#2, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20, sum#21] +Keys [2]: [d_week_seq#5, ss_store_sk#2] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END)), 
sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END))#23, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END))#24, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday) THEN ss_sales_price#3 ELSE null END))#25, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END))#27, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))#29] +Results [9]: [d_week_seq#5, ss_store_sk#2, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END))#23,17,2) AS sun_sales#30, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END))#24,17,2) AS mon_sales#31, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday) THEN ss_sales_price#3 ELSE null END))#25,17,2) AS tue_sales#32, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END))#26,17,2) AS wed_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END))#27,17,2) AS thu_sales#34, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END))#28,17,2) AS fri_sales#35, 
MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))#29,17,2) AS sat_sales#36] + +(13) Scan parquet default.store +Output [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 3] +Input [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] + +(15) Filter [codegen id : 3] +Input [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] +Condition : (isnotnull(s_store_sk#37) AND isnotnull(s_store_id#38)) + +(16) BroadcastExchange +Input [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#40] + +(17) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#37] +Join condition: None + +(18) Project [codegen id : 10] +Output [10]: [d_week_seq#5, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#38, s_store_name#39] +Input [12]: [d_week_seq#5, ss_store_sk#2, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_sk#37, s_store_id#38, s_store_name#39] + +(19) Scan parquet default.date_dim +Output [2]: [d_month_seq#41, d_week_seq#42] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_week_seq)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 4] +Input [2]: [d_month_seq#41, d_week_seq#42] + +(21) Filter [codegen id : 4] +Input [2]: [d_month_seq#41, d_week_seq#42] 
+Condition : (((isnotnull(d_month_seq#41) AND (d_month_seq#41 >= 1212)) AND (d_month_seq#41 <= 1223)) AND isnotnull(d_week_seq#42)) + +(22) Project [codegen id : 4] +Output [1]: [d_week_seq#42] +Input [2]: [d_month_seq#41, d_week_seq#42] + +(23) BroadcastExchange +Input [1]: [d_week_seq#42] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#43] + +(24) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [d_week_seq#5] +Right keys [1]: [d_week_seq#42] +Join condition: None + +(25) Project [codegen id : 10] +Output [10]: [s_store_name#39 AS s_store_name1#44, d_week_seq#5 AS d_week_seq1#45, s_store_id#38 AS s_store_id1#46, sun_sales#30 AS sun_sales1#47, mon_sales#31 AS mon_sales1#48, tue_sales#32 AS tue_sales1#49, wed_sales#33 AS wed_sales1#50, thu_sales#34 AS thu_sales1#51, fri_sales#35 AS fri_sales1#52, sat_sales#36 AS sat_sales1#53] +Input [11]: [d_week_seq#5, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#38, s_store_name#39, d_week_seq#42] + +(26) ReusedExchange [Reuses operator id: 11] +Output [9]: [d_week_seq#5, ss_store_sk#2, sum#54, sum#55, sum#56, sum#57, sum#58, sum#59, sum#60] + +(27) HashAggregate [codegen id : 9] +Input [9]: [d_week_seq#5, ss_store_sk#2, sum#54, sum#55, sum#56, sum#57, sum#58, sum#59, sum#60] +Keys [2]: [d_week_seq#5, ss_store_sk#2] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = 
Saturday) THEN ss_sales_price#3 ELSE null END))] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END))#61, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END))#62, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday) THEN ss_sales_price#3 ELSE null END))#63, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END))#64, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END))#65, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END))#66, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))#67] +Results [9]: [d_week_seq#5, ss_store_sk#2, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END))#61,17,2) AS sun_sales#30, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END))#62,17,2) AS mon_sales#31, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday) THEN ss_sales_price#3 ELSE null END))#63,17,2) AS tue_sales#32, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END))#64,17,2) AS wed_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END))#65,17,2) AS thu_sales#34, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END))#66,17,2) AS fri_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))#67,17,2) AS sat_sales#36] + +(28) Scan parquet default.store +Output [2]: [s_store_sk#37, s_store_id#38] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)] 
+ReadSchema: struct + +(29) ColumnarToRow [codegen id : 7] +Input [2]: [s_store_sk#37, s_store_id#38] + +(30) Filter [codegen id : 7] +Input [2]: [s_store_sk#37, s_store_id#38] +Condition : (isnotnull(s_store_sk#37) AND isnotnull(s_store_id#38)) + +(31) BroadcastExchange +Input [2]: [s_store_sk#37, s_store_id#38] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#68] + +(32) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#37] +Join condition: None + +(33) Project [codegen id : 9] +Output [9]: [d_week_seq#5, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#38] +Input [11]: [d_week_seq#5, ss_store_sk#2, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_sk#37, s_store_id#38] + +(34) Scan parquet default.date_dim +Output [2]: [d_month_seq#69, d_week_seq#70] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1224), LessThanOrEqual(d_month_seq,1235), IsNotNull(d_week_seq)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 8] +Input [2]: [d_month_seq#69, d_week_seq#70] + +(36) Filter [codegen id : 8] +Input [2]: [d_month_seq#69, d_week_seq#70] +Condition : (((isnotnull(d_month_seq#69) AND (d_month_seq#69 >= 1224)) AND (d_month_seq#69 <= 1235)) AND isnotnull(d_week_seq#70)) + +(37) Project [codegen id : 8] +Output [1]: [d_week_seq#70] +Input [2]: [d_month_seq#69, d_week_seq#70] + +(38) BroadcastExchange +Input [1]: [d_week_seq#70] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#71] + +(39) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [d_week_seq#5] +Right keys [1]: [d_week_seq#70] +Join condition: None + +(40) Project 
[codegen id : 9] +Output [9]: [d_week_seq#5 AS d_week_seq2#72, s_store_id#38 AS s_store_id2#73, sun_sales#30 AS sun_sales2#74, mon_sales#31 AS mon_sales2#75, tue_sales#32 AS tue_sales2#76, wed_sales#33 AS wed_sales2#77, thu_sales#34 AS thu_sales2#78, fri_sales#35 AS fri_sales2#79, sat_sales#36 AS sat_sales2#80] +Input [10]: [d_week_seq#5, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#38, d_week_seq#70] + +(41) BroadcastExchange +Input [9]: [d_week_seq2#72, s_store_id2#73, sun_sales2#74, mon_sales2#75, tue_sales2#76, wed_sales2#77, thu_sales2#78, fri_sales2#79, sat_sales2#80] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true], (input[0, int, true] - 52)),false), [id=#81] + +(42) BroadcastHashJoin [codegen id : 10] +Left keys [2]: [s_store_id1#46, d_week_seq1#45] +Right keys [2]: [s_store_id2#73, (d_week_seq2#72 - 52)] +Join condition: None + +(43) Project [codegen id : 10] +Output [10]: [s_store_name1#44, s_store_id1#46, d_week_seq1#45, CheckOverflow((promote_precision(sun_sales1#47) / promote_precision(sun_sales2#74)), DecimalType(37,20), true) AS (sun_sales1 / sun_sales2)#82, CheckOverflow((promote_precision(mon_sales1#48) / promote_precision(mon_sales2#75)), DecimalType(37,20), true) AS (mon_sales1 / mon_sales2)#83, CheckOverflow((promote_precision(tue_sales1#49) / promote_precision(tue_sales2#76)), DecimalType(37,20), true) AS (tue_sales1 / tue_sales2)#84, CheckOverflow((promote_precision(wed_sales1#50) / promote_precision(wed_sales2#77)), DecimalType(37,20), true) AS (wed_sales1 / wed_sales2)#85, CheckOverflow((promote_precision(thu_sales1#51) / promote_precision(thu_sales2#78)), DecimalType(37,20), true) AS (thu_sales1 / thu_sales2)#86, CheckOverflow((promote_precision(fri_sales1#52) / promote_precision(fri_sales2#79)), DecimalType(37,20), true) AS (fri_sales1 / fri_sales2)#87, CheckOverflow((promote_precision(sat_sales1#53) / promote_precision(sat_sales2#80)), DecimalType(37,20), 
true) AS (sat_sales1 / sat_sales2)#88] +Input [19]: [s_store_name1#44, d_week_seq1#45, s_store_id1#46, sun_sales1#47, mon_sales1#48, tue_sales1#49, wed_sales1#50, thu_sales1#51, fri_sales1#52, sat_sales1#53, d_week_seq2#72, s_store_id2#73, sun_sales2#74, mon_sales2#75, tue_sales2#76, wed_sales2#77, thu_sales2#78, fri_sales2#79, sat_sales2#80] + +(44) TakeOrderedAndProject +Input [10]: [s_store_name1#44, s_store_id1#46, d_week_seq1#45, (sun_sales1 / sun_sales2)#82, (mon_sales1 / mon_sales2)#83, (tue_sales1 / tue_sales2)#84, (wed_sales1 / wed_sales2)#85, (thu_sales1 / thu_sales2)#86, (fri_sales1 / fri_sales2)#87, (sat_sales1 / sat_sales2)#88] +Arguments: 100, [s_store_name1#44 ASC NULLS FIRST, s_store_id1#46 ASC NULLS FIRST, d_week_seq1#45 ASC NULLS FIRST], [s_store_name1#44, s_store_id1#46, d_week_seq1#45, (sun_sales1 / sun_sales2)#82, (mon_sales1 / mon_sales2)#83, (tue_sales1 / tue_sales2)#84, (wed_sales1 / wed_sales2)#85, (thu_sales1 / thu_sales2)#86, (fri_sales1 / fri_sales2)#87, (sat_sales1 / sat_sales2)#88] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/simplified.txt new file mode 100644 index 0000000000000..de4e9930a2d38 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/simplified.txt @@ -0,0 +1,66 @@ +TakeOrderedAndProject [(fri_sales1 / fri_sales2),(mon_sales1 / mon_sales2),(sat_sales1 / sat_sales2),(sun_sales1 / sun_sales2),(thu_sales1 / thu_sales2),(tue_sales1 / tue_sales2),(wed_sales1 / wed_sales2),d_week_seq1,s_store_id1,s_store_name1] + WholeStageCodegen (10) + Project [d_week_seq1,fri_sales1,fri_sales2,mon_sales1,mon_sales2,s_store_id1,s_store_name1,sat_sales1,sat_sales2,sun_sales1,sun_sales2,thu_sales1,thu_sales2,tue_sales1,tue_sales2,wed_sales1,wed_sales2] + BroadcastHashJoin [d_week_seq1,d_week_seq2,s_store_id1,s_store_id2] + Project 
[d_week_seq,fri_sales,mon_sales,s_store_id,s_store_name,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] + BroadcastHashJoin [d_week_seq,d_week_seq] + Project [d_week_seq,fri_sales,mon_sales,s_store_id,s_store_name,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] + BroadcastHashJoin [s_store_sk,ss_store_sk] + HashAggregate [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum] [fri_sales,mon_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sun_sales,thu_sales,tue_sales,wed_sales] + InputAdapter + Exchange [d_week_seq,ss_store_sk] #1 + WholeStageCodegen (2) + HashAggregate [d_day_name,d_week_seq,ss_sales_price,ss_store_sk] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [d_day_name,d_week_seq,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [d_date_sk,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_day_name,d_week_seq] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (3) + Filter [s_store_id,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_id,s_store_name,s_store_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (4) 
+ Project [d_week_seq] + Filter [d_month_seq,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_month_seq,d_week_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (9) + Project [d_week_seq,fri_sales,mon_sales,s_store_id,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] + BroadcastHashJoin [d_week_seq,d_week_seq] + Project [d_week_seq,fri_sales,mon_sales,s_store_id,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] + BroadcastHashJoin [s_store_sk,ss_store_sk] + HashAggregate [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum] [fri_sales,mon_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sun_sales,thu_sales,tue_sales,wed_sales] + InputAdapter + ReusedExchange [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum] #1 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Filter [s_store_id,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_id,s_store_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (8) + Project [d_week_seq] + Filter [d_month_seq,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_month_seq,d_week_seq] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.sf100/explain.txt new file mode 100644 index 
0000000000000..2c003cb15cc3b --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.sf100/explain.txt @@ -0,0 +1,331 @@ +== Physical Plan == +TakeOrderedAndProject (50) ++- * Project (49) + +- * Filter (48) + +- * HashAggregate (47) + +- Exchange (46) + +- * HashAggregate (45) + +- * Project (44) + +- * SortMergeJoin Inner (43) + :- * Sort (28) + : +- Exchange (27) + : +- * Project (26) + : +- * BroadcastHashJoin Inner BuildRight (25) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.store_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.date_dim (4) + : +- BroadcastExchange (24) + : +- * Project (23) + : +- * Filter (22) + : +- * BroadcastHashJoin LeftOuter BuildRight (21) + : :- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.item (11) + : +- BroadcastExchange (20) + : +- * HashAggregate (19) + : +- Exchange (18) + : +- * HashAggregate (17) + : +- * Filter (16) + : +- * ColumnarToRow (15) + : +- Scan parquet default.item (14) + +- * Sort (42) + +- Exchange (41) + +- * Project (40) + +- * SortMergeJoin Inner (39) + :- * Sort (33) + : +- Exchange (32) + : +- * Filter (31) + : +- * ColumnarToRow (30) + : +- Scan parquet default.customer_address (29) + +- * Sort (38) + +- Exchange (37) + +- * Filter (36) + +- * ColumnarToRow (35) + +- Scan parquet default.customer (34) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input 
[3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3] + +(3) Filter [codegen id : 5] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3] +Condition : ((isnotnull(ss_customer_sk#3) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_item_sk#2)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#4, d_month_seq#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : ((isnotnull(d_month_seq#5) AND (d_month_seq#5 = Subquery scalar-subquery#6, [id=#7])) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(10) Project [codegen id : 5] +Output [2]: [ss_item_sk#2, ss_customer_sk#3] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, d_date_sk#4] + +(11) Scan parquet default.item +Output [3]: [i_item_sk#9, i_current_price#10, i_category#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_current_price), IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 4] +Input [3]: [i_item_sk#9, i_current_price#10, i_category#11] + +(13) Filter [codegen id : 4] +Input [3]: [i_item_sk#9, i_current_price#10, i_category#11] +Condition : 
(isnotnull(i_current_price#10) AND isnotnull(i_item_sk#9)) + +(14) Scan parquet default.item +Output [2]: [i_current_price#10, i_category#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_category)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 2] +Input [2]: [i_current_price#10, i_category#11] + +(16) Filter [codegen id : 2] +Input [2]: [i_current_price#10, i_category#11] +Condition : isnotnull(i_category#11) + +(17) HashAggregate [codegen id : 2] +Input [2]: [i_current_price#10, i_category#11] +Keys [1]: [i_category#11] +Functions [1]: [partial_avg(UnscaledValue(i_current_price#10))] +Aggregate Attributes [2]: [sum#12, count#13] +Results [3]: [i_category#11, sum#14, count#15] + +(18) Exchange +Input [3]: [i_category#11, sum#14, count#15] +Arguments: hashpartitioning(i_category#11, 5), true, [id=#16] + +(19) HashAggregate [codegen id : 3] +Input [3]: [i_category#11, sum#14, count#15] +Keys [1]: [i_category#11] +Functions [1]: [avg(UnscaledValue(i_current_price#10))] +Aggregate Attributes [1]: [avg(UnscaledValue(i_current_price#10))#17] +Results [2]: [cast((avg(UnscaledValue(i_current_price#10))#17 / 100.0) as decimal(11,6)) AS avg(i_current_price)#18, i_category#11 AS i_category#11#19] + +(20) BroadcastExchange +Input [2]: [avg(i_current_price)#18, i_category#11#19] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [id=#20] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_category#11] +Right keys [1]: [i_category#11#19] +Join condition: None + +(22) Filter [codegen id : 4] +Input [5]: [i_item_sk#9, i_current_price#10, i_category#11, avg(i_current_price)#18, i_category#11#19] +Condition : (cast(i_current_price#10 as decimal(14,7)) > CheckOverflow((1.200000 * promote_precision(avg(i_current_price)#18)), DecimalType(14,7), true)) + +(23) Project [codegen id : 4] 
+Output [1]: [i_item_sk#9] +Input [5]: [i_item_sk#9, i_current_price#10, i_category#11, avg(i_current_price)#18, i_category#11#19] + +(24) BroadcastExchange +Input [1]: [i_item_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] + +(25) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#9] +Join condition: None + +(26) Project [codegen id : 5] +Output [1]: [ss_customer_sk#3] +Input [3]: [ss_item_sk#2, ss_customer_sk#3, i_item_sk#9] + +(27) Exchange +Input [1]: [ss_customer_sk#3] +Arguments: hashpartitioning(ss_customer_sk#3, 5), true, [id=#22] + +(28) Sort [codegen id : 6] +Input [1]: [ss_customer_sk#3] +Arguments: [ss_customer_sk#3 ASC NULLS FIRST], false, 0 + +(29) Scan parquet default.customer_address +Output [2]: [ca_address_sk#23, ca_state#24] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 7] +Input [2]: [ca_address_sk#23, ca_state#24] + +(31) Filter [codegen id : 7] +Input [2]: [ca_address_sk#23, ca_state#24] +Condition : isnotnull(ca_address_sk#23) + +(32) Exchange +Input [2]: [ca_address_sk#23, ca_state#24] +Arguments: hashpartitioning(ca_address_sk#23, 5), true, [id=#25] + +(33) Sort [codegen id : 8] +Input [2]: [ca_address_sk#23, ca_state#24] +Arguments: [ca_address_sk#23 ASC NULLS FIRST], false, 0 + +(34) Scan parquet default.customer +Output [2]: [c_customer_sk#26, c_current_addr_sk#27] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_customer_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 9] +Input [2]: [c_customer_sk#26, 
c_current_addr_sk#27] + +(36) Filter [codegen id : 9] +Input [2]: [c_customer_sk#26, c_current_addr_sk#27] +Condition : (isnotnull(c_current_addr_sk#27) AND isnotnull(c_customer_sk#26)) + +(37) Exchange +Input [2]: [c_customer_sk#26, c_current_addr_sk#27] +Arguments: hashpartitioning(c_current_addr_sk#27, 5), true, [id=#28] + +(38) Sort [codegen id : 10] +Input [2]: [c_customer_sk#26, c_current_addr_sk#27] +Arguments: [c_current_addr_sk#27 ASC NULLS FIRST], false, 0 + +(39) SortMergeJoin [codegen id : 11] +Left keys [1]: [ca_address_sk#23] +Right keys [1]: [c_current_addr_sk#27] +Join condition: None + +(40) Project [codegen id : 11] +Output [2]: [ca_state#24, c_customer_sk#26] +Input [4]: [ca_address_sk#23, ca_state#24, c_customer_sk#26, c_current_addr_sk#27] + +(41) Exchange +Input [2]: [ca_state#24, c_customer_sk#26] +Arguments: hashpartitioning(c_customer_sk#26, 5), true, [id=#29] + +(42) Sort [codegen id : 12] +Input [2]: [ca_state#24, c_customer_sk#26] +Arguments: [c_customer_sk#26 ASC NULLS FIRST], false, 0 + +(43) SortMergeJoin [codegen id : 13] +Left keys [1]: [ss_customer_sk#3] +Right keys [1]: [c_customer_sk#26] +Join condition: None + +(44) Project [codegen id : 13] +Output [1]: [ca_state#24] +Input [3]: [ss_customer_sk#3, ca_state#24, c_customer_sk#26] + +(45) HashAggregate [codegen id : 13] +Input [1]: [ca_state#24] +Keys [1]: [ca_state#24] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#30] +Results [2]: [ca_state#24, count#31] + +(46) Exchange +Input [2]: [ca_state#24, count#31] +Arguments: hashpartitioning(ca_state#24, 5), true, [id=#32] + +(47) HashAggregate [codegen id : 14] +Input [2]: [ca_state#24, count#31] +Keys [1]: [ca_state#24] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#33] +Results [3]: [ca_state#24 AS state#34, count(1)#33 AS cnt#35, count(1)#33 AS count(1)#36] + +(48) Filter [codegen id : 14] +Input [3]: [state#34, cnt#35, count(1)#36] +Condition : (count(1)#36 >= 10) + +(49) Project [codegen id 
: 14] +Output [2]: [state#34, cnt#35] +Input [3]: [state#34, cnt#35, count(1)#36] + +(50) TakeOrderedAndProject +Input [2]: [state#34, cnt#35] +Arguments: 100, [cnt#35 ASC NULLS FIRST], [state#34, cnt#35] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 6 Hosting Expression = Subquery scalar-subquery#6, [id=#7] +* HashAggregate (57) ++- Exchange (56) + +- * HashAggregate (55) + +- * Project (54) + +- * Filter (53) + +- * ColumnarToRow (52) + +- Scan parquet default.date_dim (51) + + +(51) Scan parquet default.date_dim +Output [3]: [d_month_seq#5, d_year#37, d_moy#38] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,1)] +ReadSchema: struct + +(52) ColumnarToRow [codegen id : 1] +Input [3]: [d_month_seq#5, d_year#37, d_moy#38] + +(53) Filter [codegen id : 1] +Input [3]: [d_month_seq#5, d_year#37, d_moy#38] +Condition : (((isnotnull(d_year#37) AND isnotnull(d_moy#38)) AND (d_year#37 = 2000)) AND (d_moy#38 = 1)) + +(54) Project [codegen id : 1] +Output [1]: [d_month_seq#5] +Input [3]: [d_month_seq#5, d_year#37, d_moy#38] + +(55) HashAggregate [codegen id : 1] +Input [1]: [d_month_seq#5] +Keys [1]: [d_month_seq#5] +Functions: [] +Aggregate Attributes: [] +Results [1]: [d_month_seq#5] + +(56) Exchange +Input [1]: [d_month_seq#5] +Arguments: hashpartitioning(d_month_seq#5, 5), true, [id=#39] + +(57) HashAggregate [codegen id : 2] +Input [1]: [d_month_seq#5] +Keys [1]: [d_month_seq#5] +Functions: [] +Aggregate Attributes: [] +Results [1]: [d_month_seq#5] + + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.sf100/simplified.txt new file mode 100644 index 0000000000000..f97e8686c4421 --- /dev/null +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.sf100/simplified.txt @@ -0,0 +1,95 @@ +TakeOrderedAndProject [cnt,state] + WholeStageCodegen (14) + Project [cnt,state] + Filter [count(1)] + HashAggregate [ca_state,count] [cnt,count,count(1),count(1),state] + InputAdapter + Exchange [ca_state] #1 + WholeStageCodegen (13) + HashAggregate [ca_state] [count,count] + Project [ca_state] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (6) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #2 + WholeStageCodegen (5) + Project [ss_customer_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_customer_sk,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_month_seq] + Subquery #1 + WholeStageCodegen (2) + HashAggregate [d_month_seq] + InputAdapter + Exchange [d_month_seq] #4 + WholeStageCodegen (1) + HashAggregate [d_month_seq] + Project [d_month_seq] + Filter [d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_month_seq,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [i_item_sk] + Filter [avg(i_current_price),i_current_price] + BroadcastHashJoin [i_category,i_category] + Filter [i_current_price,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_current_price,i_item_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + HashAggregate [count,i_category,sum] [avg(UnscaledValue(i_current_price)),avg(i_current_price),count,i_category,sum] + InputAdapter + Exchange [i_category] #7 + WholeStageCodegen (2) + HashAggregate 
[i_category,i_current_price] [count,count,sum,sum] + Filter [i_category] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_current_price] + InputAdapter + WholeStageCodegen (12) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #8 + WholeStageCodegen (11) + Project [c_customer_sk,ca_state] + SortMergeJoin [c_current_addr_sk,ca_address_sk] + InputAdapter + WholeStageCodegen (8) + Sort [ca_address_sk] + InputAdapter + Exchange [ca_address_sk] #9 + WholeStageCodegen (7) + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state] + InputAdapter + WholeStageCodegen (10) + Sort [c_current_addr_sk] + InputAdapter + Exchange [c_current_addr_sk] #10 + WholeStageCodegen (9) + Filter [c_current_addr_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_customer_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/explain.txt new file mode 100644 index 0000000000000..a3007b7efa680 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/explain.txt @@ -0,0 +1,301 @@ +== Physical Plan == +TakeOrderedAndProject (44) ++- * Project (43) + +- * Filter (42) + +- * HashAggregate (41) + +- Exchange (40) + +- * HashAggregate (39) + +- * Project (38) + +- * BroadcastHashJoin Inner BuildRight (37) + :- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.customer_address (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.customer (4) + : : +- BroadcastExchange (13) + : : 
+- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet default.store_sales (10) + : +- BroadcastExchange (20) + : +- * Project (19) + : +- * Filter (18) + : +- * ColumnarToRow (17) + : +- Scan parquet default.date_dim (16) + +- BroadcastExchange (36) + +- * Project (35) + +- * Filter (34) + +- * BroadcastHashJoin LeftOuter BuildRight (33) + :- * Filter (25) + : +- * ColumnarToRow (24) + : +- Scan parquet default.item (23) + +- BroadcastExchange (32) + +- * HashAggregate (31) + +- Exchange (30) + +- * HashAggregate (29) + +- * Filter (28) + +- * ColumnarToRow (27) + +- Scan parquet default.item (26) + + +(1) Scan parquet default.customer_address +Output [2]: [ca_address_sk#1, ca_state#2] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 7] +Input [2]: [ca_address_sk#1, ca_state#2] + +(3) Filter [codegen id : 7] +Input [2]: [ca_address_sk#1, ca_state#2] +Condition : isnotnull(ca_address_sk#1) + +(4) Scan parquet default.customer +Output [2]: [c_customer_sk#3, c_current_addr_sk#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_customer_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [c_customer_sk#3, c_current_addr_sk#4] + +(6) Filter [codegen id : 1] +Input [2]: [c_customer_sk#3, c_current_addr_sk#4] +Condition : (isnotnull(c_current_addr_sk#4) AND isnotnull(c_customer_sk#3)) + +(7) BroadcastExchange +Input [2]: [c_customer_sk#3, c_current_addr_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#5] + +(8) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ca_address_sk#1] +Right 
keys [1]: [c_current_addr_sk#4] +Join condition: None + +(9) Project [codegen id : 7] +Output [2]: [ca_state#2, c_customer_sk#3] +Input [4]: [ca_address_sk#1, ca_state#2, c_customer_sk#3, c_current_addr_sk#4] + +(10) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#6, ss_item_sk#7, ss_customer_sk#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [ss_sold_date_sk#6, ss_item_sk#7, ss_customer_sk#8] + +(12) Filter [codegen id : 2] +Input [3]: [ss_sold_date_sk#6, ss_item_sk#7, ss_customer_sk#8] +Condition : ((isnotnull(ss_customer_sk#8) AND isnotnull(ss_sold_date_sk#6)) AND isnotnull(ss_item_sk#7)) + +(13) BroadcastExchange +Input [3]: [ss_sold_date_sk#6, ss_item_sk#7, ss_customer_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, false] as bigint)),false), [id=#9] + +(14) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ss_customer_sk#8] +Join condition: None + +(15) Project [codegen id : 7] +Output [3]: [ca_state#2, ss_sold_date_sk#6, ss_item_sk#7] +Input [5]: [ca_state#2, c_customer_sk#3, ss_sold_date_sk#6, ss_item_sk#7, ss_customer_sk#8] + +(16) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_month_seq#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#10, d_month_seq#11] + +(18) Filter [codegen id : 3] +Input [2]: [d_date_sk#10, d_month_seq#11] +Condition : ((isnotnull(d_month_seq#11) AND (d_month_seq#11 = Subquery scalar-subquery#12, 
[id=#13])) AND isnotnull(d_date_sk#10)) + +(19) Project [codegen id : 3] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_month_seq#11] + +(20) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] + +(21) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_sold_date_sk#6] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(22) Project [codegen id : 7] +Output [2]: [ca_state#2, ss_item_sk#7] +Input [4]: [ca_state#2, ss_sold_date_sk#6, ss_item_sk#7, d_date_sk#10] + +(23) Scan parquet default.item +Output [3]: [i_item_sk#15, i_current_price#16, i_category#17] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_current_price), IsNotNull(i_item_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 6] +Input [3]: [i_item_sk#15, i_current_price#16, i_category#17] + +(25) Filter [codegen id : 6] +Input [3]: [i_item_sk#15, i_current_price#16, i_category#17] +Condition : (isnotnull(i_current_price#16) AND isnotnull(i_item_sk#15)) + +(26) Scan parquet default.item +Output [2]: [i_current_price#16, i_category#17] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_category)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 4] +Input [2]: [i_current_price#16, i_category#17] + +(28) Filter [codegen id : 4] +Input [2]: [i_current_price#16, i_category#17] +Condition : isnotnull(i_category#17) + +(29) HashAggregate [codegen id : 4] +Input [2]: [i_current_price#16, i_category#17] +Keys [1]: [i_category#17] +Functions [1]: [partial_avg(UnscaledValue(i_current_price#16))] +Aggregate Attributes [2]: [sum#18, count#19] +Results [3]: [i_category#17, sum#20, count#21] + +(30) Exchange +Input 
[3]: [i_category#17, sum#20, count#21] +Arguments: hashpartitioning(i_category#17, 5), true, [id=#22] + +(31) HashAggregate [codegen id : 5] +Input [3]: [i_category#17, sum#20, count#21] +Keys [1]: [i_category#17] +Functions [1]: [avg(UnscaledValue(i_current_price#16))] +Aggregate Attributes [1]: [avg(UnscaledValue(i_current_price#16))#23] +Results [2]: [cast((avg(UnscaledValue(i_current_price#16))#23 / 100.0) as decimal(11,6)) AS avg(i_current_price)#24, i_category#17 AS i_category#17#25] + +(32) BroadcastExchange +Input [2]: [avg(i_current_price)#24, i_category#17#25] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [id=#26] + +(33) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [i_category#17] +Right keys [1]: [i_category#17#25] +Join condition: None + +(34) Filter [codegen id : 6] +Input [5]: [i_item_sk#15, i_current_price#16, i_category#17, avg(i_current_price)#24, i_category#17#25] +Condition : (cast(i_current_price#16 as decimal(14,7)) > CheckOverflow((1.200000 * promote_precision(avg(i_current_price)#24)), DecimalType(14,7), true)) + +(35) Project [codegen id : 6] +Output [1]: [i_item_sk#15] +Input [5]: [i_item_sk#15, i_current_price#16, i_category#17, avg(i_current_price)#24, i_category#17#25] + +(36) BroadcastExchange +Input [1]: [i_item_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#27] + +(37) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_item_sk#7] +Right keys [1]: [i_item_sk#15] +Join condition: None + +(38) Project [codegen id : 7] +Output [1]: [ca_state#2] +Input [3]: [ca_state#2, ss_item_sk#7, i_item_sk#15] + +(39) HashAggregate [codegen id : 7] +Input [1]: [ca_state#2] +Keys [1]: [ca_state#2] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#28] +Results [2]: [ca_state#2, count#29] + +(40) Exchange +Input [2]: [ca_state#2, count#29] +Arguments: hashpartitioning(ca_state#2, 5), true, [id=#30] + +(41) HashAggregate [codegen id : 8] 
+Input [2]: [ca_state#2, count#29] +Keys [1]: [ca_state#2] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#31] +Results [3]: [ca_state#2 AS state#32, count(1)#31 AS cnt#33, count(1)#31 AS count(1)#34] + +(42) Filter [codegen id : 8] +Input [3]: [state#32, cnt#33, count(1)#34] +Condition : (count(1)#34 >= 10) + +(43) Project [codegen id : 8] +Output [2]: [state#32, cnt#33] +Input [3]: [state#32, cnt#33, count(1)#34] + +(44) TakeOrderedAndProject +Input [2]: [state#32, cnt#33] +Arguments: 100, [cnt#33 ASC NULLS FIRST], [state#32, cnt#33] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 18 Hosting Expression = Subquery scalar-subquery#12, [id=#13] +* HashAggregate (51) ++- Exchange (50) + +- * HashAggregate (49) + +- * Project (48) + +- * Filter (47) + +- * ColumnarToRow (46) + +- Scan parquet default.date_dim (45) + + +(45) Scan parquet default.date_dim +Output [3]: [d_month_seq#11, d_year#35, d_moy#36] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,1)] +ReadSchema: struct + +(46) ColumnarToRow [codegen id : 1] +Input [3]: [d_month_seq#11, d_year#35, d_moy#36] + +(47) Filter [codegen id : 1] +Input [3]: [d_month_seq#11, d_year#35, d_moy#36] +Condition : (((isnotnull(d_year#35) AND isnotnull(d_moy#36)) AND (d_year#35 = 2000)) AND (d_moy#36 = 1)) + +(48) Project [codegen id : 1] +Output [1]: [d_month_seq#11] +Input [3]: [d_month_seq#11, d_year#35, d_moy#36] + +(49) HashAggregate [codegen id : 1] +Input [1]: [d_month_seq#11] +Keys [1]: [d_month_seq#11] +Functions: [] +Aggregate Attributes: [] +Results [1]: [d_month_seq#11] + +(50) Exchange +Input [1]: [d_month_seq#11] +Arguments: hashpartitioning(d_month_seq#11, 5), true, [id=#37] + +(51) HashAggregate [codegen id : 2] +Input [1]: [d_month_seq#11] +Keys [1]: [d_month_seq#11] 
+Functions: [] +Aggregate Attributes: [] +Results [1]: [d_month_seq#11] + + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/simplified.txt new file mode 100644 index 0000000000000..9b39a0cb65f56 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/simplified.txt @@ -0,0 +1,77 @@ +TakeOrderedAndProject [cnt,state] + WholeStageCodegen (8) + Project [cnt,state] + Filter [count(1)] + HashAggregate [ca_state,count] [cnt,count,count(1),count(1),state] + InputAdapter + Exchange [ca_state] #1 + WholeStageCodegen (7) + HashAggregate [ca_state] [count,count] + Project [ca_state] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ca_state,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ca_state,ss_item_sk,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [c_customer_sk,ca_state] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [c_current_addr_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_customer_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date_sk,d_month_seq] + Subquery #1 + WholeStageCodegen (2) + HashAggregate [d_month_seq] + InputAdapter + Exchange [d_month_seq] #5 + WholeStageCodegen (1) + HashAggregate [d_month_seq] + Project [d_month_seq] + Filter [d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim 
[d_month_seq,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (6) + Project [i_item_sk] + Filter [avg(i_current_price),i_current_price] + BroadcastHashJoin [i_category,i_category] + Filter [i_current_price,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_current_price,i_item_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (5) + HashAggregate [count,i_category,sum] [avg(UnscaledValue(i_current_price)),avg(i_current_price),count,i_category,sum] + InputAdapter + Exchange [i_category] #8 + WholeStageCodegen (4) + HashAggregate [i_category,i_current_price] [count,count,sum,sum] + Filter [i_category] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_current_price] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60.sf100/explain.txt new file mode 100644 index 0000000000000..cec4923c48198 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60.sf100/explain.txt @@ -0,0 +1,378 @@ +== Physical Plan == +TakeOrderedAndProject (67) ++- * HashAggregate (66) + +- Exchange (65) + +- * HashAggregate (64) + +- Union (63) + :- * HashAggregate (32) + : +- Exchange (31) + : +- * HashAggregate (30) + : +- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter 
(13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.customer_address (11) + : +- BroadcastExchange (27) + : +- * BroadcastHashJoin LeftSemi BuildRight (26) + : :- * Filter (20) + : : +- * ColumnarToRow (19) + : : +- Scan parquet default.item (18) + : +- BroadcastExchange (25) + : +- * Project (24) + : +- * Filter (23) + : +- * ColumnarToRow (22) + : +- Scan parquet default.item (21) + :- * HashAggregate (47) + : +- Exchange (46) + : +- * HashAggregate (45) + : +- * Project (44) + : +- * BroadcastHashJoin Inner BuildRight (43) + : :- * Project (41) + : : +- * BroadcastHashJoin Inner BuildRight (40) + : : :- * Project (38) + : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : :- * Filter (35) + : : : : +- * ColumnarToRow (34) + : : : : +- Scan parquet default.catalog_sales (33) + : : : +- ReusedExchange (36) + : : +- ReusedExchange (39) + : +- ReusedExchange (42) + +- * HashAggregate (62) + +- Exchange (61) + +- * HashAggregate (60) + +- * Project (59) + +- * BroadcastHashJoin Inner BuildRight (58) + :- * Project (56) + : +- * BroadcastHashJoin Inner BuildRight (55) + : :- * Project (53) + : : +- * BroadcastHashJoin Inner BuildRight (52) + : : :- * Filter (50) + : : : +- * ColumnarToRow (49) + : : : +- Scan parquet default.web_sales (48) + : : +- ReusedExchange (51) + : +- ReusedExchange (54) + +- ReusedExchange (57) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] + +(3) Filter [codegen id : 5] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, 
ss_ext_sales_price#4] +Condition : ((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_addr_sk#3)) AND isnotnull(ss_item_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#5, d_year#6, d_moy#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,9), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Condition : ((((isnotnull(d_year#6) AND isnotnull(d_moy#7)) AND (d_year#6 = 1998)) AND (d_moy#7 = 9)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(10) Project [codegen id : 5] +Output [3]: [ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4, d_date_sk#5] + +(11) Scan parquet default.customer_address +Output [2]: [ca_address_sk#9, ca_gmt_offset#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [ca_address_sk#9, ca_gmt_offset#10] + +(13) Filter [codegen id : 2] +Input [2]: [ca_address_sk#9, ca_gmt_offset#10] +Condition : ((isnotnull(ca_gmt_offset#10) AND 
(ca_gmt_offset#10 = -5.00)) AND isnotnull(ca_address_sk#9)) + +(14) Project [codegen id : 2] +Output [1]: [ca_address_sk#9] +Input [2]: [ca_address_sk#9, ca_gmt_offset#10] + +(15) BroadcastExchange +Input [1]: [ca_address_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_addr_sk#3] +Right keys [1]: [ca_address_sk#9] +Join condition: None + +(17) Project [codegen id : 5] +Output [2]: [ss_item_sk#2, ss_ext_sales_price#4] +Input [4]: [ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4, ca_address_sk#9] + +(18) Scan parquet default.item +Output [2]: [i_item_sk#12, i_item_id#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#12, i_item_id#13] + +(20) Filter [codegen id : 4] +Input [2]: [i_item_sk#12, i_item_id#13] +Condition : isnotnull(i_item_sk#12) + +(21) Scan parquet default.item +Output [2]: [i_item_id#13, i_category#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Music)] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 3] +Input [2]: [i_item_id#13, i_category#14] + +(23) Filter [codegen id : 3] +Input [2]: [i_item_id#13, i_category#14] +Condition : (isnotnull(i_category#14) AND (i_category#14 = Music)) + +(24) Project [codegen id : 3] +Output [1]: [i_item_id#13 AS i_item_id#13#15] +Input [2]: [i_item_id#13, i_category#14] + +(25) BroadcastExchange +Input [1]: [i_item_id#13#15] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#16] + +(26) BroadcastHashJoin [codegen id : 4] +Left keys [1]: 
[i_item_id#13] +Right keys [1]: [i_item_id#13#15] +Join condition: None + +(27) BroadcastExchange +Input [2]: [i_item_sk#12, i_item_id#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#12] +Join condition: None + +(29) Project [codegen id : 5] +Output [2]: [ss_ext_sales_price#4, i_item_id#13] +Input [4]: [ss_item_sk#2, ss_ext_sales_price#4, i_item_sk#12, i_item_id#13] + +(30) HashAggregate [codegen id : 5] +Input [2]: [ss_ext_sales_price#4, i_item_id#13] +Keys [1]: [i_item_id#13] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#4))] +Aggregate Attributes [1]: [sum#18] +Results [2]: [i_item_id#13, sum#19] + +(31) Exchange +Input [2]: [i_item_id#13, sum#19] +Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#20] + +(32) HashAggregate [codegen id : 6] +Input [2]: [i_item_id#13, sum#19] +Keys [1]: [i_item_id#13] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#4))#21] +Results [2]: [i_item_id#13, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#21,17,2) AS total_sales#22] + +(33) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 11] +Input [4]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] + +(35) Filter [codegen id : 11] +Input [4]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] +Condition : ((isnotnull(cs_sold_date_sk#23) AND 
isnotnull(cs_bill_addr_sk#24)) AND isnotnull(cs_item_sk#25)) + +(36) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(37) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_sold_date_sk#23] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(38) Project [codegen id : 11] +Output [3]: [cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] +Input [5]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26, d_date_sk#5] + +(39) ReusedExchange [Reuses operator id: 15] +Output [1]: [ca_address_sk#9] + +(40) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_bill_addr_sk#24] +Right keys [1]: [ca_address_sk#9] +Join condition: None + +(41) Project [codegen id : 11] +Output [2]: [cs_item_sk#25, cs_ext_sales_price#26] +Input [4]: [cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26, ca_address_sk#9] + +(42) ReusedExchange [Reuses operator id: 27] +Output [2]: [i_item_sk#12, i_item_id#13] + +(43) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_item_sk#25] +Right keys [1]: [i_item_sk#12] +Join condition: None + +(44) Project [codegen id : 11] +Output [2]: [cs_ext_sales_price#26, i_item_id#13] +Input [4]: [cs_item_sk#25, cs_ext_sales_price#26, i_item_sk#12, i_item_id#13] + +(45) HashAggregate [codegen id : 11] +Input [2]: [cs_ext_sales_price#26, i_item_id#13] +Keys [1]: [i_item_id#13] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#26))] +Aggregate Attributes [1]: [sum#27] +Results [2]: [i_item_id#13, sum#28] + +(46) Exchange +Input [2]: [i_item_id#13, sum#28] +Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#29] + +(47) HashAggregate [codegen id : 12] +Input [2]: [i_item_id#13, sum#28] +Keys [1]: [i_item_id#13] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#26))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#26))#30] +Results [2]: [i_item_id#13, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#26))#30,17,2) AS total_sales#31] + +(48) Scan parquet 
default.web_sales +Output [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(49) ColumnarToRow [codegen id : 17] +Input [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] + +(50) Filter [codegen id : 17] +Input [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] +Condition : ((isnotnull(ws_sold_date_sk#32) AND isnotnull(ws_bill_addr_sk#34)) AND isnotnull(ws_item_sk#33)) + +(51) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(52) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_sold_date_sk#32] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(53) Project [codegen id : 17] +Output [3]: [ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] +Input [5]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35, d_date_sk#5] + +(54) ReusedExchange [Reuses operator id: 15] +Output [1]: [ca_address_sk#9] + +(55) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_bill_addr_sk#34] +Right keys [1]: [ca_address_sk#9] +Join condition: None + +(56) Project [codegen id : 17] +Output [2]: [ws_item_sk#33, ws_ext_sales_price#35] +Input [4]: [ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35, ca_address_sk#9] + +(57) ReusedExchange [Reuses operator id: 27] +Output [2]: [i_item_sk#12, i_item_id#13] + +(58) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_item_sk#33] +Right keys [1]: [i_item_sk#12] +Join condition: None + +(59) Project [codegen id : 17] +Output [2]: [ws_ext_sales_price#35, i_item_id#13] +Input [4]: [ws_item_sk#33, ws_ext_sales_price#35, i_item_sk#12, i_item_id#13] + +(60) HashAggregate [codegen id : 
17] +Input [2]: [ws_ext_sales_price#35, i_item_id#13] +Keys [1]: [i_item_id#13] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#35))] +Aggregate Attributes [1]: [sum#36] +Results [2]: [i_item_id#13, sum#37] + +(61) Exchange +Input [2]: [i_item_id#13, sum#37] +Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#38] + +(62) HashAggregate [codegen id : 18] +Input [2]: [i_item_id#13, sum#37] +Keys [1]: [i_item_id#13] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#35))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#35))#39] +Results [2]: [i_item_id#13, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#35))#39,17,2) AS total_sales#40] + +(63) Union + +(64) HashAggregate [codegen id : 19] +Input [2]: [i_item_id#13, total_sales#22] +Keys [1]: [i_item_id#13] +Functions [1]: [partial_sum(total_sales#22)] +Aggregate Attributes [2]: [sum#41, isEmpty#42] +Results [3]: [i_item_id#13, sum#43, isEmpty#44] + +(65) Exchange +Input [3]: [i_item_id#13, sum#43, isEmpty#44] +Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#45] + +(66) HashAggregate [codegen id : 20] +Input [3]: [i_item_id#13, sum#43, isEmpty#44] +Keys [1]: [i_item_id#13] +Functions [1]: [sum(total_sales#22)] +Aggregate Attributes [1]: [sum(total_sales#22)#46] +Results [2]: [i_item_id#13, sum(total_sales#22)#46 AS total_sales#47] + +(67) TakeOrderedAndProject +Input [2]: [i_item_id#13, total_sales#47] +Arguments: 100, [i_item_id#13 ASC NULLS FIRST, total_sales#47 ASC NULLS FIRST], [i_item_id#13, total_sales#47] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60.sf100/simplified.txt new file mode 100644 index 0000000000000..d49bf8669bea5 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60.sf100/simplified.txt @@ -0,0 +1,101 @@ +TakeOrderedAndProject [i_item_id,total_sales] + WholeStageCodegen (20) + 
HashAggregate [i_item_id,isEmpty,sum] [isEmpty,sum,sum(total_sales),total_sales] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen (19) + HashAggregate [i_item_id,total_sales] [isEmpty,isEmpty,sum,sum] + InputAdapter + Union + WholeStageCodegen (6) + HashAggregate [i_item_id,sum] [sum,sum(UnscaledValue(ss_ext_sales_price)),total_sales] + InputAdapter + Exchange [i_item_id] #2 + WholeStageCodegen (5) + HashAggregate [i_item_id,ss_ext_sales_price] [sum,sum] + Project [i_item_id,ss_ext_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] + Project [ss_addr_sk,ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_addr_sk,ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [ca_address_sk] + Filter [ca_address_sk,ca_gmt_offset] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + BroadcastHashJoin [i_item_id,i_item_id] + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_id,i_item_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Project [i_item_id] + Filter [i_category] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_item_id] + WholeStageCodegen (12) + HashAggregate [i_item_id,sum] [sum,sum(UnscaledValue(cs_ext_sales_price)),total_sales] + InputAdapter + Exchange [i_item_id] #7 + WholeStageCodegen (11) + HashAggregate [cs_ext_sales_price,i_item_id] [sum,sum] + Project 
[cs_ext_sales_price,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_ext_sales_price,cs_item_sk] + BroadcastHashJoin [ca_address_sk,cs_bill_addr_sk] + Project [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_bill_addr_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [ca_address_sk] #4 + InputAdapter + ReusedExchange [i_item_id,i_item_sk] #5 + WholeStageCodegen (18) + HashAggregate [i_item_id,sum] [sum,sum(UnscaledValue(ws_ext_sales_price)),total_sales] + InputAdapter + Exchange [i_item_id] #8 + WholeStageCodegen (17) + HashAggregate [i_item_id,ws_ext_sales_price] [sum,sum] + Project [i_item_id,ws_ext_sales_price] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_ext_sales_price,ws_item_sk] + BroadcastHashJoin [ca_address_sk,ws_bill_addr_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_bill_addr_sk,ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [ca_address_sk] #4 + InputAdapter + ReusedExchange [i_item_id,i_item_sk] #5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60/explain.txt new file mode 100644 index 0000000000000..8e05106d68911 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60/explain.txt @@ -0,0 +1,378 @@ +== Physical Plan == +TakeOrderedAndProject (67) ++- * HashAggregate (66) + +- Exchange (65) + +- * HashAggregate (64) + +- Union (63) + :- * HashAggregate (32) + 
: +- Exchange (31) + : +- * HashAggregate (30) + : +- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.customer_address (11) + : +- BroadcastExchange (27) + : +- * BroadcastHashJoin LeftSemi BuildRight (26) + : :- * Filter (20) + : : +- * ColumnarToRow (19) + : : +- Scan parquet default.item (18) + : +- BroadcastExchange (25) + : +- * Project (24) + : +- * Filter (23) + : +- * ColumnarToRow (22) + : +- Scan parquet default.item (21) + :- * HashAggregate (47) + : +- Exchange (46) + : +- * HashAggregate (45) + : +- * Project (44) + : +- * BroadcastHashJoin Inner BuildRight (43) + : :- * Project (41) + : : +- * BroadcastHashJoin Inner BuildRight (40) + : : :- * Project (38) + : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : :- * Filter (35) + : : : : +- * ColumnarToRow (34) + : : : : +- Scan parquet default.catalog_sales (33) + : : : +- ReusedExchange (36) + : : +- ReusedExchange (39) + : +- ReusedExchange (42) + +- * HashAggregate (62) + +- Exchange (61) + +- * HashAggregate (60) + +- * Project (59) + +- * BroadcastHashJoin Inner BuildRight (58) + :- * Project (56) + : +- * BroadcastHashJoin Inner BuildRight (55) + : :- * Project (53) + : : +- * BroadcastHashJoin Inner BuildRight (52) + : : :- * Filter (50) + : : : +- * ColumnarToRow (49) + : : : +- Scan parquet default.web_sales (48) + : : +- ReusedExchange (51) + : +- ReusedExchange (54) + +- ReusedExchange (57) + + +(1) Scan parquet 
default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] + +(3) Filter [codegen id : 5] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] +Condition : ((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_addr_sk#3)) AND isnotnull(ss_item_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#5, d_year#6, d_moy#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,9), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Condition : ((((isnotnull(d_year#6) AND isnotnull(d_moy#7)) AND (d_year#6 = 1998)) AND (d_moy#7 = 9)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(10) Project [codegen id : 5] +Output [3]: [ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4, d_date_sk#5] + +(11) Scan parquet 
default.customer_address +Output [2]: [ca_address_sk#9, ca_gmt_offset#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [ca_address_sk#9, ca_gmt_offset#10] + +(13) Filter [codegen id : 2] +Input [2]: [ca_address_sk#9, ca_gmt_offset#10] +Condition : ((isnotnull(ca_gmt_offset#10) AND (ca_gmt_offset#10 = -5.00)) AND isnotnull(ca_address_sk#9)) + +(14) Project [codegen id : 2] +Output [1]: [ca_address_sk#9] +Input [2]: [ca_address_sk#9, ca_gmt_offset#10] + +(15) BroadcastExchange +Input [1]: [ca_address_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_addr_sk#3] +Right keys [1]: [ca_address_sk#9] +Join condition: None + +(17) Project [codegen id : 5] +Output [2]: [ss_item_sk#2, ss_ext_sales_price#4] +Input [4]: [ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4, ca_address_sk#9] + +(18) Scan parquet default.item +Output [2]: [i_item_sk#12, i_item_id#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#12, i_item_id#13] + +(20) Filter [codegen id : 4] +Input [2]: [i_item_sk#12, i_item_id#13] +Condition : isnotnull(i_item_sk#12) + +(21) Scan parquet default.item +Output [2]: [i_item_id#13, i_category#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Music)] 
+ReadSchema: struct + +(22) ColumnarToRow [codegen id : 3] +Input [2]: [i_item_id#13, i_category#14] + +(23) Filter [codegen id : 3] +Input [2]: [i_item_id#13, i_category#14] +Condition : (isnotnull(i_category#14) AND (i_category#14 = Music)) + +(24) Project [codegen id : 3] +Output [1]: [i_item_id#13 AS i_item_id#13#15] +Input [2]: [i_item_id#13, i_category#14] + +(25) BroadcastExchange +Input [1]: [i_item_id#13#15] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#16] + +(26) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_id#13] +Right keys [1]: [i_item_id#13#15] +Join condition: None + +(27) BroadcastExchange +Input [2]: [i_item_sk#12, i_item_id#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#12] +Join condition: None + +(29) Project [codegen id : 5] +Output [2]: [ss_ext_sales_price#4, i_item_id#13] +Input [4]: [ss_item_sk#2, ss_ext_sales_price#4, i_item_sk#12, i_item_id#13] + +(30) HashAggregate [codegen id : 5] +Input [2]: [ss_ext_sales_price#4, i_item_id#13] +Keys [1]: [i_item_id#13] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#4))] +Aggregate Attributes [1]: [sum#18] +Results [2]: [i_item_id#13, sum#19] + +(31) Exchange +Input [2]: [i_item_id#13, sum#19] +Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#20] + +(32) HashAggregate [codegen id : 6] +Input [2]: [i_item_id#13, sum#19] +Keys [1]: [i_item_id#13] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#4))#21] +Results [2]: [i_item_id#13, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#21,17,2) AS total_sales#22] + +(33) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 11] +Input [4]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] + +(35) Filter [codegen id : 11] +Input [4]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] +Condition : ((isnotnull(cs_sold_date_sk#23) AND isnotnull(cs_bill_addr_sk#24)) AND isnotnull(cs_item_sk#25)) + +(36) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(37) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_sold_date_sk#23] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(38) Project [codegen id : 11] +Output [3]: [cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] +Input [5]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26, d_date_sk#5] + +(39) ReusedExchange [Reuses operator id: 15] +Output [1]: [ca_address_sk#9] + +(40) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_bill_addr_sk#24] +Right keys [1]: [ca_address_sk#9] +Join condition: None + +(41) Project [codegen id : 11] +Output [2]: [cs_item_sk#25, cs_ext_sales_price#26] +Input [4]: [cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26, ca_address_sk#9] + +(42) ReusedExchange [Reuses operator id: 27] +Output [2]: [i_item_sk#12, i_item_id#13] + +(43) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_item_sk#25] +Right keys [1]: [i_item_sk#12] +Join condition: None + +(44) Project [codegen id : 11] +Output [2]: [cs_ext_sales_price#26, i_item_id#13] +Input [4]: [cs_item_sk#25, cs_ext_sales_price#26, i_item_sk#12, i_item_id#13] + +(45) HashAggregate [codegen id : 11] +Input [2]: [cs_ext_sales_price#26, i_item_id#13] +Keys [1]: [i_item_id#13] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#26))] +Aggregate 
Attributes [1]: [sum#27] +Results [2]: [i_item_id#13, sum#28] + +(46) Exchange +Input [2]: [i_item_id#13, sum#28] +Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#29] + +(47) HashAggregate [codegen id : 12] +Input [2]: [i_item_id#13, sum#28] +Keys [1]: [i_item_id#13] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#26))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#26))#30] +Results [2]: [i_item_id#13, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#26))#30,17,2) AS total_sales#31] + +(48) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(49) ColumnarToRow [codegen id : 17] +Input [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] + +(50) Filter [codegen id : 17] +Input [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] +Condition : ((isnotnull(ws_sold_date_sk#32) AND isnotnull(ws_bill_addr_sk#34)) AND isnotnull(ws_item_sk#33)) + +(51) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(52) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_sold_date_sk#32] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(53) Project [codegen id : 17] +Output [3]: [ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] +Input [5]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35, d_date_sk#5] + +(54) ReusedExchange [Reuses operator id: 15] +Output [1]: [ca_address_sk#9] + +(55) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_bill_addr_sk#34] +Right keys [1]: [ca_address_sk#9] +Join condition: None + +(56) Project [codegen id : 17] +Output [2]: [ws_item_sk#33, 
ws_ext_sales_price#35] +Input [4]: [ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35, ca_address_sk#9] + +(57) ReusedExchange [Reuses operator id: 27] +Output [2]: [i_item_sk#12, i_item_id#13] + +(58) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_item_sk#33] +Right keys [1]: [i_item_sk#12] +Join condition: None + +(59) Project [codegen id : 17] +Output [2]: [ws_ext_sales_price#35, i_item_id#13] +Input [4]: [ws_item_sk#33, ws_ext_sales_price#35, i_item_sk#12, i_item_id#13] + +(60) HashAggregate [codegen id : 17] +Input [2]: [ws_ext_sales_price#35, i_item_id#13] +Keys [1]: [i_item_id#13] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#35))] +Aggregate Attributes [1]: [sum#36] +Results [2]: [i_item_id#13, sum#37] + +(61) Exchange +Input [2]: [i_item_id#13, sum#37] +Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#38] + +(62) HashAggregate [codegen id : 18] +Input [2]: [i_item_id#13, sum#37] +Keys [1]: [i_item_id#13] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#35))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#35))#39] +Results [2]: [i_item_id#13, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#35))#39,17,2) AS total_sales#40] + +(63) Union + +(64) HashAggregate [codegen id : 19] +Input [2]: [i_item_id#13, total_sales#22] +Keys [1]: [i_item_id#13] +Functions [1]: [partial_sum(total_sales#22)] +Aggregate Attributes [2]: [sum#41, isEmpty#42] +Results [3]: [i_item_id#13, sum#43, isEmpty#44] + +(65) Exchange +Input [3]: [i_item_id#13, sum#43, isEmpty#44] +Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#45] + +(66) HashAggregate [codegen id : 20] +Input [3]: [i_item_id#13, sum#43, isEmpty#44] +Keys [1]: [i_item_id#13] +Functions [1]: [sum(total_sales#22)] +Aggregate Attributes [1]: [sum(total_sales#22)#46] +Results [2]: [i_item_id#13, sum(total_sales#22)#46 AS total_sales#47] + +(67) TakeOrderedAndProject +Input [2]: [i_item_id#13, total_sales#47] +Arguments: 100, [i_item_id#13 ASC NULLS FIRST, 
total_sales#47 ASC NULLS FIRST], [i_item_id#13, total_sales#47] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60/simplified.txt new file mode 100644 index 0000000000000..d49bf8669bea5 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60/simplified.txt @@ -0,0 +1,101 @@ +TakeOrderedAndProject [i_item_id,total_sales] + WholeStageCodegen (20) + HashAggregate [i_item_id,isEmpty,sum] [isEmpty,sum,sum(total_sales),total_sales] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen (19) + HashAggregate [i_item_id,total_sales] [isEmpty,isEmpty,sum,sum] + InputAdapter + Union + WholeStageCodegen (6) + HashAggregate [i_item_id,sum] [sum,sum(UnscaledValue(ss_ext_sales_price)),total_sales] + InputAdapter + Exchange [i_item_id] #2 + WholeStageCodegen (5) + HashAggregate [i_item_id,ss_ext_sales_price] [sum,sum] + Project [i_item_id,ss_ext_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] + Project [ss_addr_sk,ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_addr_sk,ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [ca_address_sk] + Filter [ca_address_sk,ca_gmt_offset] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + BroadcastHashJoin [i_item_id,i_item_id] + Filter [i_item_sk] + 
ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_id,i_item_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Project [i_item_id] + Filter [i_category] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_item_id] + WholeStageCodegen (12) + HashAggregate [i_item_id,sum] [sum,sum(UnscaledValue(cs_ext_sales_price)),total_sales] + InputAdapter + Exchange [i_item_id] #7 + WholeStageCodegen (11) + HashAggregate [cs_ext_sales_price,i_item_id] [sum,sum] + Project [cs_ext_sales_price,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_ext_sales_price,cs_item_sk] + BroadcastHashJoin [ca_address_sk,cs_bill_addr_sk] + Project [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_bill_addr_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [ca_address_sk] #4 + InputAdapter + ReusedExchange [i_item_id,i_item_sk] #5 + WholeStageCodegen (18) + HashAggregate [i_item_id,sum] [sum,sum(UnscaledValue(ws_ext_sales_price)),total_sales] + InputAdapter + Exchange [i_item_id] #8 + WholeStageCodegen (17) + HashAggregate [i_item_id,ws_ext_sales_price] [sum,sum] + Project [i_item_id,ws_ext_sales_price] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_ext_sales_price,ws_item_sk] + BroadcastHashJoin [ca_address_sk,ws_bill_addr_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_bill_addr_sk,ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [ca_address_sk] #4 + InputAdapter + ReusedExchange [i_item_id,i_item_sk] #5 diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/explain.txt new file mode 100644 index 0000000000000..acc767cba6ff5 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/explain.txt @@ -0,0 +1,414 @@ +== Physical Plan == +TakeOrderedAndProject (75) ++- * Project (74) + +- BroadcastNestedLoopJoin Inner BuildRight (73) + :- * HashAggregate (47) + : +- Exchange (46) + : +- * HashAggregate (45) + : +- * Project (44) + : +- * BroadcastHashJoin Inner BuildRight (43) + : :- * Project (31) + : : +- * BroadcastHashJoin Inner BuildRight (30) + : : :- * Project (24) + : : : +- * BroadcastHashJoin Inner BuildRight (23) + : : : :- * Project (17) + : : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : : :- * Project (10) + : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : :- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet default.store_sales (1) + : : : : : +- BroadcastExchange (8) + : : : : : +- * Project (7) + : : : : : +- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.date_dim (4) + : : : : +- BroadcastExchange (15) + : : : : +- * Project (14) + : : : : +- * Filter (13) + : : : : +- * ColumnarToRow (12) + : : : : +- Scan parquet default.item (11) + : : : +- BroadcastExchange (22) + : : : +- * Project (21) + : : : +- * Filter (20) + : : : +- * ColumnarToRow (19) + : : : +- Scan parquet default.promotion (18) + : : +- BroadcastExchange (29) + : : +- * Project (28) + : : +- * Filter (27) + : : +- * ColumnarToRow (26) + : : +- Scan parquet default.store (25) + : +- BroadcastExchange (42) + : +- * Project (41) + : +- * BroadcastHashJoin Inner BuildRight (40) + : :- * Filter (34) + : : +- * ColumnarToRow (33) + : : +- Scan parquet default.customer (32) + : +- BroadcastExchange (39) + : +- * Project (38) + : +- * Filter (37) 
+ : +- * ColumnarToRow (36) + : +- Scan parquet default.customer_address (35) + +- BroadcastExchange (72) + +- * HashAggregate (71) + +- Exchange (70) + +- * HashAggregate (69) + +- * Project (68) + +- * BroadcastHashJoin Inner BuildRight (67) + :- * Project (59) + : +- * BroadcastHashJoin Inner BuildRight (58) + : :- * Project (56) + : : +- * BroadcastHashJoin Inner BuildRight (55) + : : :- * Project (53) + : : : +- * BroadcastHashJoin Inner BuildLeft (52) + : : : :- ReusedExchange (48) + : : : +- * Filter (51) + : : : +- * ColumnarToRow (50) + : : : +- Scan parquet default.store_sales (49) + : : +- ReusedExchange (54) + : +- ReusedExchange (57) + +- BroadcastExchange (66) + +- * Project (65) + +- * BroadcastHashJoin Inner BuildLeft (64) + :- ReusedExchange (60) + +- * Filter (63) + +- * ColumnarToRow (62) + +- Scan parquet default.customer (61) + + +(1) Scan parquet default.store_sales +Output [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_promo_sk#5, ss_ext_sales_price#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_promo_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 7] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_promo_sk#5, ss_ext_sales_price#6] + +(3) Filter [codegen id : 7] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_promo_sk#5, ss_ext_sales_price#6] +Condition : ((((isnotnull(ss_store_sk#4) AND isnotnull(ss_promo_sk#5)) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_customer_sk#3)) AND isnotnull(ss_item_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#7, d_year#8, d_moy#9] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,11), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#7, d_year#8, d_moy#9] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#7, d_year#8, d_moy#9] +Condition : ((((isnotnull(d_year#8) AND isnotnull(d_moy#9)) AND (d_year#8 = 1998)) AND (d_moy#9 = 11)) AND isnotnull(d_date_sk#7)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#7] +Input [3]: [d_date_sk#7, d_year#8, d_moy#9] + +(8) BroadcastExchange +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] + +(9) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(10) Project [codegen id : 7] +Output [5]: [ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_promo_sk#5, ss_ext_sales_price#6] +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_promo_sk#5, ss_ext_sales_price#6, d_date_sk#7] + +(11) Scan parquet default.item +Output [2]: [i_item_sk#11, i_category#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Jewelry), IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [i_item_sk#11, i_category#12] + +(13) Filter [codegen id : 2] +Input [2]: [i_item_sk#11, i_category#12] +Condition : ((isnotnull(i_category#12) AND (i_category#12 = Jewelry)) AND isnotnull(i_item_sk#11)) + +(14) Project [codegen id : 2] +Output [1]: [i_item_sk#11] +Input [2]: [i_item_sk#11, i_category#12] + +(15) BroadcastExchange +Input [1]: [i_item_sk#11] +Arguments: 
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(16) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#11] +Join condition: None + +(17) Project [codegen id : 7] +Output [4]: [ss_customer_sk#3, ss_store_sk#4, ss_promo_sk#5, ss_ext_sales_price#6] +Input [6]: [ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_promo_sk#5, ss_ext_sales_price#6, i_item_sk#11] + +(18) Scan parquet default.promotion +Output [4]: [p_promo_sk#14, p_channel_dmail#15, p_channel_email#16, p_channel_tv#17] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/promotion] +PushedFilters: [Or(Or(EqualTo(p_channel_dmail,Y),EqualTo(p_channel_email,Y)),EqualTo(p_channel_tv,Y)), IsNotNull(p_promo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [4]: [p_promo_sk#14, p_channel_dmail#15, p_channel_email#16, p_channel_tv#17] + +(20) Filter [codegen id : 3] +Input [4]: [p_promo_sk#14, p_channel_dmail#15, p_channel_email#16, p_channel_tv#17] +Condition : ((((p_channel_dmail#15 = Y) OR (p_channel_email#16 = Y)) OR (p_channel_tv#17 = Y)) AND isnotnull(p_promo_sk#14)) + +(21) Project [codegen id : 3] +Output [1]: [p_promo_sk#14] +Input [4]: [p_promo_sk#14, p_channel_dmail#15, p_channel_email#16, p_channel_tv#17] + +(22) BroadcastExchange +Input [1]: [p_promo_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] + +(23) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_promo_sk#5] +Right keys [1]: [p_promo_sk#14] +Join condition: None + +(24) Project [codegen id : 7] +Output [3]: [ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#6] +Input [5]: [ss_customer_sk#3, ss_store_sk#4, ss_promo_sk#5, ss_ext_sales_price#6, p_promo_sk#14] + +(25) Scan parquet default.store +Output [2]: [s_store_sk#19, s_gmt_offset#20] +Batched: true +Location: 
InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_gmt_offset), EqualTo(s_gmt_offset,-5.00), IsNotNull(s_store_sk)] +ReadSchema: struct + +(26) ColumnarToRow [codegen id : 4] +Input [2]: [s_store_sk#19, s_gmt_offset#20] + +(27) Filter [codegen id : 4] +Input [2]: [s_store_sk#19, s_gmt_offset#20] +Condition : ((isnotnull(s_gmt_offset#20) AND (s_gmt_offset#20 = -5.00)) AND isnotnull(s_store_sk#19)) + +(28) Project [codegen id : 4] +Output [1]: [s_store_sk#19] +Input [2]: [s_store_sk#19, s_gmt_offset#20] + +(29) BroadcastExchange +Input [1]: [s_store_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] + +(30) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#19] +Join condition: None + +(31) Project [codegen id : 7] +Output [2]: [ss_customer_sk#3, ss_ext_sales_price#6] +Input [4]: [ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#6, s_store_sk#19] + +(32) Scan parquet default.customer +Output [2]: [c_customer_sk#22, c_current_addr_sk#23] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(33) ColumnarToRow [codegen id : 6] +Input [2]: [c_customer_sk#22, c_current_addr_sk#23] + +(34) Filter [codegen id : 6] +Input [2]: [c_customer_sk#22, c_current_addr_sk#23] +Condition : (isnotnull(c_customer_sk#22) AND isnotnull(c_current_addr_sk#23)) + +(35) Scan parquet default.customer_address +Output [2]: [ca_address_sk#24, ca_gmt_offset#25] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: 
[IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(36) ColumnarToRow [codegen id : 5] +Input [2]: [ca_address_sk#24, ca_gmt_offset#25] + +(37) Filter [codegen id : 5] +Input [2]: [ca_address_sk#24, ca_gmt_offset#25] +Condition : ((isnotnull(ca_gmt_offset#25) AND (ca_gmt_offset#25 = -5.00)) AND isnotnull(ca_address_sk#24)) + +(38) Project [codegen id : 5] +Output [1]: [ca_address_sk#24] +Input [2]: [ca_address_sk#24, ca_gmt_offset#25] + +(39) BroadcastExchange +Input [1]: [ca_address_sk#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] + +(40) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [c_current_addr_sk#23] +Right keys [1]: [ca_address_sk#24] +Join condition: None + +(41) Project [codegen id : 6] +Output [1]: [c_customer_sk#22] +Input [3]: [c_customer_sk#22, c_current_addr_sk#23, ca_address_sk#24] + +(42) BroadcastExchange +Input [1]: [c_customer_sk#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#27] + +(43) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_customer_sk#3] +Right keys [1]: [c_customer_sk#22] +Join condition: None + +(44) Project [codegen id : 7] +Output [1]: [ss_ext_sales_price#6] +Input [3]: [ss_customer_sk#3, ss_ext_sales_price#6, c_customer_sk#22] + +(45) HashAggregate [codegen id : 7] +Input [1]: [ss_ext_sales_price#6] +Keys: [] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#6))] +Aggregate Attributes [1]: [sum#28] +Results [1]: [sum#29] + +(46) Exchange +Input [1]: [sum#29] +Arguments: SinglePartition, true, [id=#30] + +(47) HashAggregate [codegen id : 8] +Input [1]: [sum#29] +Keys: [] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#6))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#6))#31] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#31,17,2) AS promotions#32] + +(48) ReusedExchange [Reuses operator id: 8] 
+Output [1]: [d_date_sk#7] + +(49) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(50) ColumnarToRow +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#6] + +(51) Filter +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#6] +Condition : (((isnotnull(ss_store_sk#4) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_customer_sk#3)) AND isnotnull(ss_item_sk#2)) + +(52) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [d_date_sk#7] +Right keys [1]: [ss_sold_date_sk#1] +Join condition: None + +(53) Project [codegen id : 14] +Output [4]: [ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#6] +Input [6]: [d_date_sk#7, ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#6] + +(54) ReusedExchange [Reuses operator id: 15] +Output [1]: [i_item_sk#11] + +(55) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#11] +Join condition: None + +(56) Project [codegen id : 14] +Output [3]: [ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#6] +Input [5]: [ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#6, i_item_sk#11] + +(57) ReusedExchange [Reuses operator id: 29] +Output [1]: [s_store_sk#19] + +(58) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#19] +Join condition: None + +(59) Project [codegen id : 14] +Output [2]: [ss_customer_sk#3, ss_ext_sales_price#6] +Input [4]: [ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#6, 
s_store_sk#19] + +(60) ReusedExchange [Reuses operator id: 39] +Output [1]: [ca_address_sk#24] + +(61) Scan parquet default.customer +Output [2]: [c_customer_sk#22, c_current_addr_sk#23] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(62) ColumnarToRow +Input [2]: [c_customer_sk#22, c_current_addr_sk#23] + +(63) Filter +Input [2]: [c_customer_sk#22, c_current_addr_sk#23] +Condition : (isnotnull(c_customer_sk#22) AND isnotnull(c_current_addr_sk#23)) + +(64) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ca_address_sk#24] +Right keys [1]: [c_current_addr_sk#23] +Join condition: None + +(65) Project [codegen id : 13] +Output [1]: [c_customer_sk#22] +Input [3]: [ca_address_sk#24, c_customer_sk#22, c_current_addr_sk#23] + +(66) BroadcastExchange +Input [1]: [c_customer_sk#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#33] + +(67) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ss_customer_sk#3] +Right keys [1]: [c_customer_sk#22] +Join condition: None + +(68) Project [codegen id : 14] +Output [1]: [ss_ext_sales_price#6] +Input [3]: [ss_customer_sk#3, ss_ext_sales_price#6, c_customer_sk#22] + +(69) HashAggregate [codegen id : 14] +Input [1]: [ss_ext_sales_price#6] +Keys: [] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#6))] +Aggregate Attributes [1]: [sum#34] +Results [1]: [sum#35] + +(70) Exchange +Input [1]: [sum#35] +Arguments: SinglePartition, true, [id=#36] + +(71) HashAggregate [codegen id : 15] +Input [1]: [sum#35] +Keys: [] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#6))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#6))#37] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#37,17,2) AS total#38] + +(72) 
BroadcastExchange +Input [1]: [total#38] +Arguments: IdentityBroadcastMode, [id=#39] + +(73) BroadcastNestedLoopJoin +Join condition: None + +(74) Project [codegen id : 16] +Output [3]: [promotions#32, total#38, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(promotions#32 as decimal(15,4))) / promote_precision(cast(total#38 as decimal(15,4)))), DecimalType(35,20), true)) * 100.00000000000000000000), DecimalType(38,19), true) AS (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#40] +Input [2]: [promotions#32, total#38] + +(75) TakeOrderedAndProject +Input [3]: [promotions#32, total#38, (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#40] +Arguments: 100, [promotions#32 ASC NULLS FIRST, total#38 ASC NULLS FIRST], [promotions#32, total#38, (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#40] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/simplified.txt new file mode 100644 index 0000000000000..4fa1b4dd0a240 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/simplified.txt @@ -0,0 +1,110 @@ +TakeOrderedAndProject [(CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20))),promotions,total] + WholeStageCodegen (16) + Project [promotions,total] + InputAdapter + BroadcastNestedLoopJoin + WholeStageCodegen (8) + 
HashAggregate [sum] [promotions,sum,sum(UnscaledValue(ss_ext_sales_price))] + InputAdapter + Exchange #1 + WholeStageCodegen (7) + HashAggregate [ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [ss_customer_sk,ss_ext_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_customer_sk,ss_ext_sales_price,ss_store_sk] + BroadcastHashJoin [p_promo_sk,ss_promo_sk] + Project [ss_customer_sk,ss_ext_sales_price,ss_promo_sk,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_promo_sk,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [i_item_sk] + Filter [i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_item_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [p_promo_sk] + Filter [p_channel_dmail,p_channel_email,p_channel_tv,p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.promotion [p_channel_dmail,p_channel_email,p_channel_tv,p_promo_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [s_store_sk] + Filter [s_gmt_offset,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_gmt_offset,s_store_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (6) + Project [c_customer_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Filter [c_current_addr_sk,c_customer_sk] + 
ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_customer_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (5) + Project [ca_address_sk] + Filter [ca_address_sk,ca_gmt_offset] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] + BroadcastExchange #8 + WholeStageCodegen (15) + HashAggregate [sum] [sum,sum(UnscaledValue(ss_ext_sales_price)),total] + InputAdapter + Exchange #9 + WholeStageCodegen (14) + HashAggregate [ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [ss_customer_sk,ss_ext_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_customer_sk,ss_ext_sales_price,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #2 + Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] + InputAdapter + ReusedExchange [i_item_sk] #3 + InputAdapter + ReusedExchange [s_store_sk] #5 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (13) + Project [c_customer_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + InputAdapter + ReusedExchange [ca_address_sk] #7 + Filter [c_current_addr_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_customer_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/explain.txt new file mode 100644 index 0000000000000..05fffeeec65c9 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/explain.txt @@ -0,0 +1,396 @@ +== Physical 
Plan == +TakeOrderedAndProject (72) ++- * Project (71) + +- BroadcastNestedLoopJoin Inner BuildRight (70) + :- * HashAggregate (47) + : +- Exchange (46) + : +- * HashAggregate (45) + : +- * Project (44) + : +- * BroadcastHashJoin Inner BuildRight (43) + : :- * Project (37) + : : +- * BroadcastHashJoin Inner BuildRight (36) + : : :- * Project (30) + : : : +- * BroadcastHashJoin Inner BuildRight (29) + : : : :- * Project (24) + : : : : +- * BroadcastHashJoin Inner BuildRight (23) + : : : : :- * Project (17) + : : : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : : : :- * Project (10) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : : :- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet default.store_sales (1) + : : : : : : +- BroadcastExchange (8) + : : : : : : +- * Project (7) + : : : : : : +- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : +- Scan parquet default.store (4) + : : : : : +- BroadcastExchange (15) + : : : : : +- * Project (14) + : : : : : +- * Filter (13) + : : : : : +- * ColumnarToRow (12) + : : : : : +- Scan parquet default.promotion (11) + : : : : +- BroadcastExchange (22) + : : : : +- * Project (21) + : : : : +- * Filter (20) + : : : : +- * ColumnarToRow (19) + : : : : +- Scan parquet default.date_dim (18) + : : : +- BroadcastExchange (28) + : : : +- * Filter (27) + : : : +- * ColumnarToRow (26) + : : : +- Scan parquet default.customer (25) + : : +- BroadcastExchange (35) + : : +- * Project (34) + : : +- * Filter (33) + : : +- * ColumnarToRow (32) + : : +- Scan parquet default.customer_address (31) + : +- BroadcastExchange (42) + : +- * Project (41) + : +- * Filter (40) + : +- * ColumnarToRow (39) + : +- Scan parquet default.item (38) + +- BroadcastExchange (69) + +- * HashAggregate (68) + +- Exchange (67) + +- * HashAggregate (66) + +- * Project (65) + +- * BroadcastHashJoin Inner BuildRight (64) + :- * Project (62) + : +- * BroadcastHashJoin Inner BuildRight (61) + : 
:- * Project (59) + : : +- * BroadcastHashJoin Inner BuildRight (58) + : : :- * Project (56) + : : : +- * BroadcastHashJoin Inner BuildRight (55) + : : : :- * Project (53) + : : : : +- * BroadcastHashJoin Inner BuildRight (52) + : : : : :- * Filter (50) + : : : : : +- * ColumnarToRow (49) + : : : : : +- Scan parquet default.store_sales (48) + : : : : +- ReusedExchange (51) + : : : +- ReusedExchange (54) + : : +- ReusedExchange (57) + : +- ReusedExchange (60) + +- ReusedExchange (63) + + +(1) Scan parquet default.store_sales +Output [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_promo_sk#5, ss_ext_sales_price#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_promo_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 7] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_promo_sk#5, ss_ext_sales_price#6] + +(3) Filter [codegen id : 7] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_promo_sk#5, ss_ext_sales_price#6] +Condition : ((((isnotnull(ss_store_sk#4) AND isnotnull(ss_promo_sk#5)) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_customer_sk#3)) AND isnotnull(ss_item_sk#2)) + +(4) Scan parquet default.store +Output [2]: [s_store_sk#7, s_gmt_offset#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_gmt_offset), EqualTo(s_gmt_offset,-5.00), IsNotNull(s_store_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [s_store_sk#7, s_gmt_offset#8] + +(6) Filter [codegen id : 1] +Input [2]: [s_store_sk#7, s_gmt_offset#8] +Condition : 
((isnotnull(s_gmt_offset#8) AND (s_gmt_offset#8 = -5.00)) AND isnotnull(s_store_sk#7)) + +(7) Project [codegen id : 1] +Output [1]: [s_store_sk#7] +Input [2]: [s_store_sk#7, s_gmt_offset#8] + +(8) BroadcastExchange +Input [1]: [s_store_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + +(9) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#7] +Join condition: None + +(10) Project [codegen id : 7] +Output [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_promo_sk#5, ss_ext_sales_price#6] +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_promo_sk#5, ss_ext_sales_price#6, s_store_sk#7] + +(11) Scan parquet default.promotion +Output [4]: [p_promo_sk#10, p_channel_dmail#11, p_channel_email#12, p_channel_tv#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/promotion] +PushedFilters: [Or(Or(EqualTo(p_channel_dmail,Y),EqualTo(p_channel_email,Y)),EqualTo(p_channel_tv,Y)), IsNotNull(p_promo_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [4]: [p_promo_sk#10, p_channel_dmail#11, p_channel_email#12, p_channel_tv#13] + +(13) Filter [codegen id : 2] +Input [4]: [p_promo_sk#10, p_channel_dmail#11, p_channel_email#12, p_channel_tv#13] +Condition : ((((p_channel_dmail#11 = Y) OR (p_channel_email#12 = Y)) OR (p_channel_tv#13 = Y)) AND isnotnull(p_promo_sk#10)) + +(14) Project [codegen id : 2] +Output [1]: [p_promo_sk#10] +Input [4]: [p_promo_sk#10, p_channel_dmail#11, p_channel_email#12, p_channel_tv#13] + +(15) BroadcastExchange +Input [1]: [p_promo_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] + +(16) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_promo_sk#5] +Right keys [1]: [p_promo_sk#10] +Join condition: None + +(17) Project [codegen id : 
7] +Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ext_sales_price#6] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_promo_sk#5, ss_ext_sales_price#6, p_promo_sk#10] + +(18) Scan parquet default.date_dim +Output [3]: [d_date_sk#15, d_year#16, d_moy#17] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,11), IsNotNull(d_date_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#15, d_year#16, d_moy#17] + +(20) Filter [codegen id : 3] +Input [3]: [d_date_sk#15, d_year#16, d_moy#17] +Condition : ((((isnotnull(d_year#16) AND isnotnull(d_moy#17)) AND (d_year#16 = 1998)) AND (d_moy#17 = 11)) AND isnotnull(d_date_sk#15)) + +(21) Project [codegen id : 3] +Output [1]: [d_date_sk#15] +Input [3]: [d_date_sk#15, d_year#16, d_moy#17] + +(22) BroadcastExchange +Input [1]: [d_date_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] + +(23) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#15] +Join condition: None + +(24) Project [codegen id : 7] +Output [3]: [ss_item_sk#2, ss_customer_sk#3, ss_ext_sales_price#6] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ext_sales_price#6, d_date_sk#15] + +(25) Scan parquet default.customer +Output [2]: [c_customer_sk#19, c_current_addr_sk#20] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(26) ColumnarToRow [codegen id : 4] +Input [2]: [c_customer_sk#19, c_current_addr_sk#20] + +(27) Filter [codegen id : 4] +Input [2]: 
[c_customer_sk#19, c_current_addr_sk#20] +Condition : (isnotnull(c_customer_sk#19) AND isnotnull(c_current_addr_sk#20)) + +(28) BroadcastExchange +Input [2]: [c_customer_sk#19, c_current_addr_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#21] + +(29) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_customer_sk#3] +Right keys [1]: [c_customer_sk#19] +Join condition: None + +(30) Project [codegen id : 7] +Output [3]: [ss_item_sk#2, ss_ext_sales_price#6, c_current_addr_sk#20] +Input [5]: [ss_item_sk#2, ss_customer_sk#3, ss_ext_sales_price#6, c_customer_sk#19, c_current_addr_sk#20] + +(31) Scan parquet default.customer_address +Output [2]: [ca_address_sk#22, ca_gmt_offset#23] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 5] +Input [2]: [ca_address_sk#22, ca_gmt_offset#23] + +(33) Filter [codegen id : 5] +Input [2]: [ca_address_sk#22, ca_gmt_offset#23] +Condition : ((isnotnull(ca_gmt_offset#23) AND (ca_gmt_offset#23 = -5.00)) AND isnotnull(ca_address_sk#22)) + +(34) Project [codegen id : 5] +Output [1]: [ca_address_sk#22] +Input [2]: [ca_address_sk#22, ca_gmt_offset#23] + +(35) BroadcastExchange +Input [1]: [ca_address_sk#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#24] + +(36) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_current_addr_sk#20] +Right keys [1]: [ca_address_sk#22] +Join condition: None + +(37) Project [codegen id : 7] +Output [2]: [ss_item_sk#2, ss_ext_sales_price#6] +Input [4]: [ss_item_sk#2, ss_ext_sales_price#6, c_current_addr_sk#20, ca_address_sk#22] + +(38) Scan parquet default.item +Output [2]: [i_item_sk#25, i_category#26] +Batched: true +Location: 
InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Jewelry), IsNotNull(i_item_sk)] +ReadSchema: struct + +(39) ColumnarToRow [codegen id : 6] +Input [2]: [i_item_sk#25, i_category#26] + +(40) Filter [codegen id : 6] +Input [2]: [i_item_sk#25, i_category#26] +Condition : ((isnotnull(i_category#26) AND (i_category#26 = Jewelry)) AND isnotnull(i_item_sk#25)) + +(41) Project [codegen id : 6] +Output [1]: [i_item_sk#25] +Input [2]: [i_item_sk#25, i_category#26] + +(42) BroadcastExchange +Input [1]: [i_item_sk#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#27] + +(43) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#25] +Join condition: None + +(44) Project [codegen id : 7] +Output [1]: [ss_ext_sales_price#6] +Input [3]: [ss_item_sk#2, ss_ext_sales_price#6, i_item_sk#25] + +(45) HashAggregate [codegen id : 7] +Input [1]: [ss_ext_sales_price#6] +Keys: [] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#6))] +Aggregate Attributes [1]: [sum#28] +Results [1]: [sum#29] + +(46) Exchange +Input [1]: [sum#29] +Arguments: SinglePartition, true, [id=#30] + +(47) HashAggregate [codegen id : 8] +Input [1]: [sum#29] +Keys: [] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#6))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#6))#31] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#31,17,2) AS promotions#32] + +(48) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_sold_date_sk), 
IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(49) ColumnarToRow [codegen id : 14] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#6] + +(50) Filter [codegen id : 14] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#6] +Condition : (((isnotnull(ss_store_sk#4) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_customer_sk#3)) AND isnotnull(ss_item_sk#2)) + +(51) ReusedExchange [Reuses operator id: 8] +Output [1]: [s_store_sk#7] + +(52) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#7] +Join condition: None + +(53) Project [codegen id : 14] +Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ext_sales_price#6] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#6, s_store_sk#7] + +(54) ReusedExchange [Reuses operator id: 22] +Output [1]: [d_date_sk#15] + +(55) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#15] +Join condition: None + +(56) Project [codegen id : 14] +Output [3]: [ss_item_sk#2, ss_customer_sk#3, ss_ext_sales_price#6] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ext_sales_price#6, d_date_sk#15] + +(57) ReusedExchange [Reuses operator id: 28] +Output [2]: [c_customer_sk#19, c_current_addr_sk#20] + +(58) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ss_customer_sk#3] +Right keys [1]: [c_customer_sk#19] +Join condition: None + +(59) Project [codegen id : 14] +Output [3]: [ss_item_sk#2, ss_ext_sales_price#6, c_current_addr_sk#20] +Input [5]: [ss_item_sk#2, ss_customer_sk#3, ss_ext_sales_price#6, c_customer_sk#19, c_current_addr_sk#20] + +(60) ReusedExchange [Reuses operator id: 35] +Output [1]: [ca_address_sk#22] + +(61) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [c_current_addr_sk#20] +Right keys [1]: [ca_address_sk#22] +Join 
condition: None + +(62) Project [codegen id : 14] +Output [2]: [ss_item_sk#2, ss_ext_sales_price#6] +Input [4]: [ss_item_sk#2, ss_ext_sales_price#6, c_current_addr_sk#20, ca_address_sk#22] + +(63) ReusedExchange [Reuses operator id: 42] +Output [1]: [i_item_sk#25] + +(64) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#25] +Join condition: None + +(65) Project [codegen id : 14] +Output [1]: [ss_ext_sales_price#6] +Input [3]: [ss_item_sk#2, ss_ext_sales_price#6, i_item_sk#25] + +(66) HashAggregate [codegen id : 14] +Input [1]: [ss_ext_sales_price#6] +Keys: [] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#6))] +Aggregate Attributes [1]: [sum#33] +Results [1]: [sum#34] + +(67) Exchange +Input [1]: [sum#34] +Arguments: SinglePartition, true, [id=#35] + +(68) HashAggregate [codegen id : 15] +Input [1]: [sum#34] +Keys: [] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#6))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#6))#36] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#36,17,2) AS total#37] + +(69) BroadcastExchange +Input [1]: [total#37] +Arguments: IdentityBroadcastMode, [id=#38] + +(70) BroadcastNestedLoopJoin +Join condition: None + +(71) Project [codegen id : 16] +Output [3]: [promotions#32, total#37, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(promotions#32 as decimal(15,4))) / promote_precision(cast(total#37 as decimal(15,4)))), DecimalType(35,20), true)) * 100.00000000000000000000), DecimalType(38,19), true) AS (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#39] +Input [2]: [promotions#32, total#37] + +(72) TakeOrderedAndProject +Input [3]: [promotions#32, total#37, (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS 
DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#39] +Arguments: 100, [promotions#32 ASC NULLS FIRST, total#37 ASC NULLS FIRST], [promotions#32, total#37, (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#39] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/simplified.txt new file mode 100644 index 0000000000000..438b46afbd687 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/simplified.txt @@ -0,0 +1,105 @@ +TakeOrderedAndProject [(CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20))),promotions,total] + WholeStageCodegen (16) + Project [promotions,total] + InputAdapter + BroadcastNestedLoopJoin + WholeStageCodegen (8) + HashAggregate [sum] [promotions,sum,sum(UnscaledValue(ss_ext_sales_price))] + InputAdapter + Exchange #1 + WholeStageCodegen (7) + HashAggregate [ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_addr_sk,ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [ss_customer_sk,ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + BroadcastHashJoin [p_promo_sk,ss_promo_sk] + Project [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_promo_sk,ss_sold_date_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Filter [ss_customer_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk] + 
ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [s_store_sk] + Filter [s_gmt_offset,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_gmt_offset,s_store_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [p_promo_sk] + Filter [p_channel_dmail,p_channel_email,p_channel_tv,p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.promotion [p_channel_dmail,p_channel_email,p_channel_tv,p_promo_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [c_current_addr_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_customer_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Project [ca_address_sk] + Filter [ca_address_sk,ca_gmt_offset] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (6) + Project [i_item_sk] + Filter [i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_item_sk] + BroadcastExchange #8 + WholeStageCodegen (15) + HashAggregate [sum] [sum,sum(UnscaledValue(ss_ext_sales_price)),total] + InputAdapter + Exchange #9 + WholeStageCodegen (14) + HashAggregate [ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_addr_sk,ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project 
[ss_customer_sk,ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] + InputAdapter + ReusedExchange [s_store_sk] #2 + InputAdapter + ReusedExchange [d_date_sk] #4 + InputAdapter + ReusedExchange [c_current_addr_sk,c_customer_sk] #5 + InputAdapter + ReusedExchange [ca_address_sk] #6 + InputAdapter + ReusedExchange [i_item_sk] #7 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/explain.txt new file mode 100644 index 0000000000000..d2553a2c58c62 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/explain.txt @@ -0,0 +1,183 @@ +== Physical Plan == +TakeOrderedAndProject (32) ++- * HashAggregate (31) + +- Exchange (30) + +- * HashAggregate (29) + +- * Project (28) + +- * BroadcastHashJoin Inner BuildRight (27) + :- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildLeft (9) + : : : :- BroadcastExchange (5) + : : : : +- * Project (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.date_dim (1) + : : : +- * Filter (8) + : : : +- * ColumnarToRow (7) + : : : +- Scan parquet default.web_sales (6) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.web_site (11) + : +- BroadcastExchange (20) + : +- * Filter (19) + : +- * ColumnarToRow (18) + : +- Scan parquet default.ship_mode (17) + +- 
BroadcastExchange (26) + +- * Filter (25) + +- * ColumnarToRow (24) + +- Scan parquet default.warehouse (23) + + +(1) Scan parquet default.date_dim +Output [2]: [d_date_sk#1, d_month_seq#2] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#1, d_month_seq#2] + +(3) Filter [codegen id : 1] +Input [2]: [d_date_sk#1, d_month_seq#2] +Condition : (((isnotnull(d_month_seq#2) AND (d_month_seq#2 >= 1200)) AND (d_month_seq#2 <= 1211)) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 1] +Output [1]: [d_date_sk#1] +Input [2]: [d_date_sk#1, d_month_seq#2] + +(5) BroadcastExchange +Input [1]: [d_date_sk#1] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#3] + +(6) Scan parquet default.web_sales +Output [5]: [ws_sold_date_sk#4, ws_ship_date_sk#5, ws_web_site_sk#6, ws_ship_mode_sk#7, ws_warehouse_sk#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_warehouse_sk), IsNotNull(ws_ship_mode_sk), IsNotNull(ws_web_site_sk), IsNotNull(ws_ship_date_sk)] +ReadSchema: struct + +(7) ColumnarToRow +Input [5]: [ws_sold_date_sk#4, ws_ship_date_sk#5, ws_web_site_sk#6, ws_ship_mode_sk#7, ws_warehouse_sk#8] + +(8) Filter +Input [5]: [ws_sold_date_sk#4, ws_ship_date_sk#5, ws_web_site_sk#6, ws_ship_mode_sk#7, ws_warehouse_sk#8] +Condition : (((isnotnull(ws_warehouse_sk#8) AND isnotnull(ws_ship_mode_sk#7)) AND isnotnull(ws_web_site_sk#6)) AND isnotnull(ws_ship_date_sk#5)) + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [d_date_sk#1] +Right keys 
[1]: [ws_ship_date_sk#5] +Join condition: None + +(10) Project [codegen id : 5] +Output [5]: [ws_sold_date_sk#4, ws_ship_date_sk#5, ws_web_site_sk#6, ws_ship_mode_sk#7, ws_warehouse_sk#8] +Input [6]: [d_date_sk#1, ws_sold_date_sk#4, ws_ship_date_sk#5, ws_web_site_sk#6, ws_ship_mode_sk#7, ws_warehouse_sk#8] + +(11) Scan parquet default.web_site +Output [2]: [web_site_sk#9, web_name#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_site] +PushedFilters: [IsNotNull(web_site_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [web_site_sk#9, web_name#10] + +(13) Filter [codegen id : 2] +Input [2]: [web_site_sk#9, web_name#10] +Condition : isnotnull(web_site_sk#9) + +(14) BroadcastExchange +Input [2]: [web_site_sk#9, web_name#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] + +(15) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_web_site_sk#6] +Right keys [1]: [web_site_sk#9] +Join condition: None + +(16) Project [codegen id : 5] +Output [5]: [ws_sold_date_sk#4, ws_ship_date_sk#5, ws_ship_mode_sk#7, ws_warehouse_sk#8, web_name#10] +Input [7]: [ws_sold_date_sk#4, ws_ship_date_sk#5, ws_web_site_sk#6, ws_ship_mode_sk#7, ws_warehouse_sk#8, web_site_sk#9, web_name#10] + +(17) Scan parquet default.ship_mode +Output [2]: [sm_ship_mode_sk#12, sm_type#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/ship_mode] +PushedFilters: [IsNotNull(sm_ship_mode_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [2]: [sm_ship_mode_sk#12, sm_type#13] + +(19) Filter [codegen id : 3] +Input [2]: [sm_ship_mode_sk#12, sm_type#13] +Condition : isnotnull(sm_ship_mode_sk#12) + +(20) BroadcastExchange +Input [2]: [sm_ship_mode_sk#12, sm_type#13] 
+Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] + +(21) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_ship_mode_sk#7] +Right keys [1]: [sm_ship_mode_sk#12] +Join condition: None + +(22) Project [codegen id : 5] +Output [5]: [ws_sold_date_sk#4, ws_ship_date_sk#5, ws_warehouse_sk#8, web_name#10, sm_type#13] +Input [7]: [ws_sold_date_sk#4, ws_ship_date_sk#5, ws_ship_mode_sk#7, ws_warehouse_sk#8, web_name#10, sm_ship_mode_sk#12, sm_type#13] + +(23) Scan parquet default.warehouse +Output [2]: [w_warehouse_sk#15, w_warehouse_name#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [2]: [w_warehouse_sk#15, w_warehouse_name#16] + +(25) Filter [codegen id : 4] +Input [2]: [w_warehouse_sk#15, w_warehouse_name#16] +Condition : isnotnull(w_warehouse_sk#15) + +(26) BroadcastExchange +Input [2]: [w_warehouse_sk#15, w_warehouse_name#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] + +(27) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_warehouse_sk#8] +Right keys [1]: [w_warehouse_sk#15] +Join condition: None + +(28) Project [codegen id : 5] +Output [5]: [ws_sold_date_sk#4, ws_ship_date_sk#5, w_warehouse_name#16, sm_type#13, web_name#10] +Input [7]: [ws_sold_date_sk#4, ws_ship_date_sk#5, ws_warehouse_sk#8, web_name#10, sm_type#13, w_warehouse_sk#15, w_warehouse_name#16] + +(29) HashAggregate [codegen id : 5] +Input [5]: [ws_sold_date_sk#4, ws_ship_date_sk#5, w_warehouse_name#16, sm_type#13, web_name#10] +Keys [3]: [substr(w_warehouse_name#16, 1, 20) AS substr(w_warehouse_name#16, 1, 20)#18, sm_type#13, web_name#10] +Functions [5]: [partial_sum(cast(CASE WHEN ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 30) THEN 1 ELSE 0 END 
as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 30) AND ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 60) AND ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 90) AND ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 120) THEN 1 ELSE 0 END as bigint))] +Aggregate Attributes [5]: [sum#19, sum#20, sum#21, sum#22, sum#23] +Results [8]: [substr(w_warehouse_name#16, 1, 20)#18, sm_type#13, web_name#10, sum#24, sum#25, sum#26, sum#27, sum#28] + +(30) Exchange +Input [8]: [substr(w_warehouse_name#16, 1, 20)#18, sm_type#13, web_name#10, sum#24, sum#25, sum#26, sum#27, sum#28] +Arguments: hashpartitioning(substr(w_warehouse_name#16, 1, 20)#18, sm_type#13, web_name#10, 5), true, [id=#29] + +(31) HashAggregate [codegen id : 6] +Input [8]: [substr(w_warehouse_name#16, 1, 20)#18, sm_type#13, web_name#10, sum#24, sum#25, sum#26, sum#27, sum#28] +Keys [3]: [substr(w_warehouse_name#16, 1, 20)#18, sm_type#13, web_name#10] +Functions [5]: [sum(cast(CASE WHEN ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 30) AND ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 60) AND ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 90) AND ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 120) THEN 1 ELSE 0 END as bigint))] +Aggregate Attributes [5]: [sum(cast(CASE WHEN ((ws_ship_date_sk#5 - 
ws_sold_date_sk#4) <= 30) THEN 1 ELSE 0 END as bigint))#30, sum(cast(CASE WHEN (((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 30) AND ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 60)) THEN 1 ELSE 0 END as bigint))#31, sum(cast(CASE WHEN (((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 60) AND ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 90)) THEN 1 ELSE 0 END as bigint))#32, sum(cast(CASE WHEN (((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 90) AND ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 120)) THEN 1 ELSE 0 END as bigint))#33, sum(cast(CASE WHEN ((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 120) THEN 1 ELSE 0 END as bigint))#34] +Results [8]: [substr(w_warehouse_name#16, 1, 20)#18 AS substr(w_warehouse_name, 1, 20)#35, sm_type#13, web_name#10, sum(cast(CASE WHEN ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 30) THEN 1 ELSE 0 END as bigint))#30 AS 30 days #36, sum(cast(CASE WHEN (((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 30) AND ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 60)) THEN 1 ELSE 0 END as bigint))#31 AS 31 - 60 days #37, sum(cast(CASE WHEN (((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 60) AND ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 90)) THEN 1 ELSE 0 END as bigint))#32 AS 61 - 90 days #38, sum(cast(CASE WHEN (((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 90) AND ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 120)) THEN 1 ELSE 0 END as bigint))#33 AS 91 - 120 days #39, sum(cast(CASE WHEN ((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 120) THEN 1 ELSE 0 END as bigint))#34 AS >120 days #40] + +(32) TakeOrderedAndProject +Input [8]: [substr(w_warehouse_name, 1, 20)#35, sm_type#13, web_name#10, 30 days #36, 31 - 60 days #37, 61 - 90 days #38, 91 - 120 days #39, >120 days #40] +Arguments: 100, [substr(w_warehouse_name, 1, 20)#35 ASC NULLS FIRST, sm_type#13 ASC NULLS FIRST, web_name#10 ASC NULLS FIRST], [substr(w_warehouse_name, 1, 20)#35, sm_type#13, web_name#10, 30 days #36, 31 - 60 days #37, 61 - 90 days #38, 91 - 120 days #39, >120 days #40] + diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/simplified.txt new file mode 100644 index 0000000000000..0cc558cd526f8 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/simplified.txt @@ -0,0 +1,48 @@ +TakeOrderedAndProject [30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sm_type,substr(w_warehouse_name, 1, 20),web_name] + WholeStageCodegen (6) + HashAggregate [sm_type,substr(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,web_name] [30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,substr(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint))] + InputAdapter + Exchange [sm_type,substr(w_warehouse_name, 1, 20),web_name] #1 + WholeStageCodegen (5) + HashAggregate [sm_type,w_warehouse_name,web_name,ws_ship_date_sk,ws_sold_date_sk] [substr(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [sm_type,w_warehouse_name,web_name,ws_ship_date_sk,ws_sold_date_sk] + BroadcastHashJoin [w_warehouse_sk,ws_warehouse_sk] + Project [sm_type,web_name,ws_ship_date_sk,ws_sold_date_sk,ws_warehouse_sk] + BroadcastHashJoin [sm_ship_mode_sk,ws_ship_mode_sk] + Project [web_name,ws_ship_date_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_warehouse_sk] + BroadcastHashJoin 
[web_site_sk,ws_web_site_sk] + Project [ws_ship_date_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_warehouse_sk,ws_web_site_sk] + BroadcastHashJoin [d_date_sk,ws_ship_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] + Filter [ws_ship_date_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ship_date_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_warehouse_sk,ws_web_site_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_site [web_name,web_site_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [sm_ship_mode_sk] + ColumnarToRow + InputAdapter + Scan parquet default.ship_mode [sm_ship_mode_sk,sm_type] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.warehouse [w_warehouse_name,w_warehouse_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/explain.txt new file mode 100644 index 0000000000000..c06918906c77d --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/explain.txt @@ -0,0 +1,183 @@ +== Physical Plan == +TakeOrderedAndProject (32) ++- * HashAggregate (31) + +- Exchange (30) + +- * HashAggregate (29) + +- * Project (28) + +- * BroadcastHashJoin Inner BuildRight (27) + :- * Project (21) + : +- * BroadcastHashJoin Inner BuildRight (20) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.web_sales (1) 
+ : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.warehouse (4) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet default.ship_mode (10) + : +- BroadcastExchange (19) + : +- * Filter (18) + : +- * ColumnarToRow (17) + : +- Scan parquet default.web_site (16) + +- BroadcastExchange (26) + +- * Project (25) + +- * Filter (24) + +- * ColumnarToRow (23) + +- Scan parquet default.date_dim (22) + + +(1) Scan parquet default.web_sales +Output [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_web_site_sk#3, ws_ship_mode_sk#4, ws_warehouse_sk#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_warehouse_sk), IsNotNull(ws_ship_mode_sk), IsNotNull(ws_web_site_sk), IsNotNull(ws_ship_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_web_site_sk#3, ws_ship_mode_sk#4, ws_warehouse_sk#5] + +(3) Filter [codegen id : 5] +Input [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_web_site_sk#3, ws_ship_mode_sk#4, ws_warehouse_sk#5] +Condition : (((isnotnull(ws_warehouse_sk#5) AND isnotnull(ws_ship_mode_sk#4)) AND isnotnull(ws_web_site_sk#3)) AND isnotnull(ws_ship_date_sk#2)) + +(4) Scan parquet default.warehouse +Output [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] + +(6) Filter [codegen id : 1] +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Condition : isnotnull(w_warehouse_sk#6) + +(7) BroadcastExchange +Input [2]: 
[w_warehouse_sk#6, w_warehouse_name#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#8] + +(8) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_warehouse_sk#5] +Right keys [1]: [w_warehouse_sk#6] +Join condition: None + +(9) Project [codegen id : 5] +Output [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_web_site_sk#3, ws_ship_mode_sk#4, w_warehouse_name#7] +Input [7]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_web_site_sk#3, ws_ship_mode_sk#4, ws_warehouse_sk#5, w_warehouse_sk#6, w_warehouse_name#7] + +(10) Scan parquet default.ship_mode +Output [2]: [sm_ship_mode_sk#9, sm_type#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/ship_mode] +PushedFilters: [IsNotNull(sm_ship_mode_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [sm_ship_mode_sk#9, sm_type#10] + +(12) Filter [codegen id : 2] +Input [2]: [sm_ship_mode_sk#9, sm_type#10] +Condition : isnotnull(sm_ship_mode_sk#9) + +(13) BroadcastExchange +Input [2]: [sm_ship_mode_sk#9, sm_type#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] + +(14) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_ship_mode_sk#4] +Right keys [1]: [sm_ship_mode_sk#9] +Join condition: None + +(15) Project [codegen id : 5] +Output [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_web_site_sk#3, w_warehouse_name#7, sm_type#10] +Input [7]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_web_site_sk#3, ws_ship_mode_sk#4, w_warehouse_name#7, sm_ship_mode_sk#9, sm_type#10] + +(16) Scan parquet default.web_site +Output [2]: [web_site_sk#12, web_name#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_site] +PushedFilters: [IsNotNull(web_site_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen 
id : 3] +Input [2]: [web_site_sk#12, web_name#13] + +(18) Filter [codegen id : 3] +Input [2]: [web_site_sk#12, web_name#13] +Condition : isnotnull(web_site_sk#12) + +(19) BroadcastExchange +Input [2]: [web_site_sk#12, web_name#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] + +(20) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_web_site_sk#3] +Right keys [1]: [web_site_sk#12] +Join condition: None + +(21) Project [codegen id : 5] +Output [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, w_warehouse_name#7, sm_type#10, web_name#13] +Input [7]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_web_site_sk#3, w_warehouse_name#7, sm_type#10, web_site_sk#12, web_name#13] + +(22) Scan parquet default.date_dim +Output [2]: [d_date_sk#15, d_month_seq#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 4] +Input [2]: [d_date_sk#15, d_month_seq#16] + +(24) Filter [codegen id : 4] +Input [2]: [d_date_sk#15, d_month_seq#16] +Condition : (((isnotnull(d_month_seq#16) AND (d_month_seq#16 >= 1200)) AND (d_month_seq#16 <= 1211)) AND isnotnull(d_date_sk#15)) + +(25) Project [codegen id : 4] +Output [1]: [d_date_sk#15] +Input [2]: [d_date_sk#15, d_month_seq#16] + +(26) BroadcastExchange +Input [1]: [d_date_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] + +(27) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_ship_date_sk#2] +Right keys [1]: [d_date_sk#15] +Join condition: None + +(28) Project [codegen id : 5] +Output [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, w_warehouse_name#7, sm_type#10, web_name#13] +Input [6]: [ws_sold_date_sk#1, ws_ship_date_sk#2, 
w_warehouse_name#7, sm_type#10, web_name#13, d_date_sk#15] + +(29) HashAggregate [codegen id : 5] +Input [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, w_warehouse_name#7, sm_type#10, web_name#13] +Keys [3]: [substr(w_warehouse_name#7, 1, 20) AS substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, web_name#13] +Functions [5]: [partial_sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))] +Aggregate Attributes [5]: [sum#19, sum#20, sum#21, sum#22, sum#23] +Results [8]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, web_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] + +(30) Exchange +Input [8]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, web_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] +Arguments: hashpartitioning(substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, web_name#13, 5), true, [id=#29] + +(31) HashAggregate [codegen id : 6] +Input [8]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, web_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] +Keys [3]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, web_name#13] +Functions [5]: [sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) 
> 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))] +Aggregate Attributes [5]: [sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint))#30, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint))#31, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint))#32, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint))#33, sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))#34] +Results [8]: [substr(w_warehouse_name#7, 1, 20)#18 AS substr(w_warehouse_name, 1, 20)#35, sm_type#10, web_name#13, sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint))#30 AS 30 days #36, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint))#31 AS 31 - 60 days #37, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint))#32 AS 61 - 90 days #38, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint))#33 AS 91 - 120 days #39, sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))#34 AS >120 days #40] + +(32) TakeOrderedAndProject +Input [8]: [substr(w_warehouse_name, 1, 20)#35, sm_type#10, 
web_name#13, 30 days #36, 31 - 60 days #37, 61 - 90 days #38, 91 - 120 days #39, >120 days #40] +Arguments: 100, [substr(w_warehouse_name, 1, 20)#35 ASC NULLS FIRST, sm_type#10 ASC NULLS FIRST, web_name#13 ASC NULLS FIRST], [substr(w_warehouse_name, 1, 20)#35, sm_type#10, web_name#13, 30 days #36, 31 - 60 days #37, 61 - 90 days #38, 91 - 120 days #39, >120 days #40] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/simplified.txt new file mode 100644 index 0000000000000..dda342f6e7c96 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/simplified.txt @@ -0,0 +1,48 @@ +TakeOrderedAndProject [30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sm_type,substr(w_warehouse_name, 1, 20),web_name] + WholeStageCodegen (6) + HashAggregate [sm_type,substr(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,web_name] [30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,substr(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint))] + InputAdapter + Exchange [sm_type,substr(w_warehouse_name, 1, 20),web_name] #1 + WholeStageCodegen (5) + HashAggregate [sm_type,w_warehouse_name,web_name,ws_ship_date_sk,ws_sold_date_sk] [substr(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project 
[sm_type,w_warehouse_name,web_name,ws_ship_date_sk,ws_sold_date_sk] + BroadcastHashJoin [d_date_sk,ws_ship_date_sk] + Project [sm_type,w_warehouse_name,web_name,ws_ship_date_sk,ws_sold_date_sk] + BroadcastHashJoin [web_site_sk,ws_web_site_sk] + Project [sm_type,w_warehouse_name,ws_ship_date_sk,ws_sold_date_sk,ws_web_site_sk] + BroadcastHashJoin [sm_ship_mode_sk,ws_ship_mode_sk] + Project [w_warehouse_name,ws_ship_date_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_web_site_sk] + BroadcastHashJoin [w_warehouse_sk,ws_warehouse_sk] + Filter [ws_ship_date_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ship_date_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_warehouse_sk,ws_web_site_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.warehouse [w_warehouse_name,w_warehouse_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [sm_ship_mode_sk] + ColumnarToRow + InputAdapter + Scan parquet default.ship_mode [sm_ship_mode_sk,sm_type] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_site [web_name,web_site_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [d_date_sk] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63.sf100/explain.txt new file mode 100644 index 0000000000000..76c468560a951 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63.sf100/explain.txt @@ -0,0 +1,180 @@ +== Physical Plan == +TakeOrderedAndProject (32) ++- * Project (31) + +- * Filter (30) + +- Window (29) + +- * Sort (28) + +- 
Exchange (27) + +- * HashAggregate (26) + +- Exchange (25) + +- * HashAggregate (24) + +- * Project (23) + +- * BroadcastHashJoin Inner BuildRight (22) + :- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildLeft (9) + : : :- BroadcastExchange (5) + : : : +- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.item (1) + : : +- * Filter (8) + : : +- * ColumnarToRow (7) + : : +- Scan parquet default.store_sales (6) + : +- BroadcastExchange (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet default.store (11) + +- BroadcastExchange (21) + +- * Project (20) + +- * Filter (19) + +- * ColumnarToRow (18) + +- Scan parquet default.date_dim (17) + + +(1) Scan parquet default.item +Output [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [Or(And(And(In(i_category, [Books,Children,Electronics]),In(i_class, [personal,portable,refernece,self-help])),In(i_brand, [scholaramalgamalg #6,scholaramalgamalg #7,exportiunivamalg #8,scholaramalgamalg #8])),And(And(In(i_category, [Women,Music,Men]),In(i_class, [accessories,classical,fragrances,pants])),In(i_brand, [amalgimporto #9,edu packscholar #9,exportiimporto #9,importoamalg #9]))), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] + +(3) Filter [codegen id : 1] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] +Condition : ((((i_category#4 IN (Books,Children,Electronics) AND i_class#3 IN (personal,portable,refernece,self-help)) AND i_brand#2 IN (scholaramalgamalg #6,scholaramalgamalg #7,exportiunivamalg #8,scholaramalgamalg #8)) OR ((i_category#4 IN 
(Women,Music,Men) AND i_class#3 IN (accessories,classical,fragrances,pants)) AND i_brand#2 IN (amalgimporto #9,edu packscholar #9,exportiimporto #9,importoamalg #9))) AND isnotnull(i_item_sk#1)) + +(4) Project [codegen id : 1] +Output [2]: [i_item_sk#1, i_manager_id#5] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] + +(5) BroadcastExchange +Input [2]: [i_item_sk#1, i_manager_id#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] + +(6) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#11, ss_item_sk#12, ss_store_sk#13, ss_sales_price#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(7) ColumnarToRow +Input [4]: [ss_sold_date_sk#11, ss_item_sk#12, ss_store_sk#13, ss_sales_price#14] + +(8) Filter +Input [4]: [ss_sold_date_sk#11, ss_item_sk#12, ss_store_sk#13, ss_sales_price#14] +Condition : ((isnotnull(ss_item_sk#12) AND isnotnull(ss_sold_date_sk#11)) AND isnotnull(ss_store_sk#13)) + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [ss_item_sk#12] +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [i_manager_id#5, ss_sold_date_sk#11, ss_store_sk#13, ss_sales_price#14] +Input [6]: [i_item_sk#1, i_manager_id#5, ss_sold_date_sk#11, ss_item_sk#12, ss_store_sk#13, ss_sales_price#14] + +(11) Scan parquet default.store +Output [1]: [s_store_sk#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [1]: [s_store_sk#15] + +(13) Filter [codegen id : 2] +Input 
[1]: [s_store_sk#15] +Condition : isnotnull(s_store_sk#15) + +(14) BroadcastExchange +Input [1]: [s_store_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#16] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#13] +Right keys [1]: [s_store_sk#15] +Join condition: None + +(16) Project [codegen id : 4] +Output [3]: [i_manager_id#5, ss_sold_date_sk#11, ss_sales_price#14] +Input [5]: [i_manager_id#5, ss_sold_date_sk#11, ss_store_sk#13, ss_sales_price#14, s_store_sk#15] + +(17) Scan parquet default.date_dim +Output [3]: [d_date_sk#17, d_month_seq#18, d_moy#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [In(d_month_seq, [1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#17, d_month_seq#18, d_moy#19] + +(19) Filter [codegen id : 3] +Input [3]: [d_date_sk#17, d_month_seq#18, d_moy#19] +Condition : (d_month_seq#18 INSET (1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204) AND isnotnull(d_date_sk#17)) + +(20) Project [codegen id : 3] +Output [2]: [d_date_sk#17, d_moy#19] +Input [3]: [d_date_sk#17, d_month_seq#18, d_moy#19] + +(21) BroadcastExchange +Input [2]: [d_date_sk#17, d_moy#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(22) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#11] +Right keys [1]: [d_date_sk#17] +Join condition: None + +(23) Project [codegen id : 4] +Output [3]: [i_manager_id#5, ss_sales_price#14, d_moy#19] +Input [5]: [i_manager_id#5, ss_sold_date_sk#11, ss_sales_price#14, d_date_sk#17, d_moy#19] + +(24) HashAggregate [codegen id : 4] +Input [3]: [i_manager_id#5, ss_sales_price#14, d_moy#19] +Keys [2]: [i_manager_id#5, d_moy#19] 
+Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#14))] +Aggregate Attributes [1]: [sum#21] +Results [3]: [i_manager_id#5, d_moy#19, sum#22] + +(25) Exchange +Input [3]: [i_manager_id#5, d_moy#19, sum#22] +Arguments: hashpartitioning(i_manager_id#5, d_moy#19, 5), true, [id=#23] + +(26) HashAggregate [codegen id : 5] +Input [3]: [i_manager_id#5, d_moy#19, sum#22] +Keys [2]: [i_manager_id#5, d_moy#19] +Functions [1]: [sum(UnscaledValue(ss_sales_price#14))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#14))#24] +Results [3]: [i_manager_id#5, MakeDecimal(sum(UnscaledValue(ss_sales_price#14))#24,17,2) AS sum_sales#25, MakeDecimal(sum(UnscaledValue(ss_sales_price#14))#24,17,2) AS _w0#26] + +(27) Exchange +Input [3]: [i_manager_id#5, sum_sales#25, _w0#26] +Arguments: hashpartitioning(i_manager_id#5, 5), true, [id=#27] + +(28) Sort [codegen id : 6] +Input [3]: [i_manager_id#5, sum_sales#25, _w0#26] +Arguments: [i_manager_id#5 ASC NULLS FIRST], false, 0 + +(29) Window +Input [3]: [i_manager_id#5, sum_sales#25, _w0#26] +Arguments: [avg(_w0#26) windowspecdefinition(i_manager_id#5, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#28], [i_manager_id#5] + +(30) Filter [codegen id : 7] +Input [4]: [i_manager_id#5, sum_sales#25, _w0#26, avg_monthly_sales#28] +Condition : (CASE WHEN (avg_monthly_sales#28 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#25 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#28 as decimal(22,6)))), DecimalType(22,6), true))) / promote_precision(cast(avg_monthly_sales#28 as decimal(22,6)))), DecimalType(38,16), true) ELSE null END > 0.1000000000000000) + +(31) Project [codegen id : 7] +Output [3]: [i_manager_id#5, sum_sales#25, avg_monthly_sales#28] +Input [4]: [i_manager_id#5, sum_sales#25, _w0#26, avg_monthly_sales#28] + +(32) TakeOrderedAndProject +Input [3]: [i_manager_id#5, sum_sales#25, avg_monthly_sales#28] 
+Arguments: 100, [i_manager_id#5 ASC NULLS FIRST, avg_monthly_sales#28 ASC NULLS FIRST, sum_sales#25 ASC NULLS FIRST], [i_manager_id#5, sum_sales#25, avg_monthly_sales#28] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63.sf100/simplified.txt new file mode 100644 index 0000000000000..99d321b52694a --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63.sf100/simplified.txt @@ -0,0 +1,49 @@ +TakeOrderedAndProject [avg_monthly_sales,i_manager_id,sum_sales] + WholeStageCodegen (7) + Project [avg_monthly_sales,i_manager_id,sum_sales] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_manager_id] + WholeStageCodegen (6) + Sort [i_manager_id] + InputAdapter + Exchange [i_manager_id] #1 + WholeStageCodegen (5) + HashAggregate [d_moy,i_manager_id,sum] [_w0,sum,sum(UnscaledValue(ss_sales_price)),sum_sales] + InputAdapter + Exchange [d_moy,i_manager_id] #2 + WholeStageCodegen (4) + HashAggregate [d_moy,i_manager_id,ss_sales_price] [sum,sum] + Project [d_moy,i_manager_id,ss_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_manager_id,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [i_manager_id,ss_sales_price,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [i_item_sk,i_manager_id] + Filter [i_brand,i_category,i_class,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_category,i_class,i_item_sk,i_manager_id] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet 
default.store [s_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [d_date_sk,d_moy] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq,d_moy] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/explain.txt new file mode 100644 index 0000000000000..733b3e5b05756 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/explain.txt @@ -0,0 +1,180 @@ +== Physical Plan == +TakeOrderedAndProject (32) ++- * Project (31) + +- * Filter (30) + +- Window (29) + +- * Sort (28) + +- Exchange (27) + +- * HashAggregate (26) + +- Exchange (25) + +- * HashAggregate (24) + +- * Project (23) + +- * BroadcastHashJoin Inner BuildRight (22) + :- * Project (17) + : +- * BroadcastHashJoin Inner BuildRight (16) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.item (1) + : : +- BroadcastExchange (8) + : : +- * Filter (7) + : : +- * ColumnarToRow (6) + : : +- Scan parquet default.store_sales (5) + : +- BroadcastExchange (15) + : +- * Project (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet default.date_dim (11) + +- BroadcastExchange (21) + +- * Filter (20) + +- * ColumnarToRow (19) + +- Scan parquet default.store (18) + + +(1) Scan parquet default.item +Output [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [Or(And(And(In(i_category, [Books,Children,Electronics]),In(i_class, [personal,portable,refernece,self-help])),In(i_brand, [scholaramalgamalg #6,scholaramalgamalg 
#7,exportiunivamalg #8,scholaramalgamalg #8])),And(And(In(i_category, [Women,Music,Men]),In(i_class, [accessories,classical,fragrances,pants])),In(i_brand, [amalgimporto #9,edu packscholar #9,exportiimporto #9,importoamalg #9]))), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] + +(3) Filter [codegen id : 4] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] +Condition : ((((i_category#4 IN (Books,Children,Electronics) AND i_class#3 IN (personal,portable,refernece,self-help)) AND i_brand#2 IN (scholaramalgamalg #6,scholaramalgamalg #7,exportiunivamalg #8,scholaramalgamalg #8)) OR ((i_category#4 IN (Women,Music,Men) AND i_class#3 IN (accessories,classical,fragrances,pants)) AND i_brand#2 IN (amalgimporto #9,edu packscholar #9,exportiimporto #9,importoamalg #9))) AND isnotnull(i_item_sk#1)) + +(4) Project [codegen id : 4] +Output [2]: [i_item_sk#1, i_manager_id#5] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] + +(5) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [4]: [ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] + +(7) Filter [codegen id : 1] +Input [4]: [ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] +Condition : ((isnotnull(ss_item_sk#11) AND isnotnull(ss_sold_date_sk#10)) AND isnotnull(ss_store_sk#12)) + +(8) BroadcastExchange +Input [4]: [ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] +Arguments: 
HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#14] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [ss_item_sk#11] +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [i_manager_id#5, ss_sold_date_sk#10, ss_store_sk#12, ss_sales_price#13] +Input [6]: [i_item_sk#1, i_manager_id#5, ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] + +(11) Scan parquet default.date_dim +Output [3]: [d_date_sk#15, d_month_seq#16, d_moy#17] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [In(d_month_seq, [1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#15, d_month_seq#16, d_moy#17] + +(13) Filter [codegen id : 2] +Input [3]: [d_date_sk#15, d_month_seq#16, d_moy#17] +Condition : (d_month_seq#16 INSET (1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204) AND isnotnull(d_date_sk#15)) + +(14) Project [codegen id : 2] +Output [2]: [d_date_sk#15, d_moy#17] +Input [3]: [d_date_sk#15, d_month_seq#16, d_moy#17] + +(15) BroadcastExchange +Input [2]: [d_date_sk#15, d_moy#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#10] +Right keys [1]: [d_date_sk#15] +Join condition: None + +(17) Project [codegen id : 4] +Output [4]: [i_manager_id#5, ss_store_sk#12, ss_sales_price#13, d_moy#17] +Input [6]: [i_manager_id#5, ss_sold_date_sk#10, ss_store_sk#12, ss_sales_price#13, d_date_sk#15, d_moy#17] + +(18) Scan parquet default.store +Output [1]: [s_store_sk#19] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [1]: [s_store_sk#19] + +(20) Filter [codegen id : 3] +Input [1]: [s_store_sk#19] +Condition : isnotnull(s_store_sk#19) + +(21) BroadcastExchange +Input [1]: [s_store_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#20] + +(22) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#12] +Right keys [1]: [s_store_sk#19] +Join condition: None + +(23) Project [codegen id : 4] +Output [3]: [i_manager_id#5, ss_sales_price#13, d_moy#17] +Input [5]: [i_manager_id#5, ss_store_sk#12, ss_sales_price#13, d_moy#17, s_store_sk#19] + +(24) HashAggregate [codegen id : 4] +Input [3]: [i_manager_id#5, ss_sales_price#13, d_moy#17] +Keys [2]: [i_manager_id#5, d_moy#17] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#13))] +Aggregate Attributes [1]: [sum#21] +Results [3]: [i_manager_id#5, d_moy#17, sum#22] + +(25) Exchange +Input [3]: [i_manager_id#5, d_moy#17, sum#22] +Arguments: hashpartitioning(i_manager_id#5, d_moy#17, 5), true, [id=#23] + +(26) HashAggregate [codegen id : 5] +Input [3]: [i_manager_id#5, d_moy#17, sum#22] +Keys [2]: [i_manager_id#5, d_moy#17] +Functions [1]: [sum(UnscaledValue(ss_sales_price#13))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#13))#24] +Results [3]: [i_manager_id#5, MakeDecimal(sum(UnscaledValue(ss_sales_price#13))#24,17,2) AS sum_sales#25, MakeDecimal(sum(UnscaledValue(ss_sales_price#13))#24,17,2) AS _w0#26] + +(27) Exchange +Input [3]: [i_manager_id#5, sum_sales#25, _w0#26] +Arguments: hashpartitioning(i_manager_id#5, 5), true, [id=#27] + +(28) Sort [codegen id : 6] +Input [3]: [i_manager_id#5, sum_sales#25, _w0#26] +Arguments: [i_manager_id#5 ASC NULLS FIRST], false, 0 + +(29) Window +Input [3]: [i_manager_id#5, sum_sales#25, _w0#26] 
+Arguments: [avg(_w0#26) windowspecdefinition(i_manager_id#5, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#28], [i_manager_id#5] + +(30) Filter [codegen id : 7] +Input [4]: [i_manager_id#5, sum_sales#25, _w0#26, avg_monthly_sales#28] +Condition : (CASE WHEN (avg_monthly_sales#28 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#25 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#28 as decimal(22,6)))), DecimalType(22,6), true))) / promote_precision(cast(avg_monthly_sales#28 as decimal(22,6)))), DecimalType(38,16), true) ELSE null END > 0.1000000000000000) + +(31) Project [codegen id : 7] +Output [3]: [i_manager_id#5, sum_sales#25, avg_monthly_sales#28] +Input [4]: [i_manager_id#5, sum_sales#25, _w0#26, avg_monthly_sales#28] + +(32) TakeOrderedAndProject +Input [3]: [i_manager_id#5, sum_sales#25, avg_monthly_sales#28] +Arguments: 100, [i_manager_id#5 ASC NULLS FIRST, avg_monthly_sales#28 ASC NULLS FIRST, sum_sales#25 ASC NULLS FIRST], [i_manager_id#5, sum_sales#25, avg_monthly_sales#28] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/simplified.txt new file mode 100644 index 0000000000000..7d1b1e0ae9c41 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/simplified.txt @@ -0,0 +1,49 @@ +TakeOrderedAndProject [avg_monthly_sales,i_manager_id,sum_sales] + WholeStageCodegen (7) + Project [avg_monthly_sales,i_manager_id,sum_sales] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_manager_id] + WholeStageCodegen (6) + Sort [i_manager_id] + InputAdapter + Exchange [i_manager_id] #1 + WholeStageCodegen (5) + HashAggregate [d_moy,i_manager_id,sum] [_w0,sum,sum(UnscaledValue(ss_sales_price)),sum_sales] + InputAdapter + Exchange [d_moy,i_manager_id] #2 + WholeStageCodegen 
(4) + HashAggregate [d_moy,i_manager_id,ss_sales_price] [sum,sum] + Project [d_moy,i_manager_id,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_moy,i_manager_id,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_manager_id,ss_sales_price,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [i_item_sk,i_manager_id] + Filter [i_brand,i_category,i_class,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_category,i_class,i_item_sk,i_manager_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk,d_moy] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64.sf100/explain.txt new file mode 100644 index 0000000000000..10f238ebd2ad7 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64.sf100/explain.txt @@ -0,0 +1,1110 @@ +== Physical Plan == +* Sort (209) ++- Exchange (208) + +- * Project (207) + +- * SortMergeJoin Inner (206) + :- * Sort (128) + : +- Exchange (127) + : +- * HashAggregate (126) + : +- Exchange (125) + : +- * HashAggregate (124) + : +- * Project (123) + : +- * BroadcastHashJoin Inner BuildRight (122) + : :- * Project (116) + : : +- * BroadcastHashJoin Inner BuildRight (115) + : : :- * Project (113) + : : : +- * BroadcastHashJoin Inner 
BuildRight (112) + : : : :- * Project (107) + : : : : +- * SortMergeJoin Inner (106) + : : : : :- * Sort (103) + : : : : : +- Exchange (102) + : : : : : +- * Project (101) + : : : : : +- * SortMergeJoin Inner (100) + : : : : : :- * Sort (94) + : : : : : : +- Exchange (93) + : : : : : : +- * Project (92) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (91) + : : : : : : :- * Project (89) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (88) + : : : : : : : :- * Project (83) + : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (82) + : : : : : : : : :- * Project (77) + : : : : : : : : : +- * SortMergeJoin Inner (76) + : : : : : : : : : :- * Sort (73) + : : : : : : : : : : +- Exchange (72) + : : : : : : : : : : +- * Project (71) + : : : : : : : : : : +- * SortMergeJoin Inner (70) + : : : : : : : : : : :- * Sort (64) + : : : : : : : : : : : +- Exchange (63) + : : : : : : : : : : : +- * Project (62) + : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (61) + : : : : : : : : : : : :- * Project (59) + : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (58) + : : : : : : : : : : : : :- * Project (53) + : : : : : : : : : : : : : +- * SortMergeJoin Inner (52) + : : : : : : : : : : : : : :- * Sort (46) + : : : : : : : : : : : : : : +- Exchange (45) + : : : : : : : : : : : : : : +- * Project (44) + : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (43) + : : : : : : : : : : : : : : :- * Project (38) + : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : : : : : : : : : : : : : :- * Project (32) + : : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (31) + : : : : : : : : : : : : : : : : :- * Project (12) + : : : : : : : : : : : : : : : : : +- * SortMergeJoin Inner (11) + : : : : : : : : : : : : : : : : : :- * Sort (5) + : : : : : : : : : : : : : : : : : : +- Exchange (4) + : : : : : : : : : : : : : : : : : : +- * Filter (3) + : : : : : : : : : : : : : : : : : : +- * 
ColumnarToRow (2) + : : : : : : : : : : : : : : : : : : +- Scan parquet default.store_sales (1) + : : : : : : : : : : : : : : : : : +- * Sort (10) + : : : : : : : : : : : : : : : : : +- Exchange (9) + : : : : : : : : : : : : : : : : : +- * Filter (8) + : : : : : : : : : : : : : : : : : +- * ColumnarToRow (7) + : : : : : : : : : : : : : : : : : +- Scan parquet default.store_returns (6) + : : : : : : : : : : : : : : : : +- BroadcastExchange (30) + : : : : : : : : : : : : : : : : +- * Project (29) + : : : : : : : : : : : : : : : : +- * Filter (28) + : : : : : : : : : : : : : : : : +- * HashAggregate (27) + : : : : : : : : : : : : : : : : +- Exchange (26) + : : : : : : : : : : : : : : : : +- * HashAggregate (25) + : : : : : : : : : : : : : : : : +- * Project (24) + : : : : : : : : : : : : : : : : +- * SortMergeJoin Inner (23) + : : : : : : : : : : : : : : : : :- * Sort (17) + : : : : : : : : : : : : : : : : : +- Exchange (16) + : : : : : : : : : : : : : : : : : +- * Filter (15) + : : : : : : : : : : : : : : : : : +- * ColumnarToRow (14) + : : : : : : : : : : : : : : : : : +- Scan parquet default.catalog_sales (13) + : : : : : : : : : : : : : : : : +- * Sort (22) + : : : : : : : : : : : : : : : : +- Exchange (21) + : : : : : : : : : : : : : : : : +- * Filter (20) + : : : : : : : : : : : : : : : : +- * ColumnarToRow (19) + : : : : : : : : : : : : : : : : +- Scan parquet default.catalog_returns (18) + : : : : : : : : : : : : : : : +- BroadcastExchange (36) + : : : : : : : : : : : : : : : +- * Filter (35) + : : : : : : : : : : : : : : : +- * ColumnarToRow (34) + : : : : : : : : : : : : : : : +- Scan parquet default.date_dim (33) + : : : : : : : : : : : : : : +- BroadcastExchange (42) + : : : : : : : : : : : : : : +- * Filter (41) + : : : : : : : : : : : : : : +- * ColumnarToRow (40) + : : : : : : : : : : : : : : +- Scan parquet default.store (39) + : : : : : : : : : : : : : +- * Sort (51) + : : : : : : : : : : : : : +- Exchange (50) + : : : : : : : : : : : : : +- * Filter 
(49) + : : : : : : : : : : : : : +- * ColumnarToRow (48) + : : : : : : : : : : : : : +- Scan parquet default.customer (47) + : : : : : : : : : : : : +- BroadcastExchange (57) + : : : : : : : : : : : : +- * Filter (56) + : : : : : : : : : : : : +- * ColumnarToRow (55) + : : : : : : : : : : : : +- Scan parquet default.date_dim (54) + : : : : : : : : : : : +- ReusedExchange (60) + : : : : : : : : : : +- * Sort (69) + : : : : : : : : : : +- Exchange (68) + : : : : : : : : : : +- * Filter (67) + : : : : : : : : : : +- * ColumnarToRow (66) + : : : : : : : : : : +- Scan parquet default.customer_demographics (65) + : : : : : : : : : +- * Sort (75) + : : : : : : : : : +- ReusedExchange (74) + : : : : : : : : +- BroadcastExchange (81) + : : : : : : : : +- * Filter (80) + : : : : : : : : +- * ColumnarToRow (79) + : : : : : : : : +- Scan parquet default.promotion (78) + : : : : : : : +- BroadcastExchange (87) + : : : : : : : +- * Filter (86) + : : : : : : : +- * ColumnarToRow (85) + : : : : : : : +- Scan parquet default.household_demographics (84) + : : : : : : +- ReusedExchange (90) + : : : : : +- * Sort (99) + : : : : : +- Exchange (98) + : : : : : +- * Filter (97) + : : : : : +- * ColumnarToRow (96) + : : : : : +- Scan parquet default.customer_address (95) + : : : : +- * Sort (105) + : : : : +- ReusedExchange (104) + : : : +- BroadcastExchange (111) + : : : +- * Filter (110) + : : : +- * ColumnarToRow (109) + : : : +- Scan parquet default.income_band (108) + : : +- ReusedExchange (114) + : +- BroadcastExchange (121) + : +- * Project (120) + : +- * Filter (119) + : +- * ColumnarToRow (118) + : +- Scan parquet default.item (117) + +- * Sort (205) + +- Exchange (204) + +- * HashAggregate (203) + +- Exchange (202) + +- * HashAggregate (201) + +- * Project (200) + +- * BroadcastHashJoin Inner BuildRight (199) + :- * Project (197) + : +- * BroadcastHashJoin Inner BuildRight (196) + : :- * Project (194) + : : +- * BroadcastHashJoin Inner BuildRight (193) + : : :- * Project (191) + 
: : : +- * SortMergeJoin Inner (190) + : : : :- * Sort (187) + : : : : +- Exchange (186) + : : : : +- * Project (185) + : : : : +- * SortMergeJoin Inner (184) + : : : : :- * Sort (181) + : : : : : +- Exchange (180) + : : : : : +- * Project (179) + : : : : : +- * BroadcastHashJoin Inner BuildRight (178) + : : : : : :- * Project (176) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (175) + : : : : : : :- * Project (173) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (172) + : : : : : : : :- * Project (170) + : : : : : : : : +- * SortMergeJoin Inner (169) + : : : : : : : : :- * Sort (166) + : : : : : : : : : +- Exchange (165) + : : : : : : : : : +- * Project (164) + : : : : : : : : : +- * SortMergeJoin Inner (163) + : : : : : : : : : :- * Sort (160) + : : : : : : : : : : +- Exchange (159) + : : : : : : : : : : +- * Project (158) + : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (157) + : : : : : : : : : : :- * Project (155) + : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (154) + : : : : : : : : : : : :- * Project (152) + : : : : : : : : : : : : +- * SortMergeJoin Inner (151) + : : : : : : : : : : : : :- * Sort (148) + : : : : : : : : : : : : : +- Exchange (147) + : : : : : : : : : : : : : +- * Project (146) + : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (145) + : : : : : : : : : : : : : :- * Project (143) + : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (142) + : : : : : : : : : : : : : : :- * Project (137) + : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (136) + : : : : : : : : : : : : : : : :- * Project (134) + : : : : : : : : : : : : : : : : +- * SortMergeJoin Inner (133) + : : : : : : : : : : : : : : : : :- * Sort (130) + : : : : : : : : : : : : : : : : : +- ReusedExchange (129) + : : : : : : : : : : : : : : : : +- * Sort (132) + : : : : : : : : : : : : : : : : +- ReusedExchange (131) + : : : : : : : : : : : : : : : +- ReusedExchange (135) + : : : : : : : : 
: : : : : : +- BroadcastExchange (141) + : : : : : : : : : : : : : : +- * Filter (140) + : : : : : : : : : : : : : : +- * ColumnarToRow (139) + : : : : : : : : : : : : : : +- Scan parquet default.date_dim (138) + : : : : : : : : : : : : : +- ReusedExchange (144) + : : : : : : : : : : : : +- * Sort (150) + : : : : : : : : : : : : +- ReusedExchange (149) + : : : : : : : : : : : +- ReusedExchange (153) + : : : : : : : : : : +- ReusedExchange (156) + : : : : : : : : : +- * Sort (162) + : : : : : : : : : +- ReusedExchange (161) + : : : : : : : : +- * Sort (168) + : : : : : : : : +- ReusedExchange (167) + : : : : : : : +- ReusedExchange (171) + : : : : : : +- ReusedExchange (174) + : : : : : +- ReusedExchange (177) + : : : : +- * Sort (183) + : : : : +- ReusedExchange (182) + : : : +- * Sort (189) + : : : +- ReusedExchange (188) + : : +- ReusedExchange (192) + : +- ReusedExchange (195) + +- ReusedExchange (198) + + +(1) Scan parquet default.store_sales +Output [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_promo_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] + +(3) Filter [codegen id : 1] +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, 
ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Condition : ((((((((isnotnull(ss_item_sk#2) AND isnotnull(ss_ticket_number#9)) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#7)) AND isnotnull(ss_customer_sk#3)) AND isnotnull(ss_cdemo_sk#4)) AND isnotnull(ss_promo_sk#8)) AND isnotnull(ss_hdemo_sk#5)) AND isnotnull(ss_addr_sk#6)) + +(4) Exchange +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Arguments: hashpartitioning(cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#9 as bigint), 5), true, [id=#13] + +(5) Sort [codegen id : 2] +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Arguments: [cast(ss_item_sk#2 as bigint) ASC NULLS FIRST, cast(ss_ticket_number#9 as bigint) ASC NULLS FIRST], false, 0 + +(6) Scan parquet default.store_returns +Output [2]: [sr_item_sk#14, sr_ticket_number#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_returns] +PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 3] +Input [2]: [sr_item_sk#14, sr_ticket_number#15] + +(8) Filter [codegen id : 3] +Input [2]: [sr_item_sk#14, sr_ticket_number#15] +Condition : (isnotnull(sr_item_sk#14) AND isnotnull(sr_ticket_number#15)) + +(9) Exchange +Input [2]: [sr_item_sk#14, sr_ticket_number#15] +Arguments: hashpartitioning(sr_item_sk#14, sr_ticket_number#15, 5), true, [id=#16] + +(10) Sort [codegen id : 4] +Input [2]: [sr_item_sk#14, sr_ticket_number#15] +Arguments: 
[sr_item_sk#14 ASC NULLS FIRST, sr_ticket_number#15 ASC NULLS FIRST], false, 0 + +(11) SortMergeJoin [codegen id : 13] +Left keys [2]: [cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#9 as bigint)] +Right keys [2]: [sr_item_sk#14, sr_ticket_number#15] +Join condition: None + +(12) Project [codegen id : 13] +Output [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Input [14]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, sr_item_sk#14, sr_ticket_number#15] + +(13) Scan parquet default.catalog_sales +Output [3]: [cs_item_sk#17, cs_order_number#18, cs_ext_list_price#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_order_number)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 5] +Input [3]: [cs_item_sk#17, cs_order_number#18, cs_ext_list_price#19] + +(15) Filter [codegen id : 5] +Input [3]: [cs_item_sk#17, cs_order_number#18, cs_ext_list_price#19] +Condition : (isnotnull(cs_item_sk#17) AND isnotnull(cs_order_number#18)) + +(16) Exchange +Input [3]: [cs_item_sk#17, cs_order_number#18, cs_ext_list_price#19] +Arguments: hashpartitioning(cs_item_sk#17, cs_order_number#18, 5), true, [id=#20] + +(17) Sort [codegen id : 6] +Input [3]: [cs_item_sk#17, cs_order_number#18, cs_ext_list_price#19] +Arguments: [cs_item_sk#17 ASC NULLS FIRST, cs_order_number#18 ASC NULLS FIRST], false, 0 + +(18) Scan parquet default.catalog_returns +Output [5]: [cr_item_sk#21, cr_order_number#22, cr_refunded_cash#23, cr_reversed_charge#24, cr_store_credit#25] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_returns] +PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 7] +Input [5]: [cr_item_sk#21, cr_order_number#22, cr_refunded_cash#23, cr_reversed_charge#24, cr_store_credit#25] + +(20) Filter [codegen id : 7] +Input [5]: [cr_item_sk#21, cr_order_number#22, cr_refunded_cash#23, cr_reversed_charge#24, cr_store_credit#25] +Condition : (isnotnull(cr_item_sk#21) AND isnotnull(cr_order_number#22)) + +(21) Exchange +Input [5]: [cr_item_sk#21, cr_order_number#22, cr_refunded_cash#23, cr_reversed_charge#24, cr_store_credit#25] +Arguments: hashpartitioning(cr_item_sk#21, cr_order_number#22, 5), true, [id=#26] + +(22) Sort [codegen id : 8] +Input [5]: [cr_item_sk#21, cr_order_number#22, cr_refunded_cash#23, cr_reversed_charge#24, cr_store_credit#25] +Arguments: [cr_item_sk#21 ASC NULLS FIRST, cr_order_number#22 ASC NULLS FIRST], false, 0 + +(23) SortMergeJoin [codegen id : 9] +Left keys [2]: [cs_item_sk#17, cs_order_number#18] +Right keys [2]: [cr_item_sk#21, cr_order_number#22] +Join condition: None + +(24) Project [codegen id : 9] +Output [5]: [cs_item_sk#17, cs_ext_list_price#19, cr_refunded_cash#23, cr_reversed_charge#24, cr_store_credit#25] +Input [8]: [cs_item_sk#17, cs_order_number#18, cs_ext_list_price#19, cr_item_sk#21, cr_order_number#22, cr_refunded_cash#23, cr_reversed_charge#24, cr_store_credit#25] + +(25) HashAggregate [codegen id : 9] +Input [5]: [cs_item_sk#17, cs_ext_list_price#19, cr_refunded_cash#23, cr_reversed_charge#24, cr_store_credit#25] +Keys [1]: [cs_item_sk#17] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_list_price#19)), partial_sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#23 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#24 as decimal(8,2)))), DecimalType(8,2), true) as 
decimal(9,2))) + promote_precision(cast(cr_store_credit#25 as decimal(9,2)))), DecimalType(9,2), true))] +Aggregate Attributes [3]: [sum#27, sum#28, isEmpty#29] +Results [4]: [cs_item_sk#17, sum#30, sum#31, isEmpty#32] + +(26) Exchange +Input [4]: [cs_item_sk#17, sum#30, sum#31, isEmpty#32] +Arguments: hashpartitioning(cs_item_sk#17, 5), true, [id=#33] + +(27) HashAggregate [codegen id : 10] +Input [4]: [cs_item_sk#17, sum#30, sum#31, isEmpty#32] +Keys [1]: [cs_item_sk#17] +Functions [2]: [sum(UnscaledValue(cs_ext_list_price#19)), sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#23 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#24 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#25 as decimal(9,2)))), DecimalType(9,2), true))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_list_price#19))#34, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#23 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#24 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#25 as decimal(9,2)))), DecimalType(9,2), true))#35] +Results [3]: [cs_item_sk#17, MakeDecimal(sum(UnscaledValue(cs_ext_list_price#19))#34,17,2) AS sum(cs_ext_list_price#19)#36, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#23 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#24 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#25 as decimal(9,2)))), DecimalType(9,2), true))#35 AS sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#23 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#24 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#25 as decimal(9,2)))), 
DecimalType(9,2), true))#37] + +(28) Filter [codegen id : 10] +Input [3]: [cs_item_sk#17, sum(cs_ext_list_price#19)#36, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#23 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#24 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#25 as decimal(9,2)))), DecimalType(9,2), true))#37] +Condition : (isnotnull(sum(cs_ext_list_price#19)#36) AND (cast(sum(cs_ext_list_price#19)#36 as decimal(21,2)) > CheckOverflow((2.00 * promote_precision(sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#23 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#24 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#25 as decimal(9,2)))), DecimalType(9,2), true))#37)), DecimalType(21,2), true))) + +(29) Project [codegen id : 10] +Output [1]: [cs_item_sk#17] +Input [3]: [cs_item_sk#17, sum(cs_ext_list_price#19)#36, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#23 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#24 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#25 as decimal(9,2)))), DecimalType(9,2), true))#37] + +(30) BroadcastExchange +Input [1]: [cs_item_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#38] + +(31) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [cs_item_sk#17] +Join condition: None + +(32) Project [codegen id : 13] +Output [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, 
ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, cs_item_sk#17] + +(33) Scan parquet default.date_dim +Output [2]: [d_date_sk#39, d_year#40] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 11] +Input [2]: [d_date_sk#39, d_year#40] + +(35) Filter [codegen id : 11] +Input [2]: [d_date_sk#39, d_year#40] +Condition : ((isnotnull(d_year#40) AND (d_year#40 = 1999)) AND isnotnull(d_date_sk#39)) + +(36) BroadcastExchange +Input [2]: [d_date_sk#39, d_year#40] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#41] + +(37) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#39] +Join condition: None + +(38) Project [codegen id : 13] +Output [11]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40] +Input [13]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_date_sk#39, d_year#40] + +(39) Scan parquet default.store +Output [3]: [s_store_sk#42, s_store_name#43, s_zip#44] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_zip)] +ReadSchema: struct + +(40) ColumnarToRow [codegen id : 12] +Input [3]: [s_store_sk#42, s_store_name#43, s_zip#44] + +(41) Filter [codegen id : 12] +Input [3]: [s_store_sk#42, s_store_name#43, 
s_zip#44] +Condition : ((isnotnull(s_store_sk#42) AND isnotnull(s_store_name#43)) AND isnotnull(s_zip#44)) + +(42) BroadcastExchange +Input [3]: [s_store_sk#42, s_store_name#43, s_zip#44] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#45] + +(43) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ss_store_sk#7] +Right keys [1]: [s_store_sk#42] +Join condition: None + +(44) Project [codegen id : 13] +Output [12]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44] +Input [14]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_sk#42, s_store_name#43, s_zip#44] + +(45) Exchange +Input [12]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44] +Arguments: hashpartitioning(ss_customer_sk#3, 5), true, [id=#46] + +(46) Sort [codegen id : 14] +Input [12]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44] +Arguments: [ss_customer_sk#3 ASC NULLS FIRST], false, 0 + +(47) Scan parquet default.customer +Output [6]: [c_customer_sk#47, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, c_first_shipto_date_sk#51, c_first_sales_date_sk#52] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_first_sales_date_sk), IsNotNull(c_first_shipto_date_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk), 
IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 15] +Input [6]: [c_customer_sk#47, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, c_first_shipto_date_sk#51, c_first_sales_date_sk#52] + +(49) Filter [codegen id : 15] +Input [6]: [c_customer_sk#47, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, c_first_shipto_date_sk#51, c_first_sales_date_sk#52] +Condition : (((((isnotnull(c_customer_sk#47) AND isnotnull(c_first_sales_date_sk#52)) AND isnotnull(c_first_shipto_date_sk#51)) AND isnotnull(c_current_cdemo_sk#48)) AND isnotnull(c_current_hdemo_sk#49)) AND isnotnull(c_current_addr_sk#50)) + +(50) Exchange +Input [6]: [c_customer_sk#47, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, c_first_shipto_date_sk#51, c_first_sales_date_sk#52] +Arguments: hashpartitioning(c_customer_sk#47, 5), true, [id=#53] + +(51) Sort [codegen id : 16] +Input [6]: [c_customer_sk#47, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, c_first_shipto_date_sk#51, c_first_sales_date_sk#52] +Arguments: [c_customer_sk#47 ASC NULLS FIRST], false, 0 + +(52) SortMergeJoin [codegen id : 19] +Left keys [1]: [ss_customer_sk#3] +Right keys [1]: [c_customer_sk#47] +Join condition: None + +(53) Project [codegen id : 19] +Output [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, c_first_shipto_date_sk#51, c_first_sales_date_sk#52] +Input [18]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_customer_sk#47, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, c_first_shipto_date_sk#51, c_first_sales_date_sk#52] + +(54) Scan parquet default.date_dim 
+Output [2]: [d_date_sk#54, d_year#55] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(55) ColumnarToRow [codegen id : 17] +Input [2]: [d_date_sk#54, d_year#55] + +(56) Filter [codegen id : 17] +Input [2]: [d_date_sk#54, d_year#55] +Condition : isnotnull(d_date_sk#54) + +(57) BroadcastExchange +Input [2]: [d_date_sk#54, d_year#55] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#56] + +(58) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [c_first_sales_date_sk#52] +Right keys [1]: [d_date_sk#54] +Join condition: None + +(59) Project [codegen id : 19] +Output [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, c_first_shipto_date_sk#51, d_year#55] +Input [18]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, c_first_shipto_date_sk#51, c_first_sales_date_sk#52, d_date_sk#54, d_year#55] + +(60) ReusedExchange [Reuses operator id: 57] +Output [2]: [d_date_sk#57, d_year#58] + +(61) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [c_first_shipto_date_sk#51] +Right keys [1]: [d_date_sk#57] +Join condition: None + +(62) Project [codegen id : 19] +Output [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#55, d_year#58] +Input [18]: [ss_item_sk#2, ss_cdemo_sk#4, 
ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, c_first_shipto_date_sk#51, d_year#55, d_date_sk#57, d_year#58] + +(63) Exchange +Input [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#55, d_year#58] +Arguments: hashpartitioning(ss_cdemo_sk#4, 5), true, [id=#59] + +(64) Sort [codegen id : 20] +Input [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#55, d_year#58] +Arguments: [ss_cdemo_sk#4 ASC NULLS FIRST], false, 0 + +(65) Scan parquet default.customer_demographics +Output [2]: [cd_demo_sk#60, cd_marital_status#61] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status)] +ReadSchema: struct + +(66) ColumnarToRow [codegen id : 21] +Input [2]: [cd_demo_sk#60, cd_marital_status#61] + +(67) Filter [codegen id : 21] +Input [2]: [cd_demo_sk#60, cd_marital_status#61] +Condition : (isnotnull(cd_demo_sk#60) AND isnotnull(cd_marital_status#61)) + +(68) Exchange +Input [2]: [cd_demo_sk#60, cd_marital_status#61] +Arguments: hashpartitioning(cd_demo_sk#60, 5), true, [id=#62] + +(69) Sort [codegen id : 22] +Input [2]: [cd_demo_sk#60, cd_marital_status#61] +Arguments: [cd_demo_sk#60 ASC NULLS FIRST], false, 0 + +(70) SortMergeJoin [codegen id : 23] +Left keys [1]: [ss_cdemo_sk#4] +Right keys [1]: [cd_demo_sk#60] +Join condition: 
None + +(71) Project [codegen id : 23] +Output [16]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#55, d_year#58, cd_marital_status#61] +Input [18]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#55, d_year#58, cd_demo_sk#60, cd_marital_status#61] + +(72) Exchange +Input [16]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#55, d_year#58, cd_marital_status#61] +Arguments: hashpartitioning(c_current_cdemo_sk#48, 5), true, [id=#63] + +(73) Sort [codegen id : 24] +Input [16]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#55, d_year#58, cd_marital_status#61] +Arguments: [c_current_cdemo_sk#48 ASC NULLS FIRST], false, 0 + +(74) ReusedExchange [Reuses operator id: 68] +Output [2]: [cd_demo_sk#64, cd_marital_status#65] + +(75) Sort [codegen id : 26] +Input [2]: [cd_demo_sk#64, cd_marital_status#65] +Arguments: [cd_demo_sk#64 ASC NULLS FIRST], false, 0 + +(76) SortMergeJoin [codegen id : 30] +Left keys [1]: [c_current_cdemo_sk#48] +Right keys [1]: [cd_demo_sk#64] +Join condition: NOT (cd_marital_status#61 = cd_marital_status#65) + +(77) Project [codegen id : 30] +Output [14]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, 
c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#55, d_year#58] +Input [18]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#55, d_year#58, cd_marital_status#61, cd_demo_sk#64, cd_marital_status#65] + +(78) Scan parquet default.promotion +Output [1]: [p_promo_sk#66] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/promotion] +PushedFilters: [IsNotNull(p_promo_sk)] +ReadSchema: struct + +(79) ColumnarToRow [codegen id : 27] +Input [1]: [p_promo_sk#66] + +(80) Filter [codegen id : 27] +Input [1]: [p_promo_sk#66] +Condition : isnotnull(p_promo_sk#66) + +(81) BroadcastExchange +Input [1]: [p_promo_sk#66] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#67] + +(82) BroadcastHashJoin [codegen id : 30] +Left keys [1]: [ss_promo_sk#8] +Right keys [1]: [p_promo_sk#66] +Join condition: None + +(83) Project [codegen id : 30] +Output [13]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#55, d_year#58] +Input [15]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#55, d_year#58, p_promo_sk#66] + +(84) Scan parquet default.household_demographics +Output [2]: [hd_demo_sk#68, hd_income_band_sk#69] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/household_demographics] +PushedFilters: [IsNotNull(hd_demo_sk), 
IsNotNull(hd_income_band_sk)] +ReadSchema: struct + +(85) ColumnarToRow [codegen id : 28] +Input [2]: [hd_demo_sk#68, hd_income_band_sk#69] + +(86) Filter [codegen id : 28] +Input [2]: [hd_demo_sk#68, hd_income_band_sk#69] +Condition : (isnotnull(hd_demo_sk#68) AND isnotnull(hd_income_band_sk#69)) + +(87) BroadcastExchange +Input [2]: [hd_demo_sk#68, hd_income_band_sk#69] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#70] + +(88) BroadcastHashJoin [codegen id : 30] +Left keys [1]: [ss_hdemo_sk#5] +Right keys [1]: [hd_demo_sk#68] +Join condition: None + +(89) Project [codegen id : 30] +Output [13]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#55, d_year#58, hd_income_band_sk#69] +Input [15]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#55, d_year#58, hd_demo_sk#68, hd_income_band_sk#69] + +(90) ReusedExchange [Reuses operator id: 87] +Output [2]: [hd_demo_sk#71, hd_income_band_sk#72] + +(91) BroadcastHashJoin [codegen id : 30] +Left keys [1]: [c_current_hdemo_sk#49] +Right keys [1]: [hd_demo_sk#71] +Join condition: None + +(92) Project [codegen id : 30] +Output [13]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_addr_sk#50, d_year#55, d_year#58, hd_income_band_sk#69, hd_income_band_sk#72] +Input [15]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#55, d_year#58, hd_income_band_sk#69, hd_demo_sk#71, hd_income_band_sk#72] + +(93) Exchange +Input [13]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, 
ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_addr_sk#50, d_year#55, d_year#58, hd_income_band_sk#69, hd_income_band_sk#72] +Arguments: hashpartitioning(ss_addr_sk#6, 5), true, [id=#73] + +(94) Sort [codegen id : 31] +Input [13]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_addr_sk#50, d_year#55, d_year#58, hd_income_band_sk#69, hd_income_band_sk#72] +Arguments: [ss_addr_sk#6 ASC NULLS FIRST], false, 0 + +(95) Scan parquet default.customer_address +Output [5]: [ca_address_sk#74, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(96) ColumnarToRow [codegen id : 32] +Input [5]: [ca_address_sk#74, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] + +(97) Filter [codegen id : 32] +Input [5]: [ca_address_sk#74, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] +Condition : isnotnull(ca_address_sk#74) + +(98) Exchange +Input [5]: [ca_address_sk#74, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] +Arguments: hashpartitioning(ca_address_sk#74, 5), true, [id=#79] + +(99) Sort [codegen id : 33] +Input [5]: [ca_address_sk#74, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] +Arguments: [ca_address_sk#74 ASC NULLS FIRST], false, 0 + +(100) SortMergeJoin [codegen id : 34] +Left keys [1]: [ss_addr_sk#6] +Right keys [1]: [ca_address_sk#74] +Join condition: None + +(101) Project [codegen id : 34] +Output [16]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_addr_sk#50, d_year#55, d_year#58, hd_income_band_sk#69, hd_income_band_sk#72, ca_street_number#75, ca_street_name#76, ca_city#77, 
ca_zip#78] +Input [18]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_addr_sk#50, d_year#55, d_year#58, hd_income_band_sk#69, hd_income_band_sk#72, ca_address_sk#74, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] + +(102) Exchange +Input [16]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_addr_sk#50, d_year#55, d_year#58, hd_income_band_sk#69, hd_income_band_sk#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] +Arguments: hashpartitioning(c_current_addr_sk#50, 5), true, [id=#80] + +(103) Sort [codegen id : 35] +Input [16]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_addr_sk#50, d_year#55, d_year#58, hd_income_band_sk#69, hd_income_band_sk#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] +Arguments: [c_current_addr_sk#50 ASC NULLS FIRST], false, 0 + +(104) ReusedExchange [Reuses operator id: 98] +Output [5]: [ca_address_sk#81, ca_street_number#82, ca_street_name#83, ca_city#84, ca_zip#85] + +(105) Sort [codegen id : 37] +Input [5]: [ca_address_sk#81, ca_street_number#82, ca_street_name#83, ca_city#84, ca_zip#85] +Arguments: [ca_address_sk#81 ASC NULLS FIRST], false, 0 + +(106) SortMergeJoin [codegen id : 41] +Left keys [1]: [c_current_addr_sk#50] +Right keys [1]: [ca_address_sk#81] +Join condition: None + +(107) Project [codegen id : 41] +Output [19]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, d_year#55, d_year#58, hd_income_band_sk#69, hd_income_band_sk#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#82, ca_street_name#83, ca_city#84, ca_zip#85] +Input [21]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, 
c_current_addr_sk#50, d_year#55, d_year#58, hd_income_band_sk#69, hd_income_band_sk#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_address_sk#81, ca_street_number#82, ca_street_name#83, ca_city#84, ca_zip#85] + +(108) Scan parquet default.income_band +Output [1]: [ib_income_band_sk#86] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/income_band] +PushedFilters: [IsNotNull(ib_income_band_sk)] +ReadSchema: struct + +(109) ColumnarToRow [codegen id : 38] +Input [1]: [ib_income_band_sk#86] + +(110) Filter [codegen id : 38] +Input [1]: [ib_income_band_sk#86] +Condition : isnotnull(ib_income_band_sk#86) + +(111) BroadcastExchange +Input [1]: [ib_income_band_sk#86] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#87] + +(112) BroadcastHashJoin [codegen id : 41] +Left keys [1]: [hd_income_band_sk#69] +Right keys [1]: [ib_income_band_sk#86] +Join condition: None + +(113) Project [codegen id : 41] +Output [18]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, d_year#55, d_year#58, hd_income_band_sk#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#82, ca_street_name#83, ca_city#84, ca_zip#85] +Input [20]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, d_year#55, d_year#58, hd_income_band_sk#69, hd_income_band_sk#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#82, ca_street_name#83, ca_city#84, ca_zip#85, ib_income_band_sk#86] + +(114) ReusedExchange [Reuses operator id: 111] +Output [1]: [ib_income_band_sk#88] + +(115) BroadcastHashJoin [codegen id : 41] +Left keys [1]: [hd_income_band_sk#72] +Right keys [1]: [ib_income_band_sk#88] +Join condition: None + +(116) Project [codegen id : 41] +Output 
[17]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, d_year#55, d_year#58, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#82, ca_street_name#83, ca_city#84, ca_zip#85] +Input [19]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, d_year#55, d_year#58, hd_income_band_sk#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#82, ca_street_name#83, ca_city#84, ca_zip#85, ib_income_band_sk#88] + +(117) Scan parquet default.item +Output [4]: [i_item_sk#89, i_current_price#90, i_color#91, i_product_name#92] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_current_price), In(i_color, [purple,burlywood,indian,spring,floral,medium]), GreaterThanOrEqual(i_current_price,64.00), IsNotNull(i_item_sk)] +ReadSchema: struct + +(118) ColumnarToRow [codegen id : 40] +Input [4]: [i_item_sk#89, i_current_price#90, i_color#91, i_product_name#92] + +(119) Filter [codegen id : 40] +Input [4]: [i_item_sk#89, i_current_price#90, i_color#91, i_product_name#92] +Condition : ((((((isnotnull(i_current_price#90) AND i_color#91 IN (purple,burlywood,indian,spring,floral,medium)) AND (i_current_price#90 >= 64.00)) AND (cast(i_current_price#90 as decimal(12,2)) <= 74.00)) AND (cast(i_current_price#90 as decimal(12,2)) >= 65.00)) AND (cast(i_current_price#90 as decimal(12,2)) <= 79.00)) AND isnotnull(i_item_sk#89)) + +(120) Project [codegen id : 40] +Output [2]: [i_item_sk#89, i_product_name#92] +Input [4]: [i_item_sk#89, i_current_price#90, i_color#91, i_product_name#92] + +(121) BroadcastExchange +Input [2]: [i_item_sk#89, i_product_name#92] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#93] + +(122) 
BroadcastHashJoin [codegen id : 41] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#89] +Join condition: None + +(123) Project [codegen id : 41] +Output [18]: [ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, d_year#55, d_year#58, s_store_name#43, s_zip#44, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#82, ca_street_name#83, ca_city#84, ca_zip#85, i_item_sk#89, i_product_name#92] +Input [19]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, d_year#55, d_year#58, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#82, ca_street_name#83, ca_city#84, ca_zip#85, i_item_sk#89, i_product_name#92] + +(124) HashAggregate [codegen id : 41] +Input [18]: [ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, d_year#55, d_year#58, s_store_name#43, s_zip#44, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#82, ca_street_name#83, ca_city#84, ca_zip#85, i_item_sk#89, i_product_name#92] +Keys [15]: [i_product_name#92, i_item_sk#89, s_store_name#43, s_zip#44, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#82, ca_street_name#83, ca_city#84, ca_zip#85, d_year#40, d_year#55, d_year#58] +Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#10)), partial_sum(UnscaledValue(ss_list_price#11)), partial_sum(UnscaledValue(ss_coupon_amt#12))] +Aggregate Attributes [4]: [count#94, sum#95, sum#96, sum#97] +Results [19]: [i_product_name#92, i_item_sk#89, s_store_name#43, s_zip#44, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#82, ca_street_name#83, ca_city#84, ca_zip#85, d_year#40, d_year#55, d_year#58, count#98, sum#99, sum#100, sum#101] + +(125) Exchange +Input [19]: [i_product_name#92, i_item_sk#89, s_store_name#43, s_zip#44, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, 
ca_street_number#82, ca_street_name#83, ca_city#84, ca_zip#85, d_year#40, d_year#55, d_year#58, count#98, sum#99, sum#100, sum#101] +Arguments: hashpartitioning(i_product_name#92, i_item_sk#89, s_store_name#43, s_zip#44, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#82, ca_street_name#83, ca_city#84, ca_zip#85, d_year#40, d_year#55, d_year#58, 5), true, [id=#102] + +(126) HashAggregate [codegen id : 42] +Input [19]: [i_product_name#92, i_item_sk#89, s_store_name#43, s_zip#44, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#82, ca_street_name#83, ca_city#84, ca_zip#85, d_year#40, d_year#55, d_year#58, count#98, sum#99, sum#100, sum#101] +Keys [15]: [i_product_name#92, i_item_sk#89, s_store_name#43, s_zip#44, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#82, ca_street_name#83, ca_city#84, ca_zip#85, d_year#40, d_year#55, d_year#58] +Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#10)), sum(UnscaledValue(ss_list_price#11)), sum(UnscaledValue(ss_coupon_amt#12))] +Aggregate Attributes [4]: [count(1)#103, sum(UnscaledValue(ss_wholesale_cost#10))#104, sum(UnscaledValue(ss_list_price#11))#105, sum(UnscaledValue(ss_coupon_amt#12))#106] +Results [17]: [i_product_name#92 AS product_name#107, i_item_sk#89 AS item_sk#108, s_store_name#43 AS store_name#109, s_zip#44 AS store_zip#110, ca_street_number#75 AS b_street_number#111, ca_street_name#76 AS b_streen_name#112, ca_city#77 AS b_city#113, ca_zip#78 AS b_zip#114, ca_street_number#82 AS c_street_number#115, ca_street_name#83 AS c_street_name#116, ca_city#84 AS c_city#117, ca_zip#85 AS c_zip#118, d_year#40 AS syear#119, count(1)#103 AS cnt#120, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#10))#104,17,2) AS s1#121, MakeDecimal(sum(UnscaledValue(ss_list_price#11))#105,17,2) AS s2#122, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#12))#106,17,2) AS s3#123] + +(127) Exchange +Input [17]: [product_name#107, item_sk#108, 
store_name#109, store_zip#110, b_street_number#111, b_streen_name#112, b_city#113, b_zip#114, c_street_number#115, c_street_name#116, c_city#117, c_zip#118, syear#119, cnt#120, s1#121, s2#122, s3#123] +Arguments: hashpartitioning(item_sk#108, store_name#109, store_zip#110, 5), true, [id=#124] + +(128) Sort [codegen id : 43] +Input [17]: [product_name#107, item_sk#108, store_name#109, store_zip#110, b_street_number#111, b_streen_name#112, b_city#113, b_zip#114, c_street_number#115, c_street_name#116, c_city#117, c_zip#118, syear#119, cnt#120, s1#121, s2#122, s3#123] +Arguments: [item_sk#108 ASC NULLS FIRST, store_name#109 ASC NULLS FIRST, store_zip#110 ASC NULLS FIRST], false, 0 + +(129) ReusedExchange [Reuses operator id: 4] +Output [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] + +(130) Sort [codegen id : 45] +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Arguments: [cast(ss_item_sk#2 as bigint) ASC NULLS FIRST, cast(ss_ticket_number#9 as bigint) ASC NULLS FIRST], false, 0 + +(131) ReusedExchange [Reuses operator id: 9] +Output [2]: [sr_item_sk#14, sr_ticket_number#15] + +(132) Sort [codegen id : 47] +Input [2]: [sr_item_sk#14, sr_ticket_number#15] +Arguments: [sr_item_sk#14 ASC NULLS FIRST, sr_ticket_number#15 ASC NULLS FIRST], false, 0 + +(133) SortMergeJoin [codegen id : 56] +Left keys [2]: [cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#9 as bigint)] +Right keys [2]: [sr_item_sk#14, sr_ticket_number#15] +Join condition: None + +(134) Project [codegen id : 56] +Output [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, 
ss_list_price#11, ss_coupon_amt#12] +Input [14]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, sr_item_sk#14, sr_ticket_number#15] + +(135) ReusedExchange [Reuses operator id: 30] +Output [1]: [cs_item_sk#17] + +(136) BroadcastHashJoin [codegen id : 56] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [cs_item_sk#17] +Join condition: None + +(137) Project [codegen id : 56] +Output [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, cs_item_sk#17] + +(138) Scan parquet default.date_dim +Output [2]: [d_date_sk#39, d_year#40] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(139) ColumnarToRow [codegen id : 54] +Input [2]: [d_date_sk#39, d_year#40] + +(140) Filter [codegen id : 54] +Input [2]: [d_date_sk#39, d_year#40] +Condition : ((isnotnull(d_year#40) AND (d_year#40 = 2000)) AND isnotnull(d_date_sk#39)) + +(141) BroadcastExchange +Input [2]: [d_date_sk#39, d_year#40] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#125] + +(142) BroadcastHashJoin [codegen id : 56] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#39] +Join condition: None + +(143) Project [codegen id : 56] +Output [11]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, 
ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40] +Input [13]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_date_sk#39, d_year#40] + +(144) ReusedExchange [Reuses operator id: 42] +Output [3]: [s_store_sk#42, s_store_name#43, s_zip#44] + +(145) BroadcastHashJoin [codegen id : 56] +Left keys [1]: [ss_store_sk#7] +Right keys [1]: [s_store_sk#42] +Join condition: None + +(146) Project [codegen id : 56] +Output [12]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44] +Input [14]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_sk#42, s_store_name#43, s_zip#44] + +(147) Exchange +Input [12]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44] +Arguments: hashpartitioning(ss_customer_sk#3, 5), true, [id=#126] + +(148) Sort [codegen id : 57] +Input [12]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44] +Arguments: [ss_customer_sk#3 ASC NULLS FIRST], false, 0 + +(149) ReusedExchange [Reuses operator id: 50] +Output [6]: [c_customer_sk#47, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, c_first_shipto_date_sk#51, c_first_sales_date_sk#52] + +(150) Sort [codegen id : 59] +Input [6]: [c_customer_sk#47, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, c_first_shipto_date_sk#51, c_first_sales_date_sk#52] +Arguments: [c_customer_sk#47 ASC NULLS 
FIRST], false, 0 + +(151) SortMergeJoin [codegen id : 62] +Left keys [1]: [ss_customer_sk#3] +Right keys [1]: [c_customer_sk#47] +Join condition: None + +(152) Project [codegen id : 62] +Output [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, c_first_shipto_date_sk#51, c_first_sales_date_sk#52] +Input [18]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_customer_sk#47, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, c_first_shipto_date_sk#51, c_first_sales_date_sk#52] + +(153) ReusedExchange [Reuses operator id: 57] +Output [2]: [d_date_sk#127, d_year#128] + +(154) BroadcastHashJoin [codegen id : 62] +Left keys [1]: [c_first_sales_date_sk#52] +Right keys [1]: [d_date_sk#127] +Join condition: None + +(155) Project [codegen id : 62] +Output [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, c_first_shipto_date_sk#51, d_year#128] +Input [18]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, c_first_shipto_date_sk#51, c_first_sales_date_sk#52, d_date_sk#127, d_year#128] + +(156) ReusedExchange [Reuses operator id: 57] +Output [2]: [d_date_sk#129, d_year#130] + +(157) BroadcastHashJoin [codegen id : 62] +Left keys [1]: [c_first_shipto_date_sk#51] +Right keys [1]: [d_date_sk#129] +Join condition: None + +(158) Project [codegen id : 62] +Output 
[16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#128, d_year#130] +Input [18]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, c_first_shipto_date_sk#51, d_year#128, d_date_sk#129, d_year#130] + +(159) Exchange +Input [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#128, d_year#130] +Arguments: hashpartitioning(ss_cdemo_sk#4, 5), true, [id=#131] + +(160) Sort [codegen id : 63] +Input [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#128, d_year#130] +Arguments: [ss_cdemo_sk#4 ASC NULLS FIRST], false, 0 + +(161) ReusedExchange [Reuses operator id: 68] +Output [2]: [cd_demo_sk#60, cd_marital_status#61] + +(162) Sort [codegen id : 65] +Input [2]: [cd_demo_sk#60, cd_marital_status#61] +Arguments: [cd_demo_sk#60 ASC NULLS FIRST], false, 0 + +(163) SortMergeJoin [codegen id : 66] +Left keys [1]: [ss_cdemo_sk#4] +Right keys [1]: [cd_demo_sk#60] +Join condition: None + +(164) Project [codegen id : 66] +Output [16]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#128, d_year#130, cd_marital_status#61] +Input [18]: 
[ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#128, d_year#130, cd_demo_sk#60, cd_marital_status#61] + +(165) Exchange +Input [16]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#128, d_year#130, cd_marital_status#61] +Arguments: hashpartitioning(c_current_cdemo_sk#48, 5), true, [id=#132] + +(166) Sort [codegen id : 67] +Input [16]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#128, d_year#130, cd_marital_status#61] +Arguments: [c_current_cdemo_sk#48 ASC NULLS FIRST], false, 0 + +(167) ReusedExchange [Reuses operator id: 68] +Output [2]: [cd_demo_sk#133, cd_marital_status#134] + +(168) Sort [codegen id : 69] +Input [2]: [cd_demo_sk#133, cd_marital_status#134] +Arguments: [cd_demo_sk#133 ASC NULLS FIRST], false, 0 + +(169) SortMergeJoin [codegen id : 73] +Left keys [1]: [c_current_cdemo_sk#48] +Right keys [1]: [cd_demo_sk#133] +Join condition: NOT (cd_marital_status#61 = cd_marital_status#134) + +(170) Project [codegen id : 73] +Output [14]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#128, d_year#130] +Input [18]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#128, 
d_year#130, cd_marital_status#61, cd_demo_sk#133, cd_marital_status#134] + +(171) ReusedExchange [Reuses operator id: 81] +Output [1]: [p_promo_sk#66] + +(172) BroadcastHashJoin [codegen id : 73] +Left keys [1]: [ss_promo_sk#8] +Right keys [1]: [p_promo_sk#66] +Join condition: None + +(173) Project [codegen id : 73] +Output [13]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#128, d_year#130] +Input [15]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#128, d_year#130, p_promo_sk#66] + +(174) ReusedExchange [Reuses operator id: 87] +Output [2]: [hd_demo_sk#68, hd_income_band_sk#69] + +(175) BroadcastHashJoin [codegen id : 73] +Left keys [1]: [ss_hdemo_sk#5] +Right keys [1]: [hd_demo_sk#68] +Join condition: None + +(176) Project [codegen id : 73] +Output [13]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#128, d_year#130, hd_income_band_sk#69] +Input [15]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#128, d_year#130, hd_demo_sk#68, hd_income_band_sk#69] + +(177) ReusedExchange [Reuses operator id: 87] +Output [2]: [hd_demo_sk#135, hd_income_band_sk#136] + +(178) BroadcastHashJoin [codegen id : 73] +Left keys [1]: [c_current_hdemo_sk#49] +Right keys [1]: [hd_demo_sk#135] +Join condition: None + +(179) Project [codegen id : 73] +Output [13]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_addr_sk#50, d_year#128, 
d_year#130, hd_income_band_sk#69, hd_income_band_sk#136] +Input [15]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#128, d_year#130, hd_income_band_sk#69, hd_demo_sk#135, hd_income_band_sk#136] + +(180) Exchange +Input [13]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_addr_sk#50, d_year#128, d_year#130, hd_income_band_sk#69, hd_income_band_sk#136] +Arguments: hashpartitioning(ss_addr_sk#6, 5), true, [id=#137] + +(181) Sort [codegen id : 74] +Input [13]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_addr_sk#50, d_year#128, d_year#130, hd_income_band_sk#69, hd_income_band_sk#136] +Arguments: [ss_addr_sk#6 ASC NULLS FIRST], false, 0 + +(182) ReusedExchange [Reuses operator id: 98] +Output [5]: [ca_address_sk#74, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] + +(183) Sort [codegen id : 76] +Input [5]: [ca_address_sk#74, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] +Arguments: [ca_address_sk#74 ASC NULLS FIRST], false, 0 + +(184) SortMergeJoin [codegen id : 77] +Left keys [1]: [ss_addr_sk#6] +Right keys [1]: [ca_address_sk#74] +Join condition: None + +(185) Project [codegen id : 77] +Output [16]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_addr_sk#50, d_year#128, d_year#130, hd_income_band_sk#69, hd_income_band_sk#136, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] +Input [18]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_addr_sk#50, d_year#128, d_year#130, hd_income_band_sk#69, hd_income_band_sk#136, ca_address_sk#74, ca_street_number#75, 
ca_street_name#76, ca_city#77, ca_zip#78] + +(186) Exchange +Input [16]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_addr_sk#50, d_year#128, d_year#130, hd_income_band_sk#69, hd_income_band_sk#136, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] +Arguments: hashpartitioning(c_current_addr_sk#50, 5), true, [id=#138] + +(187) Sort [codegen id : 78] +Input [16]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_addr_sk#50, d_year#128, d_year#130, hd_income_band_sk#69, hd_income_band_sk#136, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] +Arguments: [c_current_addr_sk#50 ASC NULLS FIRST], false, 0 + +(188) ReusedExchange [Reuses operator id: 98] +Output [5]: [ca_address_sk#139, ca_street_number#140, ca_street_name#141, ca_city#142, ca_zip#143] + +(189) Sort [codegen id : 80] +Input [5]: [ca_address_sk#139, ca_street_number#140, ca_street_name#141, ca_city#142, ca_zip#143] +Arguments: [ca_address_sk#139 ASC NULLS FIRST], false, 0 + +(190) SortMergeJoin [codegen id : 84] +Left keys [1]: [c_current_addr_sk#50] +Right keys [1]: [ca_address_sk#139] +Join condition: None + +(191) Project [codegen id : 84] +Output [19]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, d_year#128, d_year#130, hd_income_band_sk#69, hd_income_band_sk#136, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#140, ca_street_name#141, ca_city#142, ca_zip#143] +Input [21]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_addr_sk#50, d_year#128, d_year#130, hd_income_band_sk#69, hd_income_band_sk#136, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_address_sk#139, ca_street_number#140, ca_street_name#141, ca_city#142, ca_zip#143] + +(192) 
ReusedExchange [Reuses operator id: 111] +Output [1]: [ib_income_band_sk#86] + +(193) BroadcastHashJoin [codegen id : 84] +Left keys [1]: [hd_income_band_sk#69] +Right keys [1]: [ib_income_band_sk#86] +Join condition: None + +(194) Project [codegen id : 84] +Output [18]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, d_year#128, d_year#130, hd_income_band_sk#136, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#140, ca_street_name#141, ca_city#142, ca_zip#143] +Input [20]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, d_year#128, d_year#130, hd_income_band_sk#69, hd_income_band_sk#136, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#140, ca_street_name#141, ca_city#142, ca_zip#143, ib_income_band_sk#86] + +(195) ReusedExchange [Reuses operator id: 111] +Output [1]: [ib_income_band_sk#144] + +(196) BroadcastHashJoin [codegen id : 84] +Left keys [1]: [hd_income_band_sk#136] +Right keys [1]: [ib_income_band_sk#144] +Join condition: None + +(197) Project [codegen id : 84] +Output [17]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, d_year#128, d_year#130, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#140, ca_street_name#141, ca_city#142, ca_zip#143] +Input [19]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, d_year#128, d_year#130, hd_income_band_sk#136, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#140, ca_street_name#141, ca_city#142, ca_zip#143, ib_income_band_sk#144] + +(198) ReusedExchange [Reuses operator id: 121] +Output [2]: [i_item_sk#89, i_product_name#92] + +(199) BroadcastHashJoin [codegen id : 84] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#89] +Join condition: 
None + +(200) Project [codegen id : 84] +Output [18]: [ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, d_year#128, d_year#130, s_store_name#43, s_zip#44, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#140, ca_street_name#141, ca_city#142, ca_zip#143, i_item_sk#89, i_product_name#92] +Input [19]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, d_year#128, d_year#130, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#140, ca_street_name#141, ca_city#142, ca_zip#143, i_item_sk#89, i_product_name#92] + +(201) HashAggregate [codegen id : 84] +Input [18]: [ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, d_year#128, d_year#130, s_store_name#43, s_zip#44, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#140, ca_street_name#141, ca_city#142, ca_zip#143, i_item_sk#89, i_product_name#92] +Keys [15]: [i_product_name#92, i_item_sk#89, s_store_name#43, s_zip#44, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#140, ca_street_name#141, ca_city#142, ca_zip#143, d_year#40, d_year#128, d_year#130] +Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#10)), partial_sum(UnscaledValue(ss_list_price#11)), partial_sum(UnscaledValue(ss_coupon_amt#12))] +Aggregate Attributes [4]: [count#145, sum#146, sum#147, sum#148] +Results [19]: [i_product_name#92, i_item_sk#89, s_store_name#43, s_zip#44, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#140, ca_street_name#141, ca_city#142, ca_zip#143, d_year#40, d_year#128, d_year#130, count#149, sum#150, sum#151, sum#152] + +(202) Exchange +Input [19]: [i_product_name#92, i_item_sk#89, s_store_name#43, s_zip#44, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#140, ca_street_name#141, ca_city#142, ca_zip#143, d_year#40, d_year#128, 
d_year#130, count#149, sum#150, sum#151, sum#152] +Arguments: hashpartitioning(i_product_name#92, i_item_sk#89, s_store_name#43, s_zip#44, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#140, ca_street_name#141, ca_city#142, ca_zip#143, d_year#40, d_year#128, d_year#130, 5), true, [id=#153] + +(203) HashAggregate [codegen id : 85] +Input [19]: [i_product_name#92, i_item_sk#89, s_store_name#43, s_zip#44, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#140, ca_street_name#141, ca_city#142, ca_zip#143, d_year#40, d_year#128, d_year#130, count#149, sum#150, sum#151, sum#152] +Keys [15]: [i_product_name#92, i_item_sk#89, s_store_name#43, s_zip#44, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#140, ca_street_name#141, ca_city#142, ca_zip#143, d_year#40, d_year#128, d_year#130] +Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#10)), sum(UnscaledValue(ss_list_price#11)), sum(UnscaledValue(ss_coupon_amt#12))] +Aggregate Attributes [4]: [count(1)#154, sum(UnscaledValue(ss_wholesale_cost#10))#155, sum(UnscaledValue(ss_list_price#11))#156, sum(UnscaledValue(ss_coupon_amt#12))#157] +Results [8]: [i_item_sk#89 AS item_sk#158, s_store_name#43 AS store_name#159, s_zip#44 AS store_zip#160, d_year#40 AS syear#161, count(1)#154 AS cnt#162, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#10))#155,17,2) AS s1#163, MakeDecimal(sum(UnscaledValue(ss_list_price#11))#156,17,2) AS s2#164, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#12))#157,17,2) AS s3#165] + +(204) Exchange +Input [8]: [item_sk#158, store_name#159, store_zip#160, syear#161, cnt#162, s1#163, s2#164, s3#165] +Arguments: hashpartitioning(item_sk#158, store_name#159, store_zip#160, 5), true, [id=#166] + +(205) Sort [codegen id : 86] +Input [8]: [item_sk#158, store_name#159, store_zip#160, syear#161, cnt#162, s1#163, s2#164, s3#165] +Arguments: [item_sk#158 ASC NULLS FIRST, store_name#159 ASC NULLS FIRST, store_zip#160 
ASC NULLS FIRST], false, 0 + +(206) SortMergeJoin [codegen id : 87] +Left keys [3]: [item_sk#108, store_name#109, store_zip#110] +Right keys [3]: [item_sk#158, store_name#159, store_zip#160] +Join condition: (cnt#162 <= cnt#120) + +(207) Project [codegen id : 87] +Output [21]: [product_name#107, store_name#109, store_zip#110, b_street_number#111, b_streen_name#112, b_city#113, b_zip#114, c_street_number#115, c_street_name#116, c_city#117, c_zip#118, syear#119, cnt#120, s1#121, s2#122, s3#123, s1#163, s2#164, s3#165, syear#161, cnt#162] +Input [25]: [product_name#107, item_sk#108, store_name#109, store_zip#110, b_street_number#111, b_streen_name#112, b_city#113, b_zip#114, c_street_number#115, c_street_name#116, c_city#117, c_zip#118, syear#119, cnt#120, s1#121, s2#122, s3#123, item_sk#158, store_name#159, store_zip#160, syear#161, cnt#162, s1#163, s2#164, s3#165] + +(208) Exchange +Input [21]: [product_name#107, store_name#109, store_zip#110, b_street_number#111, b_streen_name#112, b_city#113, b_zip#114, c_street_number#115, c_street_name#116, c_city#117, c_zip#118, syear#119, cnt#120, s1#121, s2#122, s3#123, s1#163, s2#164, s3#165, syear#161, cnt#162] +Arguments: rangepartitioning(product_name#107 ASC NULLS FIRST, store_name#109 ASC NULLS FIRST, cnt#162 ASC NULLS FIRST, 5), true, [id=#167] + +(209) Sort [codegen id : 88] +Input [21]: [product_name#107, store_name#109, store_zip#110, b_street_number#111, b_streen_name#112, b_city#113, b_zip#114, c_street_number#115, c_street_name#116, c_city#117, c_zip#118, syear#119, cnt#120, s1#121, s2#122, s3#123, s1#163, s2#164, s3#165, syear#161, cnt#162] +Arguments: [product_name#107 ASC NULLS FIRST, store_name#109 ASC NULLS FIRST, cnt#162 ASC NULLS FIRST], true, 0 + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64.sf100/simplified.txt new file mode 100644 index 0000000000000..cf75c1b7685f5 --- 
/dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64.sf100/simplified.txt @@ -0,0 +1,367 @@ +WholeStageCodegen (88) + Sort [cnt,product_name,store_name] + InputAdapter + Exchange [cnt,product_name,store_name] #1 + WholeStageCodegen (87) + Project [b_city,b_streen_name,b_street_number,b_zip,c_city,c_street_name,c_street_number,c_zip,cnt,cnt,product_name,s1,s1,s2,s2,s3,s3,store_name,store_zip,syear,syear] + SortMergeJoin [cnt,cnt,item_sk,item_sk,store_name,store_name,store_zip,store_zip] + InputAdapter + WholeStageCodegen (43) + Sort [item_sk,store_name,store_zip] + InputAdapter + Exchange [item_sk,store_name,store_zip] #2 + WholeStageCodegen (42) + HashAggregate [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,count,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,sum,sum,sum] [b_city,b_streen_name,b_street_number,b_zip,c_city,c_street_name,c_street_number,c_zip,cnt,count,count(1),item_sk,product_name,s1,s2,s3,store_name,store_zip,sum,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_wholesale_cost)),syear] + InputAdapter + Exchange [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip] #3 + WholeStageCodegen (41) + HashAggregate [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,ss_coupon_amt,ss_list_price,ss_wholesale_cost] [count,count,sum,sum,sum,sum,sum,sum] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,ss_coupon_amt,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project 
[ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + SortMergeJoin [c_current_addr_sk,ca_address_sk] + InputAdapter + WholeStageCodegen (35) + Sort [c_current_addr_sk] + InputAdapter + Exchange [c_current_addr_sk] #4 + WholeStageCodegen (34) + Project [c_current_addr_sk,ca_city,ca_street_name,ca_street_number,ca_zip,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + SortMergeJoin [ca_address_sk,ss_addr_sk] + InputAdapter + WholeStageCodegen (31) + Sort [ss_addr_sk] + InputAdapter + Exchange [ss_addr_sk] #5 + WholeStageCodegen (30) + Project [c_current_addr_sk,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] + Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,hd_income_band_sk,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [p_promo_sk,ss_promo_sk] 
+ Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + SortMergeJoin [c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] + InputAdapter + WholeStageCodegen (24) + Sort [c_current_cdemo_sk] + InputAdapter + Exchange [c_current_cdemo_sk] #6 + WholeStageCodegen (23) + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,cd_marital_status,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + SortMergeJoin [cd_demo_sk,ss_cdemo_sk] + InputAdapter + WholeStageCodegen (20) + Sort [ss_cdemo_sk] + InputAdapter + Exchange [ss_cdemo_sk] #7 + WholeStageCodegen (19) + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_first_shipto_date_sk,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_first_sales_date_sk,c_first_shipto_date_sk,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (14) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #8 + WholeStageCodegen (13) + Project [d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project 
[d_year,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_store_sk,ss_wholesale_cost] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_wholesale_cost] + BroadcastHashJoin [cs_item_sk,ss_item_sk] + Project [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_wholesale_cost] + SortMergeJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + InputAdapter + WholeStageCodegen (2) + Sort [ss_item_sk,ss_ticket_number] + InputAdapter + Exchange [ss_item_sk,ss_ticket_number] #9 + WholeStageCodegen (1) + Filter [ss_addr_sk,ss_cdemo_sk,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number,ss_wholesale_cost] + InputAdapter + WholeStageCodegen (4) + Sort [sr_item_sk,sr_ticket_number] + InputAdapter + Exchange [sr_item_sk,sr_ticket_number] #10 + WholeStageCodegen (3) + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (10) + Project [cs_item_sk] + Filter [sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2), true)),sum(cs_ext_list_price)] + HashAggregate [cs_item_sk,isEmpty,sum,sum] 
[isEmpty,sum,sum,sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2), true)),sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2), true)),sum(UnscaledValue(cs_ext_list_price)),sum(cs_ext_list_price)] + InputAdapter + Exchange [cs_item_sk] #12 + WholeStageCodegen (9) + HashAggregate [cr_refunded_cash,cr_reversed_charge,cr_store_credit,cs_ext_list_price,cs_item_sk] [isEmpty,isEmpty,sum,sum,sum,sum] + Project [cr_refunded_cash,cr_reversed_charge,cr_store_credit,cs_ext_list_price,cs_item_sk] + SortMergeJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + InputAdapter + WholeStageCodegen (6) + Sort [cs_item_sk,cs_order_number] + InputAdapter + Exchange [cs_item_sk,cs_order_number] #13 + WholeStageCodegen (5) + Filter [cs_item_sk,cs_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ext_list_price,cs_item_sk,cs_order_number] + InputAdapter + WholeStageCodegen (8) + Sort [cr_item_sk,cr_order_number] + InputAdapter + Exchange [cr_item_sk,cr_order_number] #14 + WholeStageCodegen (7) + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (11) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #16 + WholeStageCodegen (12) + Filter [s_store_name,s_store_sk,s_zip] + 
ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_name,s_store_sk,s_zip] + InputAdapter + WholeStageCodegen (16) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #17 + WholeStageCodegen (15) + Filter [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk] + InputAdapter + BroadcastExchange #18 + WholeStageCodegen (17) + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + ReusedExchange [d_date_sk,d_year] #18 + InputAdapter + WholeStageCodegen (22) + Sort [cd_demo_sk] + InputAdapter + Exchange [cd_demo_sk] #19 + WholeStageCodegen (21) + Filter [cd_demo_sk,cd_marital_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status] + InputAdapter + WholeStageCodegen (26) + Sort [cd_demo_sk] + InputAdapter + ReusedExchange [cd_demo_sk,cd_marital_status] #19 + InputAdapter + BroadcastExchange #20 + WholeStageCodegen (27) + Filter [p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.promotion [p_promo_sk] + InputAdapter + BroadcastExchange #21 + WholeStageCodegen (28) + Filter [hd_demo_sk,hd_income_band_sk] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_income_band_sk] + InputAdapter + ReusedExchange [hd_demo_sk,hd_income_band_sk] #21 + InputAdapter + WholeStageCodegen (33) + Sort [ca_address_sk] + InputAdapter + Exchange [ca_address_sk] #22 + WholeStageCodegen (32) + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] + InputAdapter + WholeStageCodegen (37) + Sort [ca_address_sk] + InputAdapter + ReusedExchange 
[ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] #22 + InputAdapter + BroadcastExchange #23 + WholeStageCodegen (38) + Filter [ib_income_band_sk] + ColumnarToRow + InputAdapter + Scan parquet default.income_band [ib_income_band_sk] + InputAdapter + ReusedExchange [ib_income_band_sk] #23 + InputAdapter + BroadcastExchange #24 + WholeStageCodegen (40) + Project [i_item_sk,i_product_name] + Filter [i_color,i_current_price,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_color,i_current_price,i_item_sk,i_product_name] + InputAdapter + WholeStageCodegen (86) + Sort [item_sk,store_name,store_zip] + InputAdapter + Exchange [item_sk,store_name,store_zip] #25 + WholeStageCodegen (85) + HashAggregate [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,count,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,sum,sum,sum] [cnt,count,count(1),item_sk,s1,s2,s3,store_name,store_zip,sum,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_wholesale_cost)),syear] + InputAdapter + Exchange [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip] #26 + WholeStageCodegen (84) + HashAggregate [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,ss_coupon_amt,ss_list_price,ss_wholesale_cost] [count,count,sum,sum,sum,sum,sum,sum] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,ss_coupon_amt,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project 
[ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + SortMergeJoin [c_current_addr_sk,ca_address_sk] + InputAdapter + WholeStageCodegen (78) + Sort [c_current_addr_sk] + InputAdapter + Exchange [c_current_addr_sk] #27 + WholeStageCodegen (77) + Project [c_current_addr_sk,ca_city,ca_street_name,ca_street_number,ca_zip,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + SortMergeJoin [ca_address_sk,ss_addr_sk] + InputAdapter + WholeStageCodegen (74) + Sort [ss_addr_sk] + InputAdapter + Exchange [ss_addr_sk] #28 + WholeStageCodegen (73) + Project [c_current_addr_sk,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] + Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,hd_income_band_sk,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin 
[p_promo_sk,ss_promo_sk] + Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + SortMergeJoin [c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] + InputAdapter + WholeStageCodegen (67) + Sort [c_current_cdemo_sk] + InputAdapter + Exchange [c_current_cdemo_sk] #29 + WholeStageCodegen (66) + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,cd_marital_status,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + SortMergeJoin [cd_demo_sk,ss_cdemo_sk] + InputAdapter + WholeStageCodegen (63) + Sort [ss_cdemo_sk] + InputAdapter + Exchange [ss_cdemo_sk] #30 + WholeStageCodegen (62) + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_first_shipto_date_sk,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_first_sales_date_sk,c_first_shipto_date_sk,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (57) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #31 + WholeStageCodegen (56) + Project [d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [s_store_sk,ss_store_sk] 
+ Project [d_year,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_store_sk,ss_wholesale_cost] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_wholesale_cost] + BroadcastHashJoin [cs_item_sk,ss_item_sk] + Project [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_wholesale_cost] + SortMergeJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + InputAdapter + WholeStageCodegen (45) + Sort [ss_item_sk,ss_ticket_number] + InputAdapter + ReusedExchange [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number,ss_wholesale_cost] #9 + InputAdapter + WholeStageCodegen (47) + Sort [sr_item_sk,sr_ticket_number] + InputAdapter + ReusedExchange [sr_item_sk,sr_ticket_number] #10 + InputAdapter + ReusedExchange [cs_item_sk] #11 + InputAdapter + BroadcastExchange #32 + WholeStageCodegen (54) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + ReusedExchange [s_store_name,s_store_sk,s_zip] #16 + InputAdapter + WholeStageCodegen (59) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk] #17 + InputAdapter + ReusedExchange [d_date_sk,d_year] #18 + InputAdapter + ReusedExchange [d_date_sk,d_year] #18 + InputAdapter + WholeStageCodegen (65) + Sort [cd_demo_sk] + InputAdapter + ReusedExchange [cd_demo_sk,cd_marital_status] #19 + InputAdapter + WholeStageCodegen (69) + Sort [cd_demo_sk] + InputAdapter + ReusedExchange [cd_demo_sk,cd_marital_status] #19 + InputAdapter + ReusedExchange [p_promo_sk] #20 + InputAdapter + 
ReusedExchange [hd_demo_sk,hd_income_band_sk] #21 + InputAdapter + ReusedExchange [hd_demo_sk,hd_income_band_sk] #21 + InputAdapter + WholeStageCodegen (76) + Sort [ca_address_sk] + InputAdapter + ReusedExchange [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] #22 + InputAdapter + WholeStageCodegen (80) + Sort [ca_address_sk] + InputAdapter + ReusedExchange [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] #22 + InputAdapter + ReusedExchange [ib_income_band_sk] #23 + InputAdapter + ReusedExchange [ib_income_band_sk] #23 + InputAdapter + ReusedExchange [i_item_sk,i_product_name] #24 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/explain.txt new file mode 100644 index 0000000000000..0f0e48d035cbf --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/explain.txt @@ -0,0 +1,918 @@ +== Physical Plan == +* Sort (170) ++- Exchange (169) + +- * Project (168) + +- * BroadcastHashJoin Inner BuildRight (167) + :- * HashAggregate (105) + : +- Exchange (104) + : +- * HashAggregate (103) + : +- * Project (102) + : +- * BroadcastHashJoin Inner BuildRight (101) + : :- * Project (95) + : : +- * BroadcastHashJoin Inner BuildRight (94) + : : :- * Project (92) + : : : +- * BroadcastHashJoin Inner BuildRight (91) + : : : :- * Project (86) + : : : : +- * BroadcastHashJoin Inner BuildRight (85) + : : : : :- * Project (83) + : : : : : +- * BroadcastHashJoin Inner BuildRight (82) + : : : : : :- * Project (77) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (76) + : : : : : : :- * Project (74) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (73) + : : : : : : : :- * Project (68) + : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (67) + : : : : : : : : :- * Project (62) + : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (61) + : : : : : : : : : :- * Project (59) + : : : : : 
: : : : : +- * BroadcastHashJoin Inner BuildRight (58) + : : : : : : : : : : :- * Project (53) + : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (52) + : : : : : : : : : : : :- * Project (50) + : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (49) + : : : : : : : : : : : : :- * Project (44) + : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (43) + : : : : : : : : : : : : : :- * Project (38) + : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : : : : : : : : : : : : :- * Project (32) + : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (31) + : : : : : : : : : : : : : : : :- * Project (26) + : : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (25) + : : : : : : : : : : : : : : : : :- * Project (9) + : : : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : : : : : : : : : : : : : : :- * Filter (3) + : : : : : : : : : : : : : : : : : : +- * ColumnarToRow (2) + : : : : : : : : : : : : : : : : : : +- Scan parquet default.store_sales (1) + : : : : : : : : : : : : : : : : : +- BroadcastExchange (7) + : : : : : : : : : : : : : : : : : +- * Filter (6) + : : : : : : : : : : : : : : : : : +- * ColumnarToRow (5) + : : : : : : : : : : : : : : : : : +- Scan parquet default.store_returns (4) + : : : : : : : : : : : : : : : : +- BroadcastExchange (24) + : : : : : : : : : : : : : : : : +- * Project (23) + : : : : : : : : : : : : : : : : +- * Filter (22) + : : : : : : : : : : : : : : : : +- * HashAggregate (21) + : : : : : : : : : : : : : : : : +- Exchange (20) + : : : : : : : : : : : : : : : : +- * HashAggregate (19) + : : : : : : : : : : : : : : : : +- * Project (18) + : : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (17) + : : : : : : : : : : : : : : : : :- * Filter (12) + : : : : : : : : : : : : : : : : : +- * ColumnarToRow (11) + : : : : : : : : : : : : : : : : : +- Scan parquet default.catalog_sales (10) + : : : 
: : : : : : : : : : : : : +- BroadcastExchange (16) + : : : : : : : : : : : : : : : : +- * Filter (15) + : : : : : : : : : : : : : : : : +- * ColumnarToRow (14) + : : : : : : : : : : : : : : : : +- Scan parquet default.catalog_returns (13) + : : : : : : : : : : : : : : : +- BroadcastExchange (30) + : : : : : : : : : : : : : : : +- * Filter (29) + : : : : : : : : : : : : : : : +- * ColumnarToRow (28) + : : : : : : : : : : : : : : : +- Scan parquet default.date_dim (27) + : : : : : : : : : : : : : : +- BroadcastExchange (36) + : : : : : : : : : : : : : : +- * Filter (35) + : : : : : : : : : : : : : : +- * ColumnarToRow (34) + : : : : : : : : : : : : : : +- Scan parquet default.store (33) + : : : : : : : : : : : : : +- BroadcastExchange (42) + : : : : : : : : : : : : : +- * Filter (41) + : : : : : : : : : : : : : +- * ColumnarToRow (40) + : : : : : : : : : : : : : +- Scan parquet default.customer (39) + : : : : : : : : : : : : +- BroadcastExchange (48) + : : : : : : : : : : : : +- * Filter (47) + : : : : : : : : : : : : +- * ColumnarToRow (46) + : : : : : : : : : : : : +- Scan parquet default.date_dim (45) + : : : : : : : : : : : +- ReusedExchange (51) + : : : : : : : : : : +- BroadcastExchange (57) + : : : : : : : : : : +- * Filter (56) + : : : : : : : : : : +- * ColumnarToRow (55) + : : : : : : : : : : +- Scan parquet default.customer_demographics (54) + : : : : : : : : : +- ReusedExchange (60) + : : : : : : : : +- BroadcastExchange (66) + : : : : : : : : +- * Filter (65) + : : : : : : : : +- * ColumnarToRow (64) + : : : : : : : : +- Scan parquet default.promotion (63) + : : : : : : : +- BroadcastExchange (72) + : : : : : : : +- * Filter (71) + : : : : : : : +- * ColumnarToRow (70) + : : : : : : : +- Scan parquet default.household_demographics (69) + : : : : : : +- ReusedExchange (75) + : : : : : +- BroadcastExchange (81) + : : : : : +- * Filter (80) + : : : : : +- * ColumnarToRow (79) + : : : : : +- Scan parquet default.customer_address (78) + : : : : +- 
ReusedExchange (84) + : : : +- BroadcastExchange (90) + : : : +- * Filter (89) + : : : +- * ColumnarToRow (88) + : : : +- Scan parquet default.income_band (87) + : : +- ReusedExchange (93) + : +- BroadcastExchange (100) + : +- * Project (99) + : +- * Filter (98) + : +- * ColumnarToRow (97) + : +- Scan parquet default.item (96) + +- BroadcastExchange (166) + +- * HashAggregate (165) + +- Exchange (164) + +- * HashAggregate (163) + +- * Project (162) + +- * BroadcastHashJoin Inner BuildRight (161) + :- * Project (159) + : +- * BroadcastHashJoin Inner BuildRight (158) + : :- * Project (156) + : : +- * BroadcastHashJoin Inner BuildRight (155) + : : :- * Project (153) + : : : +- * BroadcastHashJoin Inner BuildRight (152) + : : : :- * Project (150) + : : : : +- * BroadcastHashJoin Inner BuildRight (149) + : : : : :- * Project (147) + : : : : : +- * BroadcastHashJoin Inner BuildRight (146) + : : : : : :- * Project (144) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (143) + : : : : : : :- * Project (141) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (140) + : : : : : : : :- * Project (138) + : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (137) + : : : : : : : : :- * Project (135) + : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (134) + : : : : : : : : : :- * Project (132) + : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (131) + : : : : : : : : : : :- * Project (129) + : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (128) + : : : : : : : : : : : :- * Project (126) + : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (125) + : : : : : : : : : : : : :- * Project (123) + : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (122) + : : : : : : : : : : : : : :- * Project (120) + : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (119) + : : : : : : : : : : : : : : :- * Project (114) + : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (113) + : : : : : : 
: : : : : : : : : :- * Project (111) + : : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (110) + : : : : : : : : : : : : : : : : :- * Filter (108) + : : : : : : : : : : : : : : : : : +- * ColumnarToRow (107) + : : : : : : : : : : : : : : : : : +- Scan parquet default.store_sales (106) + : : : : : : : : : : : : : : : : +- ReusedExchange (109) + : : : : : : : : : : : : : : : +- ReusedExchange (112) + : : : : : : : : : : : : : : +- BroadcastExchange (118) + : : : : : : : : : : : : : : +- * Filter (117) + : : : : : : : : : : : : : : +- * ColumnarToRow (116) + : : : : : : : : : : : : : : +- Scan parquet default.date_dim (115) + : : : : : : : : : : : : : +- ReusedExchange (121) + : : : : : : : : : : : : +- ReusedExchange (124) + : : : : : : : : : : : +- ReusedExchange (127) + : : : : : : : : : : +- ReusedExchange (130) + : : : : : : : : : +- ReusedExchange (133) + : : : : : : : : +- ReusedExchange (136) + : : : : : : : +- ReusedExchange (139) + : : : : : : +- ReusedExchange (142) + : : : : : +- ReusedExchange (145) + : : : : +- ReusedExchange (148) + : : : +- ReusedExchange (151) + : : +- ReusedExchange (154) + : +- ReusedExchange (157) + +- ReusedExchange (160) + + +(1) Scan parquet default.store_sales +Output [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_promo_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 20] +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, 
ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] + +(3) Filter [codegen id : 20] +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Condition : ((((((((isnotnull(ss_item_sk#2) AND isnotnull(ss_ticket_number#9)) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#7)) AND isnotnull(ss_customer_sk#3)) AND isnotnull(ss_cdemo_sk#4)) AND isnotnull(ss_promo_sk#8)) AND isnotnull(ss_hdemo_sk#5)) AND isnotnull(ss_addr_sk#6)) + +(4) Scan parquet default.store_returns +Output [2]: [sr_item_sk#13, sr_ticket_number#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_returns] +PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [sr_item_sk#13, sr_ticket_number#14] + +(6) Filter [codegen id : 1] +Input [2]: [sr_item_sk#13, sr_ticket_number#14] +Condition : (isnotnull(sr_item_sk#13) AND isnotnull(sr_ticket_number#14)) + +(7) BroadcastExchange +Input [2]: [sr_item_sk#13, sr_ticket_number#14] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false], input[1, bigint, false]),false), [id=#15] + +(8) BroadcastHashJoin [codegen id : 20] +Left keys [2]: [cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#9 as bigint)] +Right keys [2]: [sr_item_sk#13, sr_ticket_number#14] +Join condition: None + +(9) Project [codegen id : 20] +Output [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Input [14]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, 
ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, sr_item_sk#13, sr_ticket_number#14] + +(10) Scan parquet default.catalog_sales +Output [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_order_number)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 3] +Input [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] + +(12) Filter [codegen id : 3] +Input [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] +Condition : (isnotnull(cs_item_sk#16) AND isnotnull(cs_order_number#17)) + +(13) Scan parquet default.catalog_returns +Output [5]: [cr_item_sk#19, cr_order_number#20, cr_refunded_cash#21, cr_reversed_charge#22, cr_store_credit#23] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_returns] +PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 2] +Input [5]: [cr_item_sk#19, cr_order_number#20, cr_refunded_cash#21, cr_reversed_charge#22, cr_store_credit#23] + +(15) Filter [codegen id : 2] +Input [5]: [cr_item_sk#19, cr_order_number#20, cr_refunded_cash#21, cr_reversed_charge#22, cr_store_credit#23] +Condition : (isnotnull(cr_item_sk#19) AND isnotnull(cr_order_number#20)) + +(16) BroadcastExchange +Input [5]: [cr_item_sk#19, cr_order_number#20, cr_refunded_cash#21, cr_reversed_charge#22, cr_store_credit#23] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[1, int, false] as bigint) & 4294967295))),false), [id=#24] + +(17) BroadcastHashJoin [codegen id : 3] +Left keys [2]: 
[cs_item_sk#16, cs_order_number#17] +Right keys [2]: [cr_item_sk#19, cr_order_number#20] +Join condition: None + +(18) Project [codegen id : 3] +Output [5]: [cs_item_sk#16, cs_ext_list_price#18, cr_refunded_cash#21, cr_reversed_charge#22, cr_store_credit#23] +Input [8]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cr_item_sk#19, cr_order_number#20, cr_refunded_cash#21, cr_reversed_charge#22, cr_store_credit#23] + +(19) HashAggregate [codegen id : 3] +Input [5]: [cs_item_sk#16, cs_ext_list_price#18, cr_refunded_cash#21, cr_reversed_charge#22, cr_store_credit#23] +Keys [1]: [cs_item_sk#16] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_list_price#18)), partial_sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#21 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#22 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#23 as decimal(9,2)))), DecimalType(9,2), true))] +Aggregate Attributes [3]: [sum#25, sum#26, isEmpty#27] +Results [4]: [cs_item_sk#16, sum#28, sum#29, isEmpty#30] + +(20) Exchange +Input [4]: [cs_item_sk#16, sum#28, sum#29, isEmpty#30] +Arguments: hashpartitioning(cs_item_sk#16, 5), true, [id=#31] + +(21) HashAggregate [codegen id : 4] +Input [4]: [cs_item_sk#16, sum#28, sum#29, isEmpty#30] +Keys [1]: [cs_item_sk#16] +Functions [2]: [sum(UnscaledValue(cs_ext_list_price#18)), sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#21 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#22 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#23 as decimal(9,2)))), DecimalType(9,2), true))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_list_price#18))#32, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#21 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#22 as 
decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#23 as decimal(9,2)))), DecimalType(9,2), true))#33] +Results [3]: [cs_item_sk#16, MakeDecimal(sum(UnscaledValue(cs_ext_list_price#18))#32,17,2) AS sum(cs_ext_list_price#18)#34, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#21 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#22 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#23 as decimal(9,2)))), DecimalType(9,2), true))#33 AS sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#21 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#22 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#23 as decimal(9,2)))), DecimalType(9,2), true))#35] + +(22) Filter [codegen id : 4] +Input [3]: [cs_item_sk#16, sum(cs_ext_list_price#18)#34, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#21 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#22 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#23 as decimal(9,2)))), DecimalType(9,2), true))#35] +Condition : (isnotnull(sum(cs_ext_list_price#18)#34) AND (cast(sum(cs_ext_list_price#18)#34 as decimal(21,2)) > CheckOverflow((2.00 * promote_precision(sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#21 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#22 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#23 as decimal(9,2)))), DecimalType(9,2), true))#35)), DecimalType(21,2), true))) + +(23) Project [codegen id : 4] +Output [1]: [cs_item_sk#16] +Input [3]: [cs_item_sk#16, sum(cs_ext_list_price#18)#34, 
sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#21 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#22 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#23 as decimal(9,2)))), DecimalType(9,2), true))#35] + +(24) BroadcastExchange +Input [1]: [cs_item_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#36] + +(25) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [cs_item_sk#16] +Join condition: None + +(26) Project [codegen id : 20] +Output [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, cs_item_sk#16] + +(27) Scan parquet default.date_dim +Output [2]: [d_date_sk#37, d_year#38] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#37, d_year#38] + +(29) Filter [codegen id : 5] +Input [2]: [d_date_sk#37, d_year#38] +Condition : ((isnotnull(d_year#38) AND (d_year#38 = 1999)) AND isnotnull(d_date_sk#37)) + +(30) BroadcastExchange +Input [2]: [d_date_sk#37, d_year#38] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#39] + +(31) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#37] +Join condition: None + +(32) Project [codegen id : 20] +Output [11]: [ss_item_sk#2, 
ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38] +Input [13]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_date_sk#37, d_year#38] + +(33) Scan parquet default.store +Output [3]: [s_store_sk#40, s_store_name#41, s_zip#42] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_zip), IsNotNull(s_store_name)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 6] +Input [3]: [s_store_sk#40, s_store_name#41, s_zip#42] + +(35) Filter [codegen id : 6] +Input [3]: [s_store_sk#40, s_store_name#41, s_zip#42] +Condition : ((isnotnull(s_store_sk#40) AND isnotnull(s_zip#42)) AND isnotnull(s_store_name#41)) + +(36) BroadcastExchange +Input [3]: [s_store_sk#40, s_store_name#41, s_zip#42] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#43] + +(37) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ss_store_sk#7] +Right keys [1]: [s_store_sk#40] +Join condition: None + +(38) Project [codegen id : 20] +Output [12]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42] +Input [14]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_sk#40, s_store_name#41, s_zip#42] + +(39) Scan parquet default.customer +Output [6]: [c_customer_sk#44, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49] +Batched: 
true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_first_sales_date_sk), IsNotNull(c_first_shipto_date_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(40) ColumnarToRow [codegen id : 7] +Input [6]: [c_customer_sk#44, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49] + +(41) Filter [codegen id : 7] +Input [6]: [c_customer_sk#44, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49] +Condition : (((((isnotnull(c_customer_sk#44) AND isnotnull(c_first_sales_date_sk#49)) AND isnotnull(c_first_shipto_date_sk#48)) AND isnotnull(c_current_cdemo_sk#45)) AND isnotnull(c_current_hdemo_sk#46)) AND isnotnull(c_current_addr_sk#47)) + +(42) BroadcastExchange +Input [6]: [c_customer_sk#44, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#50] + +(43) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ss_customer_sk#3] +Right keys [1]: [c_customer_sk#44] +Join condition: None + +(44) Project [codegen id : 20] +Output [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49] +Input [18]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_customer_sk#44, c_current_cdemo_sk#45, 
c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49] + +(45) Scan parquet default.date_dim +Output [2]: [d_date_sk#51, d_year#52] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(46) ColumnarToRow [codegen id : 8] +Input [2]: [d_date_sk#51, d_year#52] + +(47) Filter [codegen id : 8] +Input [2]: [d_date_sk#51, d_year#52] +Condition : isnotnull(d_date_sk#51) + +(48) BroadcastExchange +Input [2]: [d_date_sk#51, d_year#52] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#53] + +(49) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [c_first_sales_date_sk#49] +Right keys [1]: [d_date_sk#51] +Join condition: None + +(50) Project [codegen id : 20] +Output [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, d_year#52] +Input [18]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49, d_date_sk#51, d_year#52] + +(51) ReusedExchange [Reuses operator id: 48] +Output [2]: [d_date_sk#54, d_year#55] + +(52) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [c_first_shipto_date_sk#48] +Right keys [1]: [d_date_sk#54] +Join condition: None + +(53) Project [codegen id : 20] +Output [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, 
c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55] +Input [18]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, d_year#52, d_date_sk#54, d_year#55] + +(54) Scan parquet default.customer_demographics +Output [2]: [cd_demo_sk#56, cd_marital_status#57] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status)] +ReadSchema: struct + +(55) ColumnarToRow [codegen id : 10] +Input [2]: [cd_demo_sk#56, cd_marital_status#57] + +(56) Filter [codegen id : 10] +Input [2]: [cd_demo_sk#56, cd_marital_status#57] +Condition : (isnotnull(cd_demo_sk#56) AND isnotnull(cd_marital_status#57)) + +(57) BroadcastExchange +Input [2]: [cd_demo_sk#56, cd_marital_status#57] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#58] + +(58) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ss_cdemo_sk#4] +Right keys [1]: [cd_demo_sk#56] +Join condition: None + +(59) Project [codegen id : 20] +Output [16]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55, cd_marital_status#57] +Input [18]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55, cd_demo_sk#56, cd_marital_status#57] + +(60) ReusedExchange [Reuses operator 
id: 57] +Output [2]: [cd_demo_sk#59, cd_marital_status#60] + +(61) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [c_current_cdemo_sk#45] +Right keys [1]: [cd_demo_sk#59] +Join condition: NOT (cd_marital_status#57 = cd_marital_status#60) + +(62) Project [codegen id : 20] +Output [14]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55] +Input [18]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55, cd_marital_status#57, cd_demo_sk#59, cd_marital_status#60] + +(63) Scan parquet default.promotion +Output [1]: [p_promo_sk#61] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/promotion] +PushedFilters: [IsNotNull(p_promo_sk)] +ReadSchema: struct + +(64) ColumnarToRow [codegen id : 12] +Input [1]: [p_promo_sk#61] + +(65) Filter [codegen id : 12] +Input [1]: [p_promo_sk#61] +Condition : isnotnull(p_promo_sk#61) + +(66) BroadcastExchange +Input [1]: [p_promo_sk#61] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#62] + +(67) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ss_promo_sk#8] +Right keys [1]: [p_promo_sk#61] +Join condition: None + +(68) Project [codegen id : 20] +Output [13]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55] +Input [15]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, 
s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55, p_promo_sk#61] + +(69) Scan parquet default.household_demographics +Output [2]: [hd_demo_sk#63, hd_income_band_sk#64] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/household_demographics] +PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)] +ReadSchema: struct + +(70) ColumnarToRow [codegen id : 13] +Input [2]: [hd_demo_sk#63, hd_income_band_sk#64] + +(71) Filter [codegen id : 13] +Input [2]: [hd_demo_sk#63, hd_income_band_sk#64] +Condition : (isnotnull(hd_demo_sk#63) AND isnotnull(hd_income_band_sk#64)) + +(72) BroadcastExchange +Input [2]: [hd_demo_sk#63, hd_income_band_sk#64] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#65] + +(73) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ss_hdemo_sk#5] +Right keys [1]: [hd_demo_sk#63] +Join condition: None + +(74) Project [codegen id : 20] +Output [13]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55, hd_income_band_sk#64] +Input [15]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55, hd_demo_sk#63, hd_income_band_sk#64] + +(75) ReusedExchange [Reuses operator id: 72] +Output [2]: [hd_demo_sk#66, hd_income_band_sk#67] + +(76) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [c_current_hdemo_sk#46] +Right keys [1]: [hd_demo_sk#66] +Join condition: None + +(77) Project [codegen id : 20] +Output [13]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_addr_sk#47, d_year#52, d_year#55, 
hd_income_band_sk#64, hd_income_band_sk#67] +Input [15]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55, hd_income_band_sk#64, hd_demo_sk#66, hd_income_band_sk#67] + +(78) Scan parquet default.customer_address +Output [5]: [ca_address_sk#68, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(79) ColumnarToRow [codegen id : 15] +Input [5]: [ca_address_sk#68, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72] + +(80) Filter [codegen id : 15] +Input [5]: [ca_address_sk#68, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72] +Condition : isnotnull(ca_address_sk#68) + +(81) BroadcastExchange +Input [5]: [ca_address_sk#68, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#73] + +(82) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ss_addr_sk#6] +Right keys [1]: [ca_address_sk#68] +Join condition: None + +(83) Project [codegen id : 20] +Output [16]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_addr_sk#47, d_year#52, d_year#55, hd_income_band_sk#64, hd_income_band_sk#67, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72] +Input [18]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_addr_sk#47, d_year#52, d_year#55, hd_income_band_sk#64, hd_income_band_sk#67, ca_address_sk#68, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72] + +(84) ReusedExchange [Reuses operator 
id: 81] +Output [5]: [ca_address_sk#74, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] + +(85) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [c_current_addr_sk#47] +Right keys [1]: [ca_address_sk#74] +Join condition: None + +(86) Project [codegen id : 20] +Output [19]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#52, d_year#55, hd_income_band_sk#64, hd_income_band_sk#67, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] +Input [21]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_addr_sk#47, d_year#52, d_year#55, hd_income_band_sk#64, hd_income_band_sk#67, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_address_sk#74, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] + +(87) Scan parquet default.income_band +Output [1]: [ib_income_band_sk#79] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/income_band] +PushedFilters: [IsNotNull(ib_income_band_sk)] +ReadSchema: struct + +(88) ColumnarToRow [codegen id : 17] +Input [1]: [ib_income_band_sk#79] + +(89) Filter [codegen id : 17] +Input [1]: [ib_income_band_sk#79] +Condition : isnotnull(ib_income_band_sk#79) + +(90) BroadcastExchange +Input [1]: [ib_income_band_sk#79] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#80] + +(91) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [hd_income_band_sk#64] +Right keys [1]: [ib_income_band_sk#79] +Join condition: None + +(92) Project [codegen id : 20] +Output [18]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#52, d_year#55, hd_income_band_sk#67, ca_street_number#69, 
ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] +Input [20]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#52, d_year#55, hd_income_band_sk#64, hd_income_band_sk#67, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ib_income_band_sk#79] + +(93) ReusedExchange [Reuses operator id: 90] +Output [1]: [ib_income_band_sk#81] + +(94) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [hd_income_band_sk#67] +Right keys [1]: [ib_income_band_sk#81] +Join condition: None + +(95) Project [codegen id : 20] +Output [17]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#52, d_year#55, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] +Input [19]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#52, d_year#55, hd_income_band_sk#67, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ib_income_band_sk#81] + +(96) Scan parquet default.item +Output [4]: [i_item_sk#82, i_current_price#83, i_color#84, i_product_name#85] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_current_price), In(i_color, [purple,burlywood,indian,spring,floral,medium]), GreaterThanOrEqual(i_current_price,64.00), IsNotNull(i_item_sk)] +ReadSchema: struct + +(97) ColumnarToRow [codegen id : 19] +Input [4]: [i_item_sk#82, i_current_price#83, i_color#84, i_product_name#85] + +(98) Filter [codegen id : 19] +Input [4]: [i_item_sk#82, i_current_price#83, i_color#84, i_product_name#85] 
+Condition : ((((((isnotnull(i_current_price#83) AND i_color#84 IN (purple,burlywood,indian,spring,floral,medium)) AND (i_current_price#83 >= 64.00)) AND (cast(i_current_price#83 as decimal(12,2)) <= 74.00)) AND (cast(i_current_price#83 as decimal(12,2)) >= 65.00)) AND (cast(i_current_price#83 as decimal(12,2)) <= 79.00)) AND isnotnull(i_item_sk#82)) + +(99) Project [codegen id : 19] +Output [2]: [i_item_sk#82, i_product_name#85] +Input [4]: [i_item_sk#82, i_current_price#83, i_color#84, i_product_name#85] + +(100) BroadcastExchange +Input [2]: [i_item_sk#82, i_product_name#85] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#86] + +(101) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#82] +Join condition: None + +(102) Project [codegen id : 20] +Output [18]: [ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, d_year#52, d_year#55, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, i_item_sk#82, i_product_name#85] +Input [19]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#52, d_year#55, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, i_item_sk#82, i_product_name#85] + +(103) HashAggregate [codegen id : 20] +Input [18]: [ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, d_year#52, d_year#55, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, i_item_sk#82, i_product_name#85] +Keys [15]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, d_year#38, d_year#52, 
d_year#55] +Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#10)), partial_sum(UnscaledValue(ss_list_price#11)), partial_sum(UnscaledValue(ss_coupon_amt#12))] +Aggregate Attributes [4]: [count#87, sum#88, sum#89, sum#90] +Results [19]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, d_year#38, d_year#52, d_year#55, count#91, sum#92, sum#93, sum#94] + +(104) Exchange +Input [19]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, d_year#38, d_year#52, d_year#55, count#91, sum#92, sum#93, sum#94] +Arguments: hashpartitioning(i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, d_year#38, d_year#52, d_year#55, 5), true, [id=#95] + +(105) HashAggregate [codegen id : 42] +Input [19]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, d_year#38, d_year#52, d_year#55, count#91, sum#92, sum#93, sum#94] +Keys [15]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, d_year#38, d_year#52, d_year#55] +Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#10)), sum(UnscaledValue(ss_list_price#11)), sum(UnscaledValue(ss_coupon_amt#12))] +Aggregate Attributes [4]: [count(1)#96, sum(UnscaledValue(ss_wholesale_cost#10))#97, sum(UnscaledValue(ss_list_price#11))#98, sum(UnscaledValue(ss_coupon_amt#12))#99] +Results [17]: [i_product_name#85 AS product_name#100, i_item_sk#82 AS 
item_sk#101, s_store_name#41 AS store_name#102, s_zip#42 AS store_zip#103, ca_street_number#69 AS b_street_number#104, ca_street_name#70 AS b_streen_name#105, ca_city#71 AS b_city#106, ca_zip#72 AS b_zip#107, ca_street_number#75 AS c_street_number#108, ca_street_name#76 AS c_street_name#109, ca_city#77 AS c_city#110, ca_zip#78 AS c_zip#111, d_year#38 AS syear#112, count(1)#96 AS cnt#113, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#10))#97,17,2) AS s1#114, MakeDecimal(sum(UnscaledValue(ss_list_price#11))#98,17,2) AS s2#115, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#12))#99,17,2) AS s3#116] + +(106) Scan parquet default.store_sales +Output [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_promo_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk)] +ReadSchema: struct + +(107) ColumnarToRow [codegen id : 40] +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] + +(108) Filter [codegen id : 40] +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Condition : ((((((((isnotnull(ss_item_sk#2) AND isnotnull(ss_ticket_number#9)) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#7)) AND isnotnull(ss_customer_sk#3)) AND 
isnotnull(ss_cdemo_sk#4)) AND isnotnull(ss_promo_sk#8)) AND isnotnull(ss_hdemo_sk#5)) AND isnotnull(ss_addr_sk#6)) + +(109) ReusedExchange [Reuses operator id: 7] +Output [2]: [sr_item_sk#13, sr_ticket_number#14] + +(110) BroadcastHashJoin [codegen id : 40] +Left keys [2]: [cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#9 as bigint)] +Right keys [2]: [sr_item_sk#13, sr_ticket_number#14] +Join condition: None + +(111) Project [codegen id : 40] +Output [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Input [14]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, sr_item_sk#13, sr_ticket_number#14] + +(112) ReusedExchange [Reuses operator id: 24] +Output [1]: [cs_item_sk#16] + +(113) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [cs_item_sk#16] +Join condition: None + +(114) Project [codegen id : 40] +Output [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, cs_item_sk#16] + +(115) Scan parquet default.date_dim +Output [2]: [d_date_sk#37, d_year#38] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(116) ColumnarToRow [codegen id : 25] +Input [2]: [d_date_sk#37, d_year#38] + +(117) Filter 
[codegen id : 25] +Input [2]: [d_date_sk#37, d_year#38] +Condition : ((isnotnull(d_year#38) AND (d_year#38 = 2000)) AND isnotnull(d_date_sk#37)) + +(118) BroadcastExchange +Input [2]: [d_date_sk#37, d_year#38] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#117] + +(119) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#37] +Join condition: None + +(120) Project [codegen id : 40] +Output [11]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38] +Input [13]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_date_sk#37, d_year#38] + +(121) ReusedExchange [Reuses operator id: 36] +Output [3]: [s_store_sk#40, s_store_name#41, s_zip#42] + +(122) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [ss_store_sk#7] +Right keys [1]: [s_store_sk#40] +Join condition: None + +(123) Project [codegen id : 40] +Output [12]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42] +Input [14]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_sk#40, s_store_name#41, s_zip#42] + +(124) ReusedExchange [Reuses operator id: 42] +Output [6]: [c_customer_sk#44, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49] + +(125) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [ss_customer_sk#3] +Right keys [1]: [c_customer_sk#44] +Join condition: None + +(126) Project [codegen id : 40] +Output [16]: 
[ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49] +Input [18]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_customer_sk#44, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49] + +(127) ReusedExchange [Reuses operator id: 48] +Output [2]: [d_date_sk#118, d_year#119] + +(128) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [c_first_sales_date_sk#49] +Right keys [1]: [d_date_sk#118] +Join condition: None + +(129) Project [codegen id : 40] +Output [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, d_year#119] +Input [18]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49, d_date_sk#118, d_year#119] + +(130) ReusedExchange [Reuses operator id: 48] +Output [2]: [d_date_sk#120, d_year#121] + +(131) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [c_first_shipto_date_sk#48] +Right keys [1]: [d_date_sk#120] +Join condition: None + +(132) Project [codegen id : 40] +Output [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, 
c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121] +Input [18]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, d_year#119, d_date_sk#120, d_year#121] + +(133) ReusedExchange [Reuses operator id: 57] +Output [2]: [cd_demo_sk#56, cd_marital_status#57] + +(134) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [ss_cdemo_sk#4] +Right keys [1]: [cd_demo_sk#56] +Join condition: None + +(135) Project [codegen id : 40] +Output [16]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121, cd_marital_status#57] +Input [18]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121, cd_demo_sk#56, cd_marital_status#57] + +(136) ReusedExchange [Reuses operator id: 57] +Output [2]: [cd_demo_sk#122, cd_marital_status#123] + +(137) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [c_current_cdemo_sk#45] +Right keys [1]: [cd_demo_sk#122] +Join condition: NOT (cd_marital_status#57 = cd_marital_status#123) + +(138) Project [codegen id : 40] +Output [14]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121] +Input [18]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, 
c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121, cd_marital_status#57, cd_demo_sk#122, cd_marital_status#123] + +(139) ReusedExchange [Reuses operator id: 66] +Output [1]: [p_promo_sk#61] + +(140) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [ss_promo_sk#8] +Right keys [1]: [p_promo_sk#61] +Join condition: None + +(141) Project [codegen id : 40] +Output [13]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121] +Input [15]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121, p_promo_sk#61] + +(142) ReusedExchange [Reuses operator id: 72] +Output [2]: [hd_demo_sk#63, hd_income_band_sk#64] + +(143) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [ss_hdemo_sk#5] +Right keys [1]: [hd_demo_sk#63] +Join condition: None + +(144) Project [codegen id : 40] +Output [13]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121, hd_income_band_sk#64] +Input [15]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121, hd_demo_sk#63, hd_income_band_sk#64] + +(145) ReusedExchange [Reuses operator id: 72] +Output [2]: [hd_demo_sk#124, hd_income_band_sk#125] + +(146) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [c_current_hdemo_sk#46] +Right keys [1]: [hd_demo_sk#124] +Join condition: None + +(147) Project [codegen id : 40] +Output [13]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, 
ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_addr_sk#47, d_year#119, d_year#121, hd_income_band_sk#64, hd_income_band_sk#125] +Input [15]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121, hd_income_band_sk#64, hd_demo_sk#124, hd_income_band_sk#125] + +(148) ReusedExchange [Reuses operator id: 81] +Output [5]: [ca_address_sk#68, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72] + +(149) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [ss_addr_sk#6] +Right keys [1]: [ca_address_sk#68] +Join condition: None + +(150) Project [codegen id : 40] +Output [16]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_addr_sk#47, d_year#119, d_year#121, hd_income_band_sk#64, hd_income_band_sk#125, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72] +Input [18]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_addr_sk#47, d_year#119, d_year#121, hd_income_band_sk#64, hd_income_band_sk#125, ca_address_sk#68, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72] + +(151) ReusedExchange [Reuses operator id: 81] +Output [5]: [ca_address_sk#126, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130] + +(152) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [c_current_addr_sk#47] +Right keys [1]: [ca_address_sk#126] +Join condition: None + +(153) Project [codegen id : 40] +Output [19]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#119, d_year#121, hd_income_band_sk#64, hd_income_band_sk#125, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130] +Input [21]: [ss_item_sk#2, 
ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_addr_sk#47, d_year#119, d_year#121, hd_income_band_sk#64, hd_income_band_sk#125, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_address_sk#126, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130] + +(154) ReusedExchange [Reuses operator id: 90] +Output [1]: [ib_income_band_sk#79] + +(155) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [hd_income_band_sk#64] +Right keys [1]: [ib_income_band_sk#79] +Join condition: None + +(156) Project [codegen id : 40] +Output [18]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#119, d_year#121, hd_income_band_sk#125, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130] +Input [20]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#119, d_year#121, hd_income_band_sk#64, hd_income_band_sk#125, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, ib_income_band_sk#79] + +(157) ReusedExchange [Reuses operator id: 90] +Output [1]: [ib_income_band_sk#131] + +(158) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [hd_income_band_sk#125] +Right keys [1]: [ib_income_band_sk#131] +Join condition: None + +(159) Project [codegen id : 40] +Output [17]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#119, d_year#121, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130] +Input [19]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#119, d_year#121, hd_income_band_sk#125, ca_street_number#69, 
ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, ib_income_band_sk#131] + +(160) ReusedExchange [Reuses operator id: 100] +Output [2]: [i_item_sk#82, i_product_name#85] + +(161) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#82] +Join condition: None + +(162) Project [codegen id : 40] +Output [18]: [ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, d_year#119, d_year#121, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, i_item_sk#82, i_product_name#85] +Input [19]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#119, d_year#121, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, i_item_sk#82, i_product_name#85] + +(163) HashAggregate [codegen id : 40] +Input [18]: [ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, d_year#119, d_year#121, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, i_item_sk#82, i_product_name#85] +Keys [15]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, d_year#38, d_year#119, d_year#121] +Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#10)), partial_sum(UnscaledValue(ss_list_price#11)), partial_sum(UnscaledValue(ss_coupon_amt#12))] +Aggregate Attributes [4]: [count#132, sum#133, sum#134, sum#135] +Results [19]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, 
ca_street_name#128, ca_city#129, ca_zip#130, d_year#38, d_year#119, d_year#121, count#136, sum#137, sum#138, sum#139] + +(164) Exchange +Input [19]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, d_year#38, d_year#119, d_year#121, count#136, sum#137, sum#138, sum#139] +Arguments: hashpartitioning(i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, d_year#38, d_year#119, d_year#121, 5), true, [id=#140] + +(165) HashAggregate [codegen id : 41] +Input [19]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, d_year#38, d_year#119, d_year#121, count#136, sum#137, sum#138, sum#139] +Keys [15]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, d_year#38, d_year#119, d_year#121] +Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#10)), sum(UnscaledValue(ss_list_price#11)), sum(UnscaledValue(ss_coupon_amt#12))] +Aggregate Attributes [4]: [count(1)#141, sum(UnscaledValue(ss_wholesale_cost#10))#142, sum(UnscaledValue(ss_list_price#11))#143, sum(UnscaledValue(ss_coupon_amt#12))#144] +Results [8]: [i_item_sk#82 AS item_sk#145, s_store_name#41 AS store_name#146, s_zip#42 AS store_zip#147, d_year#38 AS syear#148, count(1)#141 AS cnt#149, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#10))#142,17,2) AS s1#150, MakeDecimal(sum(UnscaledValue(ss_list_price#11))#143,17,2) AS s2#151, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#12))#144,17,2) AS s3#152] + +(166) BroadcastExchange +Input [8]: [item_sk#145, store_name#146, 
store_zip#147, syear#148, cnt#149, s1#150, s2#151, s3#152] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, string, true], input[2, string, true]),false), [id=#153] + +(167) BroadcastHashJoin [codegen id : 42] +Left keys [3]: [item_sk#101, store_name#102, store_zip#103] +Right keys [3]: [item_sk#145, store_name#146, store_zip#147] +Join condition: (cnt#149 <= cnt#113) + +(168) Project [codegen id : 42] +Output [21]: [product_name#100, store_name#102, store_zip#103, b_street_number#104, b_streen_name#105, b_city#106, b_zip#107, c_street_number#108, c_street_name#109, c_city#110, c_zip#111, syear#112, cnt#113, s1#114, s2#115, s3#116, s1#150, s2#151, s3#152, syear#148, cnt#149] +Input [25]: [product_name#100, item_sk#101, store_name#102, store_zip#103, b_street_number#104, b_streen_name#105, b_city#106, b_zip#107, c_street_number#108, c_street_name#109, c_city#110, c_zip#111, syear#112, cnt#113, s1#114, s2#115, s3#116, item_sk#145, store_name#146, store_zip#147, syear#148, cnt#149, s1#150, s2#151, s3#152] + +(169) Exchange +Input [21]: [product_name#100, store_name#102, store_zip#103, b_street_number#104, b_streen_name#105, b_city#106, b_zip#107, c_street_number#108, c_street_name#109, c_city#110, c_zip#111, syear#112, cnt#113, s1#114, s2#115, s3#116, s1#150, s2#151, s3#152, syear#148, cnt#149] +Arguments: rangepartitioning(product_name#100 ASC NULLS FIRST, store_name#102 ASC NULLS FIRST, cnt#149 ASC NULLS FIRST, 5), true, [id=#154] + +(170) Sort [codegen id : 43] +Input [21]: [product_name#100, store_name#102, store_zip#103, b_street_number#104, b_streen_name#105, b_city#106, b_zip#107, c_street_number#108, c_street_name#109, c_city#110, c_zip#111, syear#112, cnt#113, s1#114, s2#115, s3#116, s1#150, s2#151, s3#152, syear#148, cnt#149] +Arguments: [product_name#100 ASC NULLS FIRST, store_name#102 ASC NULLS FIRST, cnt#149 ASC NULLS FIRST], true, 0 + diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/simplified.txt new file mode 100644 index 0000000000000..78b316fcc2195 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/simplified.txt @@ -0,0 +1,246 @@ +WholeStageCodegen (43) + Sort [cnt,product_name,store_name] + InputAdapter + Exchange [cnt,product_name,store_name] #1 + WholeStageCodegen (42) + Project [b_city,b_streen_name,b_street_number,b_zip,c_city,c_street_name,c_street_number,c_zip,cnt,cnt,product_name,s1,s1,s2,s2,s3,s3,store_name,store_zip,syear,syear] + BroadcastHashJoin [cnt,cnt,item_sk,item_sk,store_name,store_name,store_zip,store_zip] + HashAggregate [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,count,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,sum,sum,sum] [b_city,b_streen_name,b_street_number,b_zip,c_city,c_street_name,c_street_number,c_zip,cnt,count,count(1),item_sk,product_name,s1,s2,s3,store_name,store_zip,sum,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_wholesale_cost)),syear] + InputAdapter + Exchange [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip] #2 + WholeStageCodegen (20) + HashAggregate [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,ss_coupon_amt,ss_list_price,ss_wholesale_cost] [count,count,sum,sum,sum,sum,sum,sum] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,ss_coupon_amt,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project 
[ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_addr_sk,ca_city,ca_street_name,ca_street_number,ca_zip,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] + Project [c_current_addr_sk,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] + Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,hd_income_band_sk,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [p_promo_sk,ss_promo_sk] + Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin 
[c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,cd_marital_status,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_first_shipto_date_sk,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_first_sales_date_sk,c_first_shipto_date_sk,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_year,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_store_sk,ss_wholesale_cost] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_wholesale_cost] + BroadcastHashJoin [cs_item_sk,ss_item_sk] + Project [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_wholesale_cost] + BroadcastHashJoin 
[sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Filter [ss_addr_sk,ss_cdemo_sk,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number,ss_wholesale_cost] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (4) + Project [cs_item_sk] + Filter [sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2), true)),sum(cs_ext_list_price)] + HashAggregate [cs_item_sk,isEmpty,sum,sum] [isEmpty,sum,sum,sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2), true)),sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2), true)),sum(UnscaledValue(cs_ext_list_price)),sum(cs_ext_list_price)] + InputAdapter + Exchange [cs_item_sk] #5 + WholeStageCodegen (3) + HashAggregate [cr_refunded_cash,cr_reversed_charge,cr_store_credit,cs_ext_list_price,cs_item_sk] [isEmpty,isEmpty,sum,sum,sum,sum] + Project 
[cr_refunded_cash,cr_reversed_charge,cr_store_credit,cs_ext_list_price,cs_item_sk] + BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + Filter [cs_item_sk,cs_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ext_list_price,cs_item_sk,cs_order_number] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (5) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (6) + Filter [s_store_name,s_store_sk,s_zip] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_name,s_store_sk,s_zip] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (7) + Filter [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (8) + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + ReusedExchange [d_date_sk,d_year] #10 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (10) + Filter [cd_demo_sk,cd_marital_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status] + InputAdapter + ReusedExchange [cd_demo_sk,cd_marital_status] #11 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (12) + Filter [p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.promotion [p_promo_sk] + InputAdapter + BroadcastExchange #13 
+ WholeStageCodegen (13) + Filter [hd_demo_sk,hd_income_band_sk] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_income_band_sk] + InputAdapter + ReusedExchange [hd_demo_sk,hd_income_band_sk] #13 + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (15) + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] + InputAdapter + ReusedExchange [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] #14 + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (17) + Filter [ib_income_band_sk] + ColumnarToRow + InputAdapter + Scan parquet default.income_band [ib_income_band_sk] + InputAdapter + ReusedExchange [ib_income_band_sk] #15 + InputAdapter + BroadcastExchange #16 + WholeStageCodegen (19) + Project [i_item_sk,i_product_name] + Filter [i_color,i_current_price,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_color,i_current_price,i_item_sk,i_product_name] + InputAdapter + BroadcastExchange #17 + WholeStageCodegen (41) + HashAggregate [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,count,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,sum,sum,sum] [cnt,count,count(1),item_sk,s1,s2,s3,store_name,store_zip,sum,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_wholesale_cost)),syear] + InputAdapter + Exchange [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip] #18 + WholeStageCodegen (40) + HashAggregate [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,ss_coupon_amt,ss_list_price,ss_wholesale_cost] [count,count,sum,sum,sum,sum,sum,sum] + Project 
[ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,ss_coupon_amt,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_addr_sk,ca_city,ca_street_name,ca_street_number,ca_zip,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] + Project [c_current_addr_sk,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] + Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,hd_income_band_sk,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [p_promo_sk,ss_promo_sk] 
+ Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,cd_marital_status,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_first_shipto_date_sk,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_first_sales_date_sk,c_first_shipto_date_sk,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_year,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_store_sk,ss_wholesale_cost] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_wholesale_cost] + BroadcastHashJoin [cs_item_sk,ss_item_sk] + Project 
[ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_wholesale_cost] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Filter [ss_addr_sk,ss_cdemo_sk,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number,ss_wholesale_cost] + InputAdapter + ReusedExchange [sr_item_sk,sr_ticket_number] #3 + InputAdapter + ReusedExchange [cs_item_sk] #4 + InputAdapter + BroadcastExchange #19 + WholeStageCodegen (25) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + ReusedExchange [s_store_name,s_store_sk,s_zip] #8 + InputAdapter + ReusedExchange [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk] #9 + InputAdapter + ReusedExchange [d_date_sk,d_year] #10 + InputAdapter + ReusedExchange [d_date_sk,d_year] #10 + InputAdapter + ReusedExchange [cd_demo_sk,cd_marital_status] #11 + InputAdapter + ReusedExchange [cd_demo_sk,cd_marital_status] #11 + InputAdapter + ReusedExchange [p_promo_sk] #12 + InputAdapter + ReusedExchange [hd_demo_sk,hd_income_band_sk] #13 + InputAdapter + ReusedExchange [hd_demo_sk,hd_income_band_sk] #13 + InputAdapter + ReusedExchange [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] #14 + InputAdapter + ReusedExchange [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] #14 + InputAdapter + ReusedExchange [ib_income_band_sk] #15 + InputAdapter + ReusedExchange [ib_income_band_sk] #15 + InputAdapter + ReusedExchange [i_item_sk,i_product_name] #16 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65.sf100/explain.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65.sf100/explain.txt new file mode 100644 index 0000000000000..badf00877da8e --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65.sf100/explain.txt @@ -0,0 +1,260 @@ +== Physical Plan == +TakeOrderedAndProject (45) ++- * Project (44) + +- * SortMergeJoin Inner (43) + :- * Sort (37) + : +- Exchange (36) + : +- * Project (35) + : +- * BroadcastHashJoin Inner BuildRight (34) + : :- * Project (29) + : : +- * BroadcastHashJoin Inner BuildRight (28) + : : :- * Filter (14) + : : : +- * HashAggregate (13) + : : : +- Exchange (12) + : : : +- * HashAggregate (11) + : : : +- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (27) + : : +- * HashAggregate (26) + : : +- Exchange (25) + : : +- * HashAggregate (24) + : : +- * HashAggregate (23) + : : +- Exchange (22) + : : +- * HashAggregate (21) + : : +- * Project (20) + : : +- * BroadcastHashJoin Inner BuildRight (19) + : : :- * Filter (17) + : : : +- * ColumnarToRow (16) + : : : +- Scan parquet default.store_sales (15) + : : +- ReusedExchange (18) + : +- BroadcastExchange (33) + : +- * Filter (32) + : +- * ColumnarToRow (31) + : +- Scan parquet default.store (30) + +- * Sort (42) + +- Exchange (41) + +- * Filter (40) + +- * ColumnarToRow (39) + +- Scan parquet default.item (38) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_sales_price#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: 
[IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_sales_price#4] + +(3) Filter [codegen id : 2] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_sales_price#4] +Condition : ((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_item_sk#2)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#5, d_month_seq#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1176), LessThanOrEqual(d_month_seq,1187), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#5, d_month_seq#6] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#5, d_month_seq#6] +Condition : (((isnotnull(d_month_seq#6) AND (d_month_seq#6 >= 1176)) AND (d_month_seq#6 <= 1187)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [2]: [d_date_sk#5, d_month_seq#6] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(10) Project [codegen id : 2] +Output [3]: [ss_item_sk#2, ss_store_sk#3, ss_sales_price#4] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_sales_price#4, d_date_sk#5] + +(11) HashAggregate [codegen id : 2] +Input [3]: [ss_item_sk#2, ss_store_sk#3, ss_sales_price#4] +Keys [2]: [ss_store_sk#3, ss_item_sk#2] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#4))] +Aggregate Attributes [1]: [sum#8] +Results [3]: [ss_store_sk#3, ss_item_sk#2, sum#9] + +(12) Exchange +Input [3]: 
[ss_store_sk#3, ss_item_sk#2, sum#9] +Arguments: hashpartitioning(ss_store_sk#3, ss_item_sk#2, 5), true, [id=#10] + +(13) HashAggregate [codegen id : 8] +Input [3]: [ss_store_sk#3, ss_item_sk#2, sum#9] +Keys [2]: [ss_store_sk#3, ss_item_sk#2] +Functions [1]: [sum(UnscaledValue(ss_sales_price#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#4))#11] +Results [3]: [ss_store_sk#3, ss_item_sk#2, MakeDecimal(sum(UnscaledValue(ss_sales_price#4))#11,17,2) AS revenue#12] + +(14) Filter [codegen id : 8] +Input [3]: [ss_store_sk#3, ss_item_sk#2, revenue#12] +Condition : isnotnull(revenue#12) + +(15) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#13, ss_item_sk#14, ss_store_sk#15, ss_sales_price#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 4] +Input [4]: [ss_sold_date_sk#13, ss_item_sk#14, ss_store_sk#15, ss_sales_price#16] + +(17) Filter [codegen id : 4] +Input [4]: [ss_sold_date_sk#13, ss_item_sk#14, ss_store_sk#15, ss_sales_price#16] +Condition : (isnotnull(ss_sold_date_sk#13) AND isnotnull(ss_store_sk#15)) + +(18) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(19) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#13] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(20) Project [codegen id : 4] +Output [3]: [ss_item_sk#14, ss_store_sk#15, ss_sales_price#16] +Input [5]: [ss_sold_date_sk#13, ss_item_sk#14, ss_store_sk#15, ss_sales_price#16, d_date_sk#5] + +(21) HashAggregate [codegen id : 4] +Input [3]: [ss_item_sk#14, ss_store_sk#15, ss_sales_price#16] +Keys [2]: [ss_store_sk#15, ss_item_sk#14] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#16))] +Aggregate Attributes [1]: [sum#17] +Results [3]: [ss_store_sk#15, 
ss_item_sk#14, sum#18] + +(22) Exchange +Input [3]: [ss_store_sk#15, ss_item_sk#14, sum#18] +Arguments: hashpartitioning(ss_store_sk#15, ss_item_sk#14, 5), true, [id=#19] + +(23) HashAggregate [codegen id : 5] +Input [3]: [ss_store_sk#15, ss_item_sk#14, sum#18] +Keys [2]: [ss_store_sk#15, ss_item_sk#14] +Functions [1]: [sum(UnscaledValue(ss_sales_price#16))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#16))#20] +Results [2]: [ss_store_sk#15, MakeDecimal(sum(UnscaledValue(ss_sales_price#16))#20,17,2) AS revenue#21] + +(24) HashAggregate [codegen id : 5] +Input [2]: [ss_store_sk#15, revenue#21] +Keys [1]: [ss_store_sk#15] +Functions [1]: [partial_avg(revenue#21)] +Aggregate Attributes [2]: [sum#22, count#23] +Results [3]: [ss_store_sk#15, sum#24, count#25] + +(25) Exchange +Input [3]: [ss_store_sk#15, sum#24, count#25] +Arguments: hashpartitioning(ss_store_sk#15, 5), true, [id=#26] + +(26) HashAggregate [codegen id : 6] +Input [3]: [ss_store_sk#15, sum#24, count#25] +Keys [1]: [ss_store_sk#15] +Functions [1]: [avg(revenue#21)] +Aggregate Attributes [1]: [avg(revenue#21)#27] +Results [2]: [ss_store_sk#15, avg(revenue#21)#27 AS ave#28] + +(27) BroadcastExchange +Input [2]: [ss_store_sk#15, ave#28] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#29] + +(28) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [ss_store_sk#15] +Join condition: (cast(revenue#12 as decimal(23,7)) <= CheckOverflow((0.100000 * promote_precision(ave#28)), DecimalType(23,7), true)) + +(29) Project [codegen id : 8] +Output [3]: [ss_store_sk#3, ss_item_sk#2, revenue#12] +Input [5]: [ss_store_sk#3, ss_item_sk#2, revenue#12, ss_store_sk#15, ave#28] + +(30) Scan parquet default.store +Output [2]: [s_store_sk#30, s_store_name#31] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] 
+PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 7] +Input [2]: [s_store_sk#30, s_store_name#31] + +(32) Filter [codegen id : 7] +Input [2]: [s_store_sk#30, s_store_name#31] +Condition : isnotnull(s_store_sk#30) + +(33) BroadcastExchange +Input [2]: [s_store_sk#30, s_store_name#31] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#32] + +(34) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#30] +Join condition: None + +(35) Project [codegen id : 8] +Output [3]: [ss_item_sk#2, revenue#12, s_store_name#31] +Input [5]: [ss_store_sk#3, ss_item_sk#2, revenue#12, s_store_sk#30, s_store_name#31] + +(36) Exchange +Input [3]: [ss_item_sk#2, revenue#12, s_store_name#31] +Arguments: hashpartitioning(ss_item_sk#2, 5), true, [id=#33] + +(37) Sort [codegen id : 9] +Input [3]: [ss_item_sk#2, revenue#12, s_store_name#31] +Arguments: [ss_item_sk#2 ASC NULLS FIRST], false, 0 + +(38) Scan parquet default.item +Output [5]: [i_item_sk#34, i_item_desc#35, i_current_price#36, i_wholesale_cost#37, i_brand#38] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(39) ColumnarToRow [codegen id : 10] +Input [5]: [i_item_sk#34, i_item_desc#35, i_current_price#36, i_wholesale_cost#37, i_brand#38] + +(40) Filter [codegen id : 10] +Input [5]: [i_item_sk#34, i_item_desc#35, i_current_price#36, i_wholesale_cost#37, i_brand#38] +Condition : isnotnull(i_item_sk#34) + +(41) Exchange +Input [5]: [i_item_sk#34, i_item_desc#35, i_current_price#36, i_wholesale_cost#37, i_brand#38] +Arguments: hashpartitioning(i_item_sk#34, 5), true, [id=#39] + +(42) Sort [codegen id : 11] +Input [5]: [i_item_sk#34, i_item_desc#35, i_current_price#36, i_wholesale_cost#37, i_brand#38] +Arguments: 
[i_item_sk#34 ASC NULLS FIRST], false, 0 + +(43) SortMergeJoin [codegen id : 12] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#34] +Join condition: None + +(44) Project [codegen id : 12] +Output [6]: [s_store_name#31, i_item_desc#35, revenue#12, i_current_price#36, i_wholesale_cost#37, i_brand#38] +Input [8]: [ss_item_sk#2, revenue#12, s_store_name#31, i_item_sk#34, i_item_desc#35, i_current_price#36, i_wholesale_cost#37, i_brand#38] + +(45) TakeOrderedAndProject +Input [6]: [s_store_name#31, i_item_desc#35, revenue#12, i_current_price#36, i_wholesale_cost#37, i_brand#38] +Arguments: 100, [s_store_name#31 ASC NULLS FIRST, i_item_desc#35 ASC NULLS FIRST], [s_store_name#31, i_item_desc#35, revenue#12, i_current_price#36, i_wholesale_cost#37, i_brand#38] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65.sf100/simplified.txt new file mode 100644 index 0000000000000..0d9bf297d5ae0 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65.sf100/simplified.txt @@ -0,0 +1,72 @@ +TakeOrderedAndProject [i_brand,i_current_price,i_item_desc,i_wholesale_cost,revenue,s_store_name] + WholeStageCodegen (12) + Project [i_brand,i_current_price,i_item_desc,i_wholesale_cost,revenue,s_store_name] + SortMergeJoin [i_item_sk,ss_item_sk] + InputAdapter + WholeStageCodegen (9) + Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #1 + WholeStageCodegen (8) + Project [revenue,s_store_name,ss_item_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [revenue,ss_item_sk,ss_store_sk] + BroadcastHashJoin [ave,revenue,ss_store_sk,ss_store_sk] + Filter [revenue] + HashAggregate [ss_item_sk,ss_store_sk,sum] [revenue,sum,sum(UnscaledValue(ss_sales_price))] + InputAdapter + Exchange [ss_item_sk,ss_store_sk] #2 + WholeStageCodegen (2) + HashAggregate [ss_item_sk,ss_sales_price,ss_store_sk] [sum,sum] + Project 
[ss_item_sk,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (6) + HashAggregate [count,ss_store_sk,sum] [ave,avg(revenue),count,sum] + InputAdapter + Exchange [ss_store_sk] #5 + WholeStageCodegen (5) + HashAggregate [revenue,ss_store_sk] [count,count,sum,sum] + HashAggregate [ss_item_sk,ss_store_sk,sum] [revenue,sum,sum(UnscaledValue(ss_sales_price))] + InputAdapter + Exchange [ss_item_sk,ss_store_sk] #6 + WholeStageCodegen (4) + HashAggregate [ss_item_sk,ss_sales_price,ss_store_sk] [sum,sum] + Project [ss_item_sk,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (7) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_name,s_store_sk] + InputAdapter + WholeStageCodegen (11) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #8 + WholeStageCodegen (10) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_current_price,i_item_desc,i_item_sk,i_wholesale_cost] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/explain.txt new file mode 100644 index 0000000000000..6e6e6a00c4d78 --- /dev/null +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/explain.txt @@ -0,0 +1,245 @@ +== Physical Plan == +TakeOrderedAndProject (42) ++- * Project (41) + +- * BroadcastHashJoin Inner BuildRight (40) + :- * Project (26) + : +- * BroadcastHashJoin Inner BuildRight (25) + : :- * Project (20) + : : +- * BroadcastHashJoin Inner BuildRight (19) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.store (1) + : : +- BroadcastExchange (18) + : : +- * Filter (17) + : : +- * HashAggregate (16) + : : +- Exchange (15) + : : +- * HashAggregate (14) + : : +- * Project (13) + : : +- * BroadcastHashJoin Inner BuildRight (12) + : : :- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.store_sales (4) + : : +- BroadcastExchange (11) + : : +- * Project (10) + : : +- * Filter (9) + : : +- * ColumnarToRow (8) + : : +- Scan parquet default.date_dim (7) + : +- BroadcastExchange (24) + : +- * Filter (23) + : +- * ColumnarToRow (22) + : +- Scan parquet default.item (21) + +- BroadcastExchange (39) + +- * HashAggregate (38) + +- Exchange (37) + +- * HashAggregate (36) + +- * HashAggregate (35) + +- Exchange (34) + +- * HashAggregate (33) + +- * Project (32) + +- * BroadcastHashJoin Inner BuildRight (31) + :- * Filter (29) + : +- * ColumnarToRow (28) + : +- Scan parquet default.store_sales (27) + +- ReusedExchange (30) + + +(1) Scan parquet default.store +Output [2]: [s_store_sk#1, s_store_name#2] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 9] +Input [2]: [s_store_sk#1, s_store_name#2] + +(3) Filter [codegen id : 9] +Input [2]: [s_store_sk#1, s_store_name#2] +Condition : isnotnull(s_store_sk#1) + +(4) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#3, ss_item_sk#4, ss_store_sk#5, 
ss_sales_price#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 2] +Input [4]: [ss_sold_date_sk#3, ss_item_sk#4, ss_store_sk#5, ss_sales_price#6] + +(6) Filter [codegen id : 2] +Input [4]: [ss_sold_date_sk#3, ss_item_sk#4, ss_store_sk#5, ss_sales_price#6] +Condition : ((isnotnull(ss_sold_date_sk#3) AND isnotnull(ss_store_sk#5)) AND isnotnull(ss_item_sk#4)) + +(7) Scan parquet default.date_dim +Output [2]: [d_date_sk#7, d_month_seq#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1176), LessThanOrEqual(d_month_seq,1187), IsNotNull(d_date_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#7, d_month_seq#8] + +(9) Filter [codegen id : 1] +Input [2]: [d_date_sk#7, d_month_seq#8] +Condition : (((isnotnull(d_month_seq#8) AND (d_month_seq#8 >= 1176)) AND (d_month_seq#8 <= 1187)) AND isnotnull(d_date_sk#7)) + +(10) Project [codegen id : 1] +Output [1]: [d_date_sk#7] +Input [2]: [d_date_sk#7, d_month_seq#8] + +(11) BroadcastExchange +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + +(12) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#3] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(13) Project [codegen id : 2] +Output [3]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6] +Input [5]: [ss_sold_date_sk#3, ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, d_date_sk#7] + +(14) HashAggregate [codegen id : 2] +Input [3]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6] +Keys [2]: 
[ss_store_sk#5, ss_item_sk#4] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [1]: [sum#10] +Results [3]: [ss_store_sk#5, ss_item_sk#4, sum#11] + +(15) Exchange +Input [3]: [ss_store_sk#5, ss_item_sk#4, sum#11] +Arguments: hashpartitioning(ss_store_sk#5, ss_item_sk#4, 5), true, [id=#12] + +(16) HashAggregate [codegen id : 3] +Input [3]: [ss_store_sk#5, ss_item_sk#4, sum#11] +Keys [2]: [ss_store_sk#5, ss_item_sk#4] +Functions [1]: [sum(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#6))#13] +Results [3]: [ss_store_sk#5, ss_item_sk#4, MakeDecimal(sum(UnscaledValue(ss_sales_price#6))#13,17,2) AS revenue#14] + +(17) Filter [codegen id : 3] +Input [3]: [ss_store_sk#5, ss_item_sk#4, revenue#14] +Condition : isnotnull(revenue#14) + +(18) BroadcastExchange +Input [3]: [ss_store_sk#5, ss_item_sk#4, revenue#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] + +(19) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [s_store_sk#1] +Right keys [1]: [ss_store_sk#5] +Join condition: None + +(20) Project [codegen id : 9] +Output [4]: [s_store_name#2, ss_store_sk#5, ss_item_sk#4, revenue#14] +Input [5]: [s_store_sk#1, s_store_name#2, ss_store_sk#5, ss_item_sk#4, revenue#14] + +(21) Scan parquet default.item +Output [5]: [i_item_sk#16, i_item_desc#17, i_current_price#18, i_wholesale_cost#19, i_brand#20] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 4] +Input [5]: [i_item_sk#16, i_item_desc#17, i_current_price#18, i_wholesale_cost#19, i_brand#20] + +(23) Filter [codegen id : 4] +Input [5]: [i_item_sk#16, i_item_desc#17, i_current_price#18, i_wholesale_cost#19, i_brand#20] +Condition : isnotnull(i_item_sk#16) + +(24) BroadcastExchange +Input 
[5]: [i_item_sk#16, i_item_desc#17, i_current_price#18, i_wholesale_cost#19, i_brand#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#21] + +(25) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_item_sk#4] +Right keys [1]: [i_item_sk#16] +Join condition: None + +(26) Project [codegen id : 9] +Output [7]: [s_store_name#2, ss_store_sk#5, revenue#14, i_item_desc#17, i_current_price#18, i_wholesale_cost#19, i_brand#20] +Input [9]: [s_store_name#2, ss_store_sk#5, ss_item_sk#4, revenue#14, i_item_sk#16, i_item_desc#17, i_current_price#18, i_wholesale_cost#19, i_brand#20] + +(27) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#22, ss_item_sk#23, ss_store_sk#24, ss_sales_price#25] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 6] +Input [4]: [ss_sold_date_sk#22, ss_item_sk#23, ss_store_sk#24, ss_sales_price#25] + +(29) Filter [codegen id : 6] +Input [4]: [ss_sold_date_sk#22, ss_item_sk#23, ss_store_sk#24, ss_sales_price#25] +Condition : (isnotnull(ss_sold_date_sk#22) AND isnotnull(ss_store_sk#24)) + +(30) ReusedExchange [Reuses operator id: 11] +Output [1]: [d_date_sk#7] + +(31) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#22] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(32) Project [codegen id : 6] +Output [3]: [ss_item_sk#23, ss_store_sk#24, ss_sales_price#25] +Input [5]: [ss_sold_date_sk#22, ss_item_sk#23, ss_store_sk#24, ss_sales_price#25, d_date_sk#7] + +(33) HashAggregate [codegen id : 6] +Input [3]: [ss_item_sk#23, ss_store_sk#24, ss_sales_price#25] +Keys [2]: [ss_store_sk#24, ss_item_sk#23] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#25))] +Aggregate Attributes [1]: [sum#26] +Results [3]: 
[ss_store_sk#24, ss_item_sk#23, sum#27] + +(34) Exchange +Input [3]: [ss_store_sk#24, ss_item_sk#23, sum#27] +Arguments: hashpartitioning(ss_store_sk#24, ss_item_sk#23, 5), true, [id=#28] + +(35) HashAggregate [codegen id : 7] +Input [3]: [ss_store_sk#24, ss_item_sk#23, sum#27] +Keys [2]: [ss_store_sk#24, ss_item_sk#23] +Functions [1]: [sum(UnscaledValue(ss_sales_price#25))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#25))#29] +Results [2]: [ss_store_sk#24, MakeDecimal(sum(UnscaledValue(ss_sales_price#25))#29,17,2) AS revenue#30] + +(36) HashAggregate [codegen id : 7] +Input [2]: [ss_store_sk#24, revenue#30] +Keys [1]: [ss_store_sk#24] +Functions [1]: [partial_avg(revenue#30)] +Aggregate Attributes [2]: [sum#31, count#32] +Results [3]: [ss_store_sk#24, sum#33, count#34] + +(37) Exchange +Input [3]: [ss_store_sk#24, sum#33, count#34] +Arguments: hashpartitioning(ss_store_sk#24, 5), true, [id=#35] + +(38) HashAggregate [codegen id : 8] +Input [3]: [ss_store_sk#24, sum#33, count#34] +Keys [1]: [ss_store_sk#24] +Functions [1]: [avg(revenue#30)] +Aggregate Attributes [1]: [avg(revenue#30)#36] +Results [2]: [ss_store_sk#24, avg(revenue#30)#36 AS ave#37] + +(39) BroadcastExchange +Input [2]: [ss_store_sk#24, ave#37] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#38] + +(40) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#5] +Right keys [1]: [ss_store_sk#24] +Join condition: (cast(revenue#14 as decimal(23,7)) <= CheckOverflow((0.100000 * promote_precision(ave#37)), DecimalType(23,7), true)) + +(41) Project [codegen id : 9] +Output [6]: [s_store_name#2, i_item_desc#17, revenue#14, i_current_price#18, i_wholesale_cost#19, i_brand#20] +Input [9]: [s_store_name#2, ss_store_sk#5, revenue#14, i_item_desc#17, i_current_price#18, i_wholesale_cost#19, i_brand#20, ss_store_sk#24, ave#37] + +(42) TakeOrderedAndProject +Input [6]: [s_store_name#2, i_item_desc#17, revenue#14, i_current_price#18, 
i_wholesale_cost#19, i_brand#20] +Arguments: 100, [s_store_name#2 ASC NULLS FIRST, i_item_desc#17 ASC NULLS FIRST], [s_store_name#2, i_item_desc#17, revenue#14, i_current_price#18, i_wholesale_cost#19, i_brand#20] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/simplified.txt new file mode 100644 index 0000000000000..77f6cc15c8210 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/simplified.txt @@ -0,0 +1,63 @@ +TakeOrderedAndProject [i_brand,i_current_price,i_item_desc,i_wholesale_cost,revenue,s_store_name] + WholeStageCodegen (9) + Project [i_brand,i_current_price,i_item_desc,i_wholesale_cost,revenue,s_store_name] + BroadcastHashJoin [ave,revenue,ss_store_sk,ss_store_sk] + Project [i_brand,i_current_price,i_item_desc,i_wholesale_cost,revenue,s_store_name,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [revenue,s_store_name,ss_item_sk,ss_store_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_name,s_store_sk] + InputAdapter + BroadcastExchange #1 + WholeStageCodegen (3) + Filter [revenue] + HashAggregate [ss_item_sk,ss_store_sk,sum] [revenue,sum,sum(UnscaledValue(ss_sales_price))] + InputAdapter + Exchange [ss_item_sk,ss_store_sk] #2 + WholeStageCodegen (2) + HashAggregate [ss_item_sk,ss_sales_price,ss_store_sk] [sum,sum] + Project [ss_item_sk,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] + 
InputAdapter + BroadcastExchange #4 + WholeStageCodegen (4) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_current_price,i_item_desc,i_item_sk,i_wholesale_cost] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (8) + HashAggregate [count,ss_store_sk,sum] [ave,avg(revenue),count,sum] + InputAdapter + Exchange [ss_store_sk] #6 + WholeStageCodegen (7) + HashAggregate [revenue,ss_store_sk] [count,count,sum,sum] + HashAggregate [ss_item_sk,ss_store_sk,sum] [revenue,sum,sum(UnscaledValue(ss_sales_price))] + InputAdapter + Exchange [ss_item_sk,ss_store_sk] #7 + WholeStageCodegen (6) + HashAggregate [ss_item_sk,ss_sales_price,ss_store_sk] [sum,sum] + Project [ss_item_sk,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/explain.txt new file mode 100644 index 0000000000000..6eead0c581c17 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/explain.txt @@ -0,0 +1,310 @@ +== Physical Plan == +TakeOrderedAndProject (55) ++- * HashAggregate (54) + +- Exchange (53) + +- * HashAggregate (52) + +- Union (51) + :- * HashAggregate (32) + : +- Exchange (31) + : +- * HashAggregate (30) + : +- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (23) + : : +- * BroadcastHashJoin Inner BuildRight (22) + : : :- * Project (17) + : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildLeft (9) + : : : : :- BroadcastExchange (5) + : : : : : +- * Project (4) + : : : : : +- * Filter (3) + : : : 
: : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.ship_mode (1) + : : : : +- * Filter (8) + : : : : +- * ColumnarToRow (7) + : : : : +- Scan parquet default.web_sales (6) + : : : +- BroadcastExchange (15) + : : : +- * Project (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet default.time_dim (11) + : : +- BroadcastExchange (21) + : : +- * Filter (20) + : : +- * ColumnarToRow (19) + : : +- Scan parquet default.date_dim (18) + : +- BroadcastExchange (27) + : +- * Filter (26) + : +- * ColumnarToRow (25) + : +- Scan parquet default.warehouse (24) + +- * HashAggregate (50) + +- Exchange (49) + +- * HashAggregate (48) + +- * Project (47) + +- * BroadcastHashJoin Inner BuildRight (46) + :- * Project (44) + : +- * BroadcastHashJoin Inner BuildRight (43) + : :- * Project (41) + : : +- * BroadcastHashJoin Inner BuildRight (40) + : : :- * Project (38) + : : : +- * BroadcastHashJoin Inner BuildLeft (37) + : : : :- ReusedExchange (33) + : : : +- * Filter (36) + : : : +- * ColumnarToRow (35) + : : : +- Scan parquet default.catalog_sales (34) + : : +- ReusedExchange (39) + : +- ReusedExchange (42) + +- ReusedExchange (45) + + +(1) Scan parquet default.ship_mode +Output [2]: [sm_ship_mode_sk#1, sm_carrier#2] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/ship_mode] +PushedFilters: [In(sm_carrier, [DHL,BARIAN]), IsNotNull(sm_ship_mode_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [2]: [sm_ship_mode_sk#1, sm_carrier#2] + +(3) Filter [codegen id : 1] +Input [2]: [sm_ship_mode_sk#1, sm_carrier#2] +Condition : (sm_carrier#2 IN (DHL,BARIAN) AND isnotnull(sm_ship_mode_sk#1)) + +(4) Project [codegen id : 1] +Output [1]: [sm_ship_mode_sk#1] +Input [2]: [sm_ship_mode_sk#1, sm_carrier#2] + +(5) BroadcastExchange +Input [1]: [sm_ship_mode_sk#1] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, 
int, true] as bigint)),false), [id=#3] + +(6) Scan parquet default.web_sales +Output [7]: [ws_sold_date_sk#4, ws_sold_time_sk#5, ws_ship_mode_sk#6, ws_warehouse_sk#7, ws_quantity#8, ws_ext_sales_price#9, ws_net_paid#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_warehouse_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_ship_mode_sk)] +ReadSchema: struct + +(7) ColumnarToRow +Input [7]: [ws_sold_date_sk#4, ws_sold_time_sk#5, ws_ship_mode_sk#6, ws_warehouse_sk#7, ws_quantity#8, ws_ext_sales_price#9, ws_net_paid#10] + +(8) Filter +Input [7]: [ws_sold_date_sk#4, ws_sold_time_sk#5, ws_ship_mode_sk#6, ws_warehouse_sk#7, ws_quantity#8, ws_ext_sales_price#9, ws_net_paid#10] +Condition : (((isnotnull(ws_warehouse_sk#7) AND isnotnull(ws_sold_date_sk#4)) AND isnotnull(ws_sold_time_sk#5)) AND isnotnull(ws_ship_mode_sk#6)) + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [sm_ship_mode_sk#1] +Right keys [1]: [ws_ship_mode_sk#6] +Join condition: None + +(10) Project [codegen id : 5] +Output [6]: [ws_sold_date_sk#4, ws_sold_time_sk#5, ws_warehouse_sk#7, ws_quantity#8, ws_ext_sales_price#9, ws_net_paid#10] +Input [8]: [sm_ship_mode_sk#1, ws_sold_date_sk#4, ws_sold_time_sk#5, ws_ship_mode_sk#6, ws_warehouse_sk#7, ws_quantity#8, ws_ext_sales_price#9, ws_net_paid#10] + +(11) Scan parquet default.time_dim +Output [2]: [t_time_sk#11, t_time#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/time_dim] +PushedFilters: [IsNotNull(t_time), GreaterThanOrEqual(t_time,30838), LessThanOrEqual(t_time,59638), IsNotNull(t_time_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [t_time_sk#11, t_time#12] + +(13) Filter [codegen id : 2] +Input [2]: [t_time_sk#11, 
t_time#12] +Condition : (((isnotnull(t_time#12) AND (t_time#12 >= 30838)) AND (t_time#12 <= 59638)) AND isnotnull(t_time_sk#11)) + +(14) Project [codegen id : 2] +Output [1]: [t_time_sk#11] +Input [2]: [t_time_sk#11, t_time#12] + +(15) BroadcastExchange +Input [1]: [t_time_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_sold_time_sk#5] +Right keys [1]: [t_time_sk#11] +Join condition: None + +(17) Project [codegen id : 5] +Output [5]: [ws_sold_date_sk#4, ws_warehouse_sk#7, ws_quantity#8, ws_ext_sales_price#9, ws_net_paid#10] +Input [7]: [ws_sold_date_sk#4, ws_sold_time_sk#5, ws_warehouse_sk#7, ws_quantity#8, ws_ext_sales_price#9, ws_net_paid#10, t_time_sk#11] + +(18) Scan parquet default.date_dim +Output [3]: [d_date_sk#14, d_year#15, d_moy#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#14, d_year#15, d_moy#16] + +(20) Filter [codegen id : 3] +Input [3]: [d_date_sk#14, d_year#15, d_moy#16] +Condition : ((isnotnull(d_year#15) AND (d_year#15 = 2001)) AND isnotnull(d_date_sk#14)) + +(21) BroadcastExchange +Input [3]: [d_date_sk#14, d_year#15, d_moy#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] + +(22) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_sold_date_sk#4] +Right keys [1]: [d_date_sk#14] +Join condition: None + +(23) Project [codegen id : 5] +Output [6]: [ws_warehouse_sk#7, ws_quantity#8, ws_ext_sales_price#9, ws_net_paid#10, d_year#15, d_moy#16] +Input [8]: [ws_sold_date_sk#4, ws_warehouse_sk#7, ws_quantity#8, ws_ext_sales_price#9, ws_net_paid#10, d_date_sk#14, d_year#15, d_moy#16] + 
+(24) Scan parquet default.warehouse +Output [7]: [w_warehouse_sk#18, w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 4] +Input [7]: [w_warehouse_sk#18, w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24] + +(26) Filter [codegen id : 4] +Input [7]: [w_warehouse_sk#18, w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24] +Condition : isnotnull(w_warehouse_sk#18) + +(27) BroadcastExchange +Input [7]: [w_warehouse_sk#18, w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#25] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_warehouse_sk#7] +Right keys [1]: [w_warehouse_sk#18] +Join condition: None + +(29) Project [codegen id : 5] +Output [11]: [ws_quantity#8, ws_ext_sales_price#9, ws_net_paid#10, w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#15, d_moy#16] +Input [13]: [ws_warehouse_sk#7, ws_quantity#8, ws_ext_sales_price#9, ws_net_paid#10, d_year#15, d_moy#16, w_warehouse_sk#18, w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24] + +(30) HashAggregate [codegen id : 5] +Input [11]: [ws_quantity#8, ws_ext_sales_price#9, ws_net_paid#10, w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#15, d_moy#16] +Keys [7]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#15] +Functions [24]: [partial_sum(CASE WHEN (d_moy#16 = 1) THEN 
CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 9) THEN 
CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as 
decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] +Aggregate Attributes [48]: [sum#26, isEmpty#27, sum#28, isEmpty#29, sum#30, isEmpty#31, sum#32, isEmpty#33, sum#34, isEmpty#35, sum#36, isEmpty#37, sum#38, isEmpty#39, sum#40, isEmpty#41, 
sum#42, isEmpty#43, sum#44, isEmpty#45, sum#46, isEmpty#47, sum#48, isEmpty#49, sum#50, isEmpty#51, sum#52, isEmpty#53, sum#54, isEmpty#55, sum#56, isEmpty#57, sum#58, isEmpty#59, sum#60, isEmpty#61, sum#62, isEmpty#63, sum#64, isEmpty#65, sum#66, isEmpty#67, sum#68, isEmpty#69, sum#70, isEmpty#71, sum#72, isEmpty#73] +Results [55]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#15, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119, sum#120, isEmpty#121] + +(31) Exchange +Input [55]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#15, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119, sum#120, isEmpty#121] +Arguments: hashpartitioning(w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#15, 5), true, [id=#122] + +(32) HashAggregate [codegen id : 6] +Input [55]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#15, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, 
isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119, sum#120, isEmpty#121] +Keys [7]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#15] +Functions [24]: [sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 7) THEN 
CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as 
decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 12) THEN 
CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] +Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#123, sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#124, sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#125, sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#126, sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#127, sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#128, sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#129, sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 
as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#130, sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#131, sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#132, sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#133, sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#134, sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#135, sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#136, sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#137, sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as 
decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#138, sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#139, sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#140, sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#141, sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#142, sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#143, sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#144, sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#145, sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#146] +Results [32]: [w_warehouse_name#19, w_warehouse_sq_ft#20, 
w_city#21, w_county#22, w_state#23, w_country#24, DHL,BARIAN AS ship_carriers#147, d_year#15 AS year#148, sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#123 AS jan_sales#149, sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#124 AS feb_sales#150, sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#125 AS mar_sales#151, sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#126 AS apr_sales#152, sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#127 AS may_sales#153, sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#128 AS jun_sales#154, sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#129 AS jul_sales#155, sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 
as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#130 AS aug_sales#156, sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#131 AS sep_sales#157, sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#132 AS oct_sales#158, sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#133 AS nov_sales#159, sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#134 AS dec_sales#160, sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#135 AS jan_net#161, sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#136 AS feb_net#162, sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#137 AS mar_net#163, sum(CASE WHEN (d_moy#16 = 4) THEN 
CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#138 AS apr_net#164, sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#139 AS may_net#165, sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#140 AS jun_net#166, sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#141 AS jul_net#167, sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#142 AS aug_net#168, sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#143 AS sep_net#169, sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#144 AS oct_net#170, sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#145 AS nov_net#171, sum(CASE WHEN (d_moy#16 = 12) THEN 
CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#146 AS dec_net#172] + +(33) ReusedExchange [Reuses operator id: 5] +Output [1]: [sm_ship_mode_sk#1] + +(34) Scan parquet default.catalog_sales +Output [7]: [cs_sold_date_sk#173, cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_warehouse_sk#176, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_sold_time_sk), IsNotNull(cs_ship_mode_sk)] +ReadSchema: struct + +(35) ColumnarToRow +Input [7]: [cs_sold_date_sk#173, cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_warehouse_sk#176, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179] + +(36) Filter +Input [7]: [cs_sold_date_sk#173, cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_warehouse_sk#176, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179] +Condition : (((isnotnull(cs_warehouse_sk#176) AND isnotnull(cs_sold_date_sk#173)) AND isnotnull(cs_sold_time_sk#174)) AND isnotnull(cs_ship_mode_sk#175)) + +(37) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [sm_ship_mode_sk#1] +Right keys [1]: [cs_ship_mode_sk#175] +Join condition: None + +(38) Project [codegen id : 11] +Output [6]: [cs_sold_date_sk#173, cs_sold_time_sk#174, cs_warehouse_sk#176, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179] +Input [8]: [sm_ship_mode_sk#1, cs_sold_date_sk#173, cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_warehouse_sk#176, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179] + +(39) ReusedExchange [Reuses operator id: 15] +Output [1]: [t_time_sk#11] + +(40) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_sold_time_sk#174] +Right keys 
[1]: [t_time_sk#11] +Join condition: None + +(41) Project [codegen id : 11] +Output [5]: [cs_sold_date_sk#173, cs_warehouse_sk#176, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179] +Input [7]: [cs_sold_date_sk#173, cs_sold_time_sk#174, cs_warehouse_sk#176, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179, t_time_sk#11] + +(42) ReusedExchange [Reuses operator id: 21] +Output [3]: [d_date_sk#14, d_year#15, d_moy#16] + +(43) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_sold_date_sk#173] +Right keys [1]: [d_date_sk#14] +Join condition: None + +(44) Project [codegen id : 11] +Output [6]: [cs_warehouse_sk#176, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179, d_year#15, d_moy#16] +Input [8]: [cs_sold_date_sk#173, cs_warehouse_sk#176, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179, d_date_sk#14, d_year#15, d_moy#16] + +(45) ReusedExchange [Reuses operator id: 27] +Output [7]: [w_warehouse_sk#18, w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24] + +(46) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_warehouse_sk#176] +Right keys [1]: [w_warehouse_sk#18] +Join condition: None + +(47) Project [codegen id : 11] +Output [11]: [cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179, w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#15, d_moy#16] +Input [13]: [cs_warehouse_sk#176, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179, d_year#15, d_moy#16, w_warehouse_sk#18, w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24] + +(48) HashAggregate [codegen id : 11] +Input [11]: [cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179, w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#15, d_moy#16] +Keys [7]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#15] 
+Functions [24]: [partial_sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN 
(d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 5) THEN 
CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] +Aggregate Attributes [48]: [sum#180, 
isEmpty#181, sum#182, isEmpty#183, sum#184, isEmpty#185, sum#186, isEmpty#187, sum#188, isEmpty#189, sum#190, isEmpty#191, sum#192, isEmpty#193, sum#194, isEmpty#195, sum#196, isEmpty#197, sum#198, isEmpty#199, sum#200, isEmpty#201, sum#202, isEmpty#203, sum#204, isEmpty#205, sum#206, isEmpty#207, sum#208, isEmpty#209, sum#210, isEmpty#211, sum#212, isEmpty#213, sum#214, isEmpty#215, sum#216, isEmpty#217, sum#218, isEmpty#219, sum#220, isEmpty#221, sum#222, isEmpty#223, sum#224, isEmpty#225, sum#226, isEmpty#227] +Results [55]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#15, sum#228, isEmpty#229, sum#230, isEmpty#231, sum#232, isEmpty#233, sum#234, isEmpty#235, sum#236, isEmpty#237, sum#238, isEmpty#239, sum#240, isEmpty#241, sum#242, isEmpty#243, sum#244, isEmpty#245, sum#246, isEmpty#247, sum#248, isEmpty#249, sum#250, isEmpty#251, sum#252, isEmpty#253, sum#254, isEmpty#255, sum#256, isEmpty#257, sum#258, isEmpty#259, sum#260, isEmpty#261, sum#262, isEmpty#263, sum#264, isEmpty#265, sum#266, isEmpty#267, sum#268, isEmpty#269, sum#270, isEmpty#271, sum#272, isEmpty#273, sum#274, isEmpty#275] + +(49) Exchange +Input [55]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#15, sum#228, isEmpty#229, sum#230, isEmpty#231, sum#232, isEmpty#233, sum#234, isEmpty#235, sum#236, isEmpty#237, sum#238, isEmpty#239, sum#240, isEmpty#241, sum#242, isEmpty#243, sum#244, isEmpty#245, sum#246, isEmpty#247, sum#248, isEmpty#249, sum#250, isEmpty#251, sum#252, isEmpty#253, sum#254, isEmpty#255, sum#256, isEmpty#257, sum#258, isEmpty#259, sum#260, isEmpty#261, sum#262, isEmpty#263, sum#264, isEmpty#265, sum#266, isEmpty#267, sum#268, isEmpty#269, sum#270, isEmpty#271, sum#272, isEmpty#273, sum#274, isEmpty#275] +Arguments: hashpartitioning(w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#15, 5), true, [id=#276] + +(50) 
HashAggregate [codegen id : 12] +Input [55]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#15, sum#228, isEmpty#229, sum#230, isEmpty#231, sum#232, isEmpty#233, sum#234, isEmpty#235, sum#236, isEmpty#237, sum#238, isEmpty#239, sum#240, isEmpty#241, sum#242, isEmpty#243, sum#244, isEmpty#245, sum#246, isEmpty#247, sum#248, isEmpty#249, sum#250, isEmpty#251, sum#252, isEmpty#253, sum#254, isEmpty#255, sum#256, isEmpty#257, sum#258, isEmpty#259, sum#260, isEmpty#261, sum#262, isEmpty#263, sum#264, isEmpty#265, sum#266, isEmpty#267, sum#268, isEmpty#269, sum#270, isEmpty#271, sum#272, isEmpty#273, sum#274, isEmpty#275] +Keys [7]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#15] +Functions [24]: [sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), 
sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as 
decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as 
decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] +Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#277, sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#278, sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#279, sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#280, sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#281, sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), 
DecimalType(18,2), true) ELSE 0.00 END)#282, sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#283, sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#284, sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#285, sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#286, sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#287, sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#288, sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#289, sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#290, sum(CASE WHEN 
(d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#291, sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#292, sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#293, sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#294, sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#295, sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#296, sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#297, sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#298, sum(CASE WHEN (d_moy#16 = 11) THEN 
CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#299, sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#300] +Results [32]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, DHL,BARIAN AS ship_carriers#301, d_year#15 AS year#302, sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#277 AS jan_sales#303, sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#278 AS feb_sales#304, sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#279 AS mar_sales#305, sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#280 AS apr_sales#306, sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#281 AS may_sales#307, sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 
as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#282 AS jun_sales#308, sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#283 AS jul_sales#309, sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#284 AS aug_sales#310, sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#285 AS sep_sales#311, sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#286 AS oct_sales#312, sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#287 AS nov_sales#313, sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#288 AS dec_sales#314, sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#289 AS jan_net#315, sum(CASE WHEN (d_moy#16 = 2) 
THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#290 AS feb_net#316, sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#291 AS mar_net#317, sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#292 AS apr_net#318, sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#293 AS may_net#319, sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#294 AS jun_net#320, sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#295 AS jul_net#321, sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#296 AS aug_net#322, sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), 
DecimalType(18,2), true) ELSE 0.00 END)#297 AS sep_net#323, sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#298 AS oct_net#324, sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#299 AS nov_net#325, sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#300 AS dec_net#326] + +(51) Union + +(52) HashAggregate [codegen id : 13] +Input [32]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, ship_carriers#147, year#148, jan_sales#149, feb_sales#150, mar_sales#151, apr_sales#152, may_sales#153, jun_sales#154, jul_sales#155, aug_sales#156, sep_sales#157, oct_sales#158, nov_sales#159, dec_sales#160, jan_net#161, feb_net#162, mar_net#163, apr_net#164, may_net#165, jun_net#166, jul_net#167, aug_net#168, sep_net#169, oct_net#170, nov_net#171, dec_net#172] +Keys [8]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, ship_carriers#147, year#148] +Functions [36]: [partial_sum(jan_sales#149), partial_sum(feb_sales#150), partial_sum(mar_sales#151), partial_sum(apr_sales#152), partial_sum(may_sales#153), partial_sum(jun_sales#154), partial_sum(jul_sales#155), partial_sum(aug_sales#156), partial_sum(sep_sales#157), partial_sum(oct_sales#158), partial_sum(nov_sales#159), partial_sum(dec_sales#160), partial_sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as 
decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), 
true)), partial_sum(jan_net#161), partial_sum(feb_net#162), partial_sum(mar_net#163), partial_sum(apr_net#164), partial_sum(may_net#165), partial_sum(jun_net#166), partial_sum(jul_net#167), partial_sum(aug_net#168), partial_sum(sep_net#169), partial_sum(oct_net#170), partial_sum(nov_net#171), partial_sum(dec_net#172)] +Aggregate Attributes [72]: [sum#327, isEmpty#328, sum#329, isEmpty#330, sum#331, isEmpty#332, sum#333, isEmpty#334, sum#335, isEmpty#336, sum#337, isEmpty#338, sum#339, isEmpty#340, sum#341, isEmpty#342, sum#343, isEmpty#344, sum#345, isEmpty#346, sum#347, isEmpty#348, sum#349, isEmpty#350, sum#351, isEmpty#352, sum#353, isEmpty#354, sum#355, isEmpty#356, sum#357, isEmpty#358, sum#359, isEmpty#360, sum#361, isEmpty#362, sum#363, isEmpty#364, sum#365, isEmpty#366, sum#367, isEmpty#368, sum#369, isEmpty#370, sum#371, isEmpty#372, sum#373, isEmpty#374, sum#375, isEmpty#376, sum#377, isEmpty#378, sum#379, isEmpty#380, sum#381, isEmpty#382, sum#383, isEmpty#384, sum#385, isEmpty#386, sum#387, isEmpty#388, sum#389, isEmpty#390, sum#391, isEmpty#392, sum#393, isEmpty#394, sum#395, isEmpty#396, sum#397, isEmpty#398] +Results [80]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, ship_carriers#147, year#148, sum#399, isEmpty#400, sum#401, isEmpty#402, sum#403, isEmpty#404, sum#405, isEmpty#406, sum#407, isEmpty#408, sum#409, isEmpty#410, sum#411, isEmpty#412, sum#413, isEmpty#414, sum#415, isEmpty#416, sum#417, isEmpty#418, sum#419, isEmpty#420, sum#421, isEmpty#422, sum#423, isEmpty#424, sum#425, isEmpty#426, sum#427, isEmpty#428, sum#429, isEmpty#430, sum#431, isEmpty#432, sum#433, isEmpty#434, sum#435, isEmpty#436, sum#437, isEmpty#438, sum#439, isEmpty#440, sum#441, isEmpty#442, sum#443, isEmpty#444, sum#445, isEmpty#446, sum#447, isEmpty#448, sum#449, isEmpty#450, sum#451, isEmpty#452, sum#453, isEmpty#454, sum#455, isEmpty#456, sum#457, isEmpty#458, sum#459, isEmpty#460, sum#461, isEmpty#462, sum#463, 
isEmpty#464, sum#465, isEmpty#466, sum#467, isEmpty#468, sum#469, isEmpty#470] + +(53) Exchange +Input [80]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, ship_carriers#147, year#148, sum#399, isEmpty#400, sum#401, isEmpty#402, sum#403, isEmpty#404, sum#405, isEmpty#406, sum#407, isEmpty#408, sum#409, isEmpty#410, sum#411, isEmpty#412, sum#413, isEmpty#414, sum#415, isEmpty#416, sum#417, isEmpty#418, sum#419, isEmpty#420, sum#421, isEmpty#422, sum#423, isEmpty#424, sum#425, isEmpty#426, sum#427, isEmpty#428, sum#429, isEmpty#430, sum#431, isEmpty#432, sum#433, isEmpty#434, sum#435, isEmpty#436, sum#437, isEmpty#438, sum#439, isEmpty#440, sum#441, isEmpty#442, sum#443, isEmpty#444, sum#445, isEmpty#446, sum#447, isEmpty#448, sum#449, isEmpty#450, sum#451, isEmpty#452, sum#453, isEmpty#454, sum#455, isEmpty#456, sum#457, isEmpty#458, sum#459, isEmpty#460, sum#461, isEmpty#462, sum#463, isEmpty#464, sum#465, isEmpty#466, sum#467, isEmpty#468, sum#469, isEmpty#470] +Arguments: hashpartitioning(w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, ship_carriers#147, year#148, 5), true, [id=#471] + +(54) HashAggregate [codegen id : 14] +Input [80]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, ship_carriers#147, year#148, sum#399, isEmpty#400, sum#401, isEmpty#402, sum#403, isEmpty#404, sum#405, isEmpty#406, sum#407, isEmpty#408, sum#409, isEmpty#410, sum#411, isEmpty#412, sum#413, isEmpty#414, sum#415, isEmpty#416, sum#417, isEmpty#418, sum#419, isEmpty#420, sum#421, isEmpty#422, sum#423, isEmpty#424, sum#425, isEmpty#426, sum#427, isEmpty#428, sum#429, isEmpty#430, sum#431, isEmpty#432, sum#433, isEmpty#434, sum#435, isEmpty#436, sum#437, isEmpty#438, sum#439, isEmpty#440, sum#441, isEmpty#442, sum#443, isEmpty#444, sum#445, isEmpty#446, sum#447, isEmpty#448, sum#449, isEmpty#450, sum#451, isEmpty#452, sum#453, isEmpty#454, sum#455, 
isEmpty#456, sum#457, isEmpty#458, sum#459, isEmpty#460, sum#461, isEmpty#462, sum#463, isEmpty#464, sum#465, isEmpty#466, sum#467, isEmpty#468, sum#469, isEmpty#470] +Keys [8]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, ship_carriers#147, year#148] +Functions [36]: [sum(jan_sales#149), sum(feb_sales#150), sum(mar_sales#151), sum(apr_sales#152), sum(may_sales#153), sum(jun_sales#154), sum(jul_sales#155), sum(aug_sales#156), sum(sep_sales#157), sum(oct_sales#158), sum(nov_sales#159), sum(dec_sales#160), sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(sep_sales#157) / 
promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(jan_net#161), sum(feb_net#162), sum(mar_net#163), sum(apr_net#164), sum(may_net#165), sum(jun_net#166), sum(jul_net#167), sum(aug_net#168), sum(sep_net#169), sum(oct_net#170), sum(nov_net#171), sum(dec_net#172)] +Aggregate Attributes [36]: [sum(jan_sales#149)#472, sum(feb_sales#150)#473, sum(mar_sales#151)#474, sum(apr_sales#152)#475, sum(may_sales#153)#476, sum(jun_sales#154)#477, sum(jul_sales#155)#478, sum(aug_sales#156)#479, sum(sep_sales#157)#480, sum(oct_sales#158)#481, sum(nov_sales#159)#482, sum(dec_sales#160)#483, sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#484, sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#485, sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#486, sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#487, sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), 
DecimalType(38,12), true))#488, sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#489, sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#490, sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#491, sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#492, sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#493, sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#494, sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#495, sum(jan_net#161)#496, sum(feb_net#162)#497, sum(mar_net#163)#498, sum(apr_net#164)#499, sum(may_net#165)#500, sum(jun_net#166)#501, sum(jul_net#167)#502, sum(aug_net#168)#503, sum(sep_net#169)#504, sum(oct_net#170)#505, sum(nov_net#171)#506, sum(dec_net#172)#507] +Results [44]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, ship_carriers#147, year#148, sum(jan_sales#149)#472 AS jan_sales#508, sum(feb_sales#150)#473 AS feb_sales#509, sum(mar_sales#151)#474 AS mar_sales#510, sum(apr_sales#152)#475 AS apr_sales#511, sum(may_sales#153)#476 AS may_sales#512, sum(jun_sales#154)#477 AS jun_sales#513, sum(jul_sales#155)#478 AS jul_sales#514, sum(aug_sales#156)#479 AS aug_sales#515, 
sum(sep_sales#157)#480 AS sep_sales#516, sum(oct_sales#158)#481 AS oct_sales#517, sum(nov_sales#159)#482 AS nov_sales#518, sum(dec_sales#160)#483 AS dec_sales#519, sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#484 AS jan_sales_per_sq_foot#520, sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#485 AS feb_sales_per_sq_foot#521, sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#486 AS mar_sales_per_sq_foot#522, sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#487 AS apr_sales_per_sq_foot#523, sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#488 AS may_sales_per_sq_foot#524, sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#489 AS jun_sales_per_sq_foot#525, sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#490 AS jul_sales_per_sq_foot#526, sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#491 AS aug_sales_per_sq_foot#527, sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#492 AS sep_sales_per_sq_foot#528, 
sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#493 AS oct_sales_per_sq_foot#529, sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#494 AS nov_sales_per_sq_foot#530, sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#495 AS dec_sales_per_sq_foot#531, sum(jan_net#161)#496 AS jan_net#532, sum(feb_net#162)#497 AS feb_net#533, sum(mar_net#163)#498 AS mar_net#534, sum(apr_net#164)#499 AS apr_net#535, sum(may_net#165)#500 AS may_net#536, sum(jun_net#166)#501 AS jun_net#537, sum(jul_net#167)#502 AS jul_net#538, sum(aug_net#168)#503 AS aug_net#539, sum(sep_net#169)#504 AS sep_net#540, sum(oct_net#170)#505 AS oct_net#541, sum(nov_net#171)#506 AS nov_net#542, sum(dec_net#172)#507 AS dec_net#543] + +(55) TakeOrderedAndProject +Input [44]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, ship_carriers#147, year#148, jan_sales#508, feb_sales#509, mar_sales#510, apr_sales#511, may_sales#512, jun_sales#513, jul_sales#514, aug_sales#515, sep_sales#516, oct_sales#517, nov_sales#518, dec_sales#519, jan_sales_per_sq_foot#520, feb_sales_per_sq_foot#521, mar_sales_per_sq_foot#522, apr_sales_per_sq_foot#523, may_sales_per_sq_foot#524, jun_sales_per_sq_foot#525, jul_sales_per_sq_foot#526, aug_sales_per_sq_foot#527, sep_sales_per_sq_foot#528, oct_sales_per_sq_foot#529, nov_sales_per_sq_foot#530, dec_sales_per_sq_foot#531, jan_net#532, feb_net#533, mar_net#534, apr_net#535, may_net#536, jun_net#537, jul_net#538, aug_net#539, sep_net#540, oct_net#541, nov_net#542, dec_net#543] +Arguments: 100, [w_warehouse_name#19 ASC NULLS FIRST], [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, 
w_county#22, w_state#23, w_country#24, ship_carriers#147, year#148, jan_sales#508, feb_sales#509, mar_sales#510, apr_sales#511, may_sales#512, jun_sales#513, jul_sales#514, aug_sales#515, sep_sales#516, oct_sales#517, nov_sales#518, dec_sales#519, jan_sales_per_sq_foot#520, feb_sales_per_sq_foot#521, mar_sales_per_sq_foot#522, apr_sales_per_sq_foot#523, ... 20 more fields] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/simplified.txt new file mode 100644 index 0000000000000..488b1c27b8987 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/simplified.txt @@ -0,0 +1,83 @@ +TakeOrderedAndProject [apr_net,apr_sales,apr_sales_per_sq_foot,aug_net,aug_sales,aug_sales_per_sq_foot,dec_net,dec_sales,dec_sales_per_sq_foot,feb_net,feb_sales,feb_sales_per_sq_foot,jan_net,jan_sales,jan_sales_per_sq_foot,jul_net,jul_sales,jul_sales_per_sq_foot,jun_net,jun_sales,jun_sales_per_sq_foot,mar_net,mar_sales,mar_sales_per_sq_foot,may_net,may_sales,may_sales_per_sq_foot,nov_net,nov_sales,nov_sales_per_sq_foot,oct_net,oct_sales,oct_sales_per_sq_foot,sep_net,sep_sales,sep_sales_per_sq_foot,ship_carriers,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft,year] + WholeStageCodegen (14) + HashAggregate [isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,ship_carriers,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft,year] 
[apr_net,apr_sales,apr_sales_per_sq_foot,aug_net,aug_sales,aug_sales_per_sq_foot,dec_net,dec_sales,dec_sales_per_sq_foot,feb_net,feb_sales,feb_sales_per_sq_foot,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,jan_net,jan_sales,jan_sales_per_sq_foot,jul_net,jul_sales,jul_sales_per_sq_foot,jun_net,jun_sales,jun_sales_per_sq_foot,mar_net,mar_sales,mar_sales_per_sq_foot,may_net,may_sales,may_sales_per_sq_foot,nov_net,nov_sales,nov_sales_per_sq_foot,oct_net,oct_sales,oct_sales_per_sq_foot,sep_net,sep_sales,sep_sales_per_sq_foot,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum(CheckOverflow((promote_precision(apr_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(aug_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(dec_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(feb_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(jan_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(jul_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(jun_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) 
as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(mar_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(may_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(nov_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(oct_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(sep_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(apr_net),sum(apr_sales),sum(aug_net),sum(aug_sales),sum(dec_net),sum(dec_sales),sum(feb_net),sum(feb_sales),sum(jan_net),sum(jan_sales),sum(jul_net),sum(jul_sales),sum(jun_net),sum(jun_sales),sum(mar_net),sum(mar_sales),sum(may_net),sum(may_sales),sum(nov_net),sum(nov_sales),sum(oct_net),sum(oct_sales),sum(sep_net),sum(sep_sales)] + InputAdapter + Exchange [ship_carriers,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft,year] #1 + WholeStageCodegen (13) + HashAggregate [apr_net,apr_sales,aug_net,aug_sales,dec_net,dec_sales,feb_net,feb_sales,jan_net,jan_sales,jul_net,jul_sales,jun_net,jun_sales,mar_net,mar_sales,may_net,may_sales,nov_net,nov_sales,oct_net,oct_sales,sep_net,sep_sales,ship_carriers,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft,year] 
[isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + InputAdapter + Union + WholeStageCodegen (6) + HashAggregate [d_year,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft] [apr_net,apr_sales,aug_net,aug_sales,dec_net,dec_sales,feb_net,feb_sales,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,jan_net,jan_sales,jul_net,jul_sales,jun_net,jun_sales,mar_net,mar_sales,may_net,may_sales,nov_net,nov_sales,oct_net,oct_sales,sep_net,sep_sales,ship_carriers,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as 
decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * 
promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN 
CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),year] + InputAdapter + Exchange [d_year,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft] #2 + WholeStageCodegen (5) + HashAggregate [d_moy,d_year,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft,ws_ext_sales_price,ws_net_paid,ws_quantity] 
[isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [d_moy,d_year,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft,ws_ext_sales_price,ws_net_paid,ws_quantity] + BroadcastHashJoin [w_warehouse_sk,ws_warehouse_sk] + Project [d_moy,d_year,ws_ext_sales_price,ws_net_paid,ws_quantity,ws_warehouse_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_ext_sales_price,ws_net_paid,ws_quantity,ws_sold_date_sk,ws_warehouse_sk] + BroadcastHashJoin [t_time_sk,ws_sold_time_sk] + Project [ws_ext_sales_price,ws_net_paid,ws_quantity,ws_sold_date_sk,ws_sold_time_sk,ws_warehouse_sk] + BroadcastHashJoin [sm_ship_mode_sk,ws_ship_mode_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [sm_ship_mode_sk] + Filter [sm_carrier,sm_ship_mode_sk] + ColumnarToRow + InputAdapter + Scan parquet default.ship_mode [sm_carrier,sm_ship_mode_sk] + Filter [ws_ship_mode_sk,ws_sold_date_sk,ws_sold_time_sk,ws_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_sales_price,ws_net_paid,ws_quantity,ws_ship_mode_sk,ws_sold_date_sk,ws_sold_time_sk,ws_warehouse_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [t_time_sk] + Filter [t_time,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_time,t_time_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet 
default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.warehouse [w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sk,w_warehouse_sq_ft] + WholeStageCodegen (12) + HashAggregate [d_year,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft] [apr_net,apr_sales,aug_net,aug_sales,dec_net,dec_sales,feb_net,feb_sales,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,jan_net,jan_sales,jul_net,jul_sales,jun_net,jun_sales,mar_net,mar_sales,may_net,may_sales,nov_net,nov_sales,oct_net,oct_sales,sep_net,sep_sales,ship_carriers,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * 
promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN 
CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 
0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),year] + InputAdapter + Exchange [d_year,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft] #7 + WholeStageCodegen (11) + HashAggregate [cs_net_paid_inc_tax,cs_quantity,cs_sales_price,d_moy,d_year,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft] [isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [cs_net_paid_inc_tax,cs_quantity,cs_sales_price,d_moy,d_year,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft] + BroadcastHashJoin [cs_warehouse_sk,w_warehouse_sk] + Project [cs_net_paid_inc_tax,cs_quantity,cs_sales_price,cs_warehouse_sk,d_moy,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_net_paid_inc_tax,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_warehouse_sk] + BroadcastHashJoin 
[cs_sold_time_sk,t_time_sk] + Project [cs_net_paid_inc_tax,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_sold_time_sk,cs_warehouse_sk] + BroadcastHashJoin [cs_ship_mode_sk,sm_ship_mode_sk] + InputAdapter + ReusedExchange [sm_ship_mode_sk] #3 + Filter [cs_ship_mode_sk,cs_sold_date_sk,cs_sold_time_sk,cs_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_net_paid_inc_tax,cs_quantity,cs_sales_price,cs_ship_mode_sk,cs_sold_date_sk,cs_sold_time_sk,cs_warehouse_sk] + InputAdapter + ReusedExchange [t_time_sk] #4 + InputAdapter + ReusedExchange [d_date_sk,d_moy,d_year] #5 + InputAdapter + ReusedExchange [w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sk,w_warehouse_sq_ft] #6 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/explain.txt new file mode 100644 index 0000000000000..48ee7fa1c8aa7 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/explain.txt @@ -0,0 +1,310 @@ +== Physical Plan == +TakeOrderedAndProject (55) ++- * HashAggregate (54) + +- Exchange (53) + +- * HashAggregate (52) + +- Union (51) + :- * HashAggregate (32) + : +- Exchange (31) + : +- * HashAggregate (30) + : +- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.web_sales (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.warehouse (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.date_dim (10) + : : +- 
BroadcastExchange (20) + : : +- * Project (19) + : : +- * Filter (18) + : : +- * ColumnarToRow (17) + : : +- Scan parquet default.time_dim (16) + : +- BroadcastExchange (27) + : +- * Project (26) + : +- * Filter (25) + : +- * ColumnarToRow (24) + : +- Scan parquet default.ship_mode (23) + +- * HashAggregate (50) + +- Exchange (49) + +- * HashAggregate (48) + +- * Project (47) + +- * BroadcastHashJoin Inner BuildRight (46) + :- * Project (44) + : +- * BroadcastHashJoin Inner BuildRight (43) + : :- * Project (41) + : : +- * BroadcastHashJoin Inner BuildRight (40) + : : :- * Project (38) + : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : :- * Filter (35) + : : : : +- * ColumnarToRow (34) + : : : : +- Scan parquet default.catalog_sales (33) + : : : +- ReusedExchange (36) + : : +- ReusedExchange (39) + : +- ReusedExchange (42) + +- ReusedExchange (45) + + +(1) Scan parquet default.web_sales +Output [7]: [ws_sold_date_sk#1, ws_sold_time_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_warehouse_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_ship_mode_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [7]: [ws_sold_date_sk#1, ws_sold_time_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7] + +(3) Filter [codegen id : 5] +Input [7]: [ws_sold_date_sk#1, ws_sold_time_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7] +Condition : (((isnotnull(ws_warehouse_sk#4) AND isnotnull(ws_sold_date_sk#1)) AND isnotnull(ws_sold_time_sk#2)) AND isnotnull(ws_ship_mode_sk#3)) + +(4) Scan parquet default.warehouse +Output [7]: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, 
w_county#12, w_state#13, w_country#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [7]: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] + +(6) Filter [codegen id : 1] +Input [7]: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] +Condition : isnotnull(w_warehouse_sk#8) + +(7) BroadcastExchange +Input [7]: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#15] + +(8) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_warehouse_sk#4] +Right keys [1]: [w_warehouse_sk#8] +Join condition: None + +(9) Project [codegen id : 5] +Output [12]: [ws_sold_date_sk#1, ws_sold_time_sk#2, ws_ship_mode_sk#3, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] +Input [14]: [ws_sold_date_sk#1, ws_sold_time_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] + +(10) Scan parquet default.date_dim +Output [3]: [d_date_sk#16, d_year#17, d_moy#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] + +(12) Filter [codegen id : 2] +Input [3]: 
[d_date_sk#16, d_year#17, d_moy#18] +Condition : ((isnotnull(d_year#17) AND (d_year#17 = 2001)) AND isnotnull(d_date_sk#16)) + +(13) BroadcastExchange +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] + +(14) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_sold_date_sk#1] +Right keys [1]: [d_date_sk#16] +Join condition: None + +(15) Project [codegen id : 5] +Output [13]: [ws_sold_time_sk#2, ws_ship_mode_sk#3, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, d_moy#18] +Input [15]: [ws_sold_date_sk#1, ws_sold_time_sk#2, ws_ship_mode_sk#3, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_date_sk#16, d_year#17, d_moy#18] + +(16) Scan parquet default.time_dim +Output [2]: [t_time_sk#20, t_time#21] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/time_dim] +PushedFilters: [IsNotNull(t_time), GreaterThanOrEqual(t_time,30838), LessThanOrEqual(t_time,59638), IsNotNull(t_time_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [2]: [t_time_sk#20, t_time#21] + +(18) Filter [codegen id : 3] +Input [2]: [t_time_sk#20, t_time#21] +Condition : (((isnotnull(t_time#21) AND (t_time#21 >= 30838)) AND (t_time#21 <= 59638)) AND isnotnull(t_time_sk#20)) + +(19) Project [codegen id : 3] +Output [1]: [t_time_sk#20] +Input [2]: [t_time_sk#20, t_time#21] + +(20) BroadcastExchange +Input [1]: [t_time_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] + +(21) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_sold_time_sk#2] +Right keys [1]: [t_time_sk#20] +Join condition: None + +(22) Project [codegen id : 
5] +Output [12]: [ws_ship_mode_sk#3, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, d_moy#18] +Input [14]: [ws_sold_time_sk#2, ws_ship_mode_sk#3, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, d_moy#18, t_time_sk#20] + +(23) Scan parquet default.ship_mode +Output [2]: [sm_ship_mode_sk#23, sm_carrier#24] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/ship_mode] +PushedFilters: [In(sm_carrier, [DHL,BARIAN]), IsNotNull(sm_ship_mode_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [2]: [sm_ship_mode_sk#23, sm_carrier#24] + +(25) Filter [codegen id : 4] +Input [2]: [sm_ship_mode_sk#23, sm_carrier#24] +Condition : (sm_carrier#24 IN (DHL,BARIAN) AND isnotnull(sm_ship_mode_sk#23)) + +(26) Project [codegen id : 4] +Output [1]: [sm_ship_mode_sk#23] +Input [2]: [sm_ship_mode_sk#23, sm_carrier#24] + +(27) BroadcastExchange +Input [1]: [sm_ship_mode_sk#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_ship_mode_sk#3] +Right keys [1]: [sm_ship_mode_sk#23] +Join condition: None + +(29) Project [codegen id : 5] +Output [11]: [ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, d_moy#18] +Input [13]: [ws_ship_mode_sk#3, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, d_moy#18, sm_ship_mode_sk#23] + +(30) HashAggregate [codegen id : 5] +Input [11]: [ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, w_warehouse_name#9, 
w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, d_moy#18] +Keys [7]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17] +Functions [24]: [partial_sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 8) THEN 
CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as 
decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), 
DecimalType(18,2), true) ELSE 0.00 END)] +Aggregate Attributes [48]: [sum#26, isEmpty#27, sum#28, isEmpty#29, sum#30, isEmpty#31, sum#32, isEmpty#33, sum#34, isEmpty#35, sum#36, isEmpty#37, sum#38, isEmpty#39, sum#40, isEmpty#41, sum#42, isEmpty#43, sum#44, isEmpty#45, sum#46, isEmpty#47, sum#48, isEmpty#49, sum#50, isEmpty#51, sum#52, isEmpty#53, sum#54, isEmpty#55, sum#56, isEmpty#57, sum#58, isEmpty#59, sum#60, isEmpty#61, sum#62, isEmpty#63, sum#64, isEmpty#65, sum#66, isEmpty#67, sum#68, isEmpty#69, sum#70, isEmpty#71, sum#72, isEmpty#73] +Results [55]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119, sum#120, isEmpty#121] + +(31) Exchange +Input [55]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119, sum#120, isEmpty#121] +Arguments: hashpartitioning(w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, 5), true, [id=#122] + +(32) HashAggregate [codegen id : 6] 
+Input [55]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119, sum#120, isEmpty#121] +Keys [7]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17] +Functions [24]: [sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 6) THEN 
CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 
as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 
as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] +Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#123, sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#124, sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#125, sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#126, sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#127, sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#128, sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 
as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#129, sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#130, sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#131, sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#132, sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#133, sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#134, sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#135, sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#136, sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#137, sum(CASE WHEN 
(d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#138, sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#139, sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#140, sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#141, sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#142, sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#143, sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#144, sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#145, sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * 
promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#146] +Results [32]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, DHL,BARIAN AS ship_carriers#147, d_year#17 AS year#148, sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#123 AS jan_sales#149, sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#124 AS feb_sales#150, sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#125 AS mar_sales#151, sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#126 AS apr_sales#152, sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#127 AS may_sales#153, sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#128 AS jun_sales#154, sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as 
decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#129 AS jul_sales#155, sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#130 AS aug_sales#156, sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#131 AS sep_sales#157, sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#132 AS oct_sales#158, sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#133 AS nov_sales#159, sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#134 AS dec_sales#160, sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#135 AS jan_net#161, sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#136 AS feb_net#162, sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * 
promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#137 AS mar_net#163, sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#138 AS apr_net#164, sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#139 AS may_net#165, sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#140 AS jun_net#166, sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#141 AS jul_net#167, sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#142 AS aug_net#168, sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#143 AS sep_net#169, sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#144 AS oct_net#170, sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * 
promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#145 AS nov_net#171, sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#146 AS dec_net#172] + +(33) Scan parquet default.catalog_sales +Output [7]: [cs_sold_date_sk#173, cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_warehouse_sk#176, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_sold_time_sk), IsNotNull(cs_ship_mode_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 11] +Input [7]: [cs_sold_date_sk#173, cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_warehouse_sk#176, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179] + +(35) Filter [codegen id : 11] +Input [7]: [cs_sold_date_sk#173, cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_warehouse_sk#176, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179] +Condition : (((isnotnull(cs_warehouse_sk#176) AND isnotnull(cs_sold_date_sk#173)) AND isnotnull(cs_sold_time_sk#174)) AND isnotnull(cs_ship_mode_sk#175)) + +(36) ReusedExchange [Reuses operator id: 7] +Output [7]: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] + +(37) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_warehouse_sk#176] +Right keys [1]: [w_warehouse_sk#8] +Join condition: None + +(38) Project [codegen id : 11] +Output [12]: [cs_sold_date_sk#173, cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, 
w_county#12, w_state#13, w_country#14] +Input [14]: [cs_sold_date_sk#173, cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_warehouse_sk#176, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179, w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] + +(39) ReusedExchange [Reuses operator id: 13] +Output [3]: [d_date_sk#16, d_year#17, d_moy#18] + +(40) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_sold_date_sk#173] +Right keys [1]: [d_date_sk#16] +Join condition: None + +(41) Project [codegen id : 11] +Output [13]: [cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, d_moy#18] +Input [15]: [cs_sold_date_sk#173, cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_date_sk#16, d_year#17, d_moy#18] + +(42) ReusedExchange [Reuses operator id: 20] +Output [1]: [t_time_sk#20] + +(43) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_sold_time_sk#174] +Right keys [1]: [t_time_sk#20] +Join condition: None + +(44) Project [codegen id : 11] +Output [12]: [cs_ship_mode_sk#175, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, d_moy#18] +Input [14]: [cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, d_moy#18, t_time_sk#20] + +(45) ReusedExchange [Reuses operator id: 27] +Output [1]: [sm_ship_mode_sk#23] + +(46) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_ship_mode_sk#175] +Right keys [1]: [sm_ship_mode_sk#23] +Join condition: None 
+ +(47) Project [codegen id : 11] +Output [11]: [cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, d_moy#18] +Input [13]: [cs_ship_mode_sk#175, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, d_moy#18, sm_ship_mode_sk#23] + +(48) HashAggregate [codegen id : 11] +Input [11]: [cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, d_moy#18] +Keys [7]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17] +Functions [24]: [partial_sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), 
DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 
END), partial_sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), 
partial_sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] +Aggregate Attributes [48]: [sum#180, isEmpty#181, sum#182, isEmpty#183, sum#184, isEmpty#185, sum#186, isEmpty#187, sum#188, isEmpty#189, sum#190, isEmpty#191, sum#192, isEmpty#193, sum#194, isEmpty#195, sum#196, isEmpty#197, sum#198, isEmpty#199, sum#200, isEmpty#201, sum#202, isEmpty#203, sum#204, isEmpty#205, sum#206, isEmpty#207, sum#208, isEmpty#209, sum#210, isEmpty#211, sum#212, isEmpty#213, sum#214, isEmpty#215, sum#216, isEmpty#217, sum#218, isEmpty#219, sum#220, isEmpty#221, sum#222, isEmpty#223, sum#224, isEmpty#225, sum#226, isEmpty#227] +Results [55]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, sum#228, isEmpty#229, sum#230, isEmpty#231, sum#232, isEmpty#233, sum#234, isEmpty#235, sum#236, isEmpty#237, sum#238, isEmpty#239, sum#240, isEmpty#241, sum#242, isEmpty#243, sum#244, isEmpty#245, sum#246, isEmpty#247, sum#248, isEmpty#249, sum#250, isEmpty#251, sum#252, isEmpty#253, sum#254, isEmpty#255, sum#256, isEmpty#257, sum#258, isEmpty#259, sum#260, isEmpty#261, sum#262, isEmpty#263, sum#264, isEmpty#265, sum#266, isEmpty#267, sum#268, isEmpty#269, sum#270, isEmpty#271, sum#272, isEmpty#273, sum#274, isEmpty#275] + +(49) Exchange +Input [55]: 
[w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, sum#228, isEmpty#229, sum#230, isEmpty#231, sum#232, isEmpty#233, sum#234, isEmpty#235, sum#236, isEmpty#237, sum#238, isEmpty#239, sum#240, isEmpty#241, sum#242, isEmpty#243, sum#244, isEmpty#245, sum#246, isEmpty#247, sum#248, isEmpty#249, sum#250, isEmpty#251, sum#252, isEmpty#253, sum#254, isEmpty#255, sum#256, isEmpty#257, sum#258, isEmpty#259, sum#260, isEmpty#261, sum#262, isEmpty#263, sum#264, isEmpty#265, sum#266, isEmpty#267, sum#268, isEmpty#269, sum#270, isEmpty#271, sum#272, isEmpty#273, sum#274, isEmpty#275] +Arguments: hashpartitioning(w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, 5), true, [id=#276] + +(50) HashAggregate [codegen id : 12] +Input [55]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, sum#228, isEmpty#229, sum#230, isEmpty#231, sum#232, isEmpty#233, sum#234, isEmpty#235, sum#236, isEmpty#237, sum#238, isEmpty#239, sum#240, isEmpty#241, sum#242, isEmpty#243, sum#244, isEmpty#245, sum#246, isEmpty#247, sum#248, isEmpty#249, sum#250, isEmpty#251, sum#252, isEmpty#253, sum#254, isEmpty#255, sum#256, isEmpty#257, sum#258, isEmpty#259, sum#260, isEmpty#261, sum#262, isEmpty#263, sum#264, isEmpty#265, sum#266, isEmpty#267, sum#268, isEmpty#269, sum#270, isEmpty#271, sum#272, isEmpty#273, sum#274, isEmpty#275] +Keys [7]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17] +Functions [24]: [sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as 
decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 11) 
THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as 
decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] +Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#277, sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#278, sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * 
promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#279, sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#280, sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#281, sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#282, sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#283, sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#284, sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#285, sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#286, sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as 
decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#287, sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#288, sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#289, sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#290, sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#291, sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#292, sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#293, sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#294, sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) 
ELSE 0.00 END)#295, sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#296, sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#297, sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#298, sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#299, sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#300] +Results [32]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, DHL,BARIAN AS ship_carriers#301, d_year#17 AS year#302, sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#277 AS jan_sales#303, sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#278 AS feb_sales#304, sum(CASE WHEN (d_moy#18 = 3) THEN 
CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#279 AS mar_sales#305, sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#280 AS apr_sales#306, sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#281 AS may_sales#307, sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#282 AS jun_sales#308, sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#283 AS jul_sales#309, sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#284 AS aug_sales#310, sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#285 AS sep_sales#311, sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 
END)#286 AS oct_sales#312, sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#287 AS nov_sales#313, sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#288 AS dec_sales#314, sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#289 AS jan_net#315, sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#290 AS feb_net#316, sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#291 AS mar_net#317, sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#292 AS apr_net#318, sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#293 AS may_net#319, sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * 
promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#294 AS jun_net#320, sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#295 AS jul_net#321, sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#296 AS aug_net#322, sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#297 AS sep_net#323, sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#298 AS oct_net#324, sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#299 AS nov_net#325, sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#300 AS dec_net#326] + +(51) Union + +(52) HashAggregate [codegen id : 13] +Input [32]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#147, year#148, jan_sales#149, feb_sales#150, mar_sales#151, apr_sales#152, may_sales#153, jun_sales#154, jul_sales#155, 
aug_sales#156, sep_sales#157, oct_sales#158, nov_sales#159, dec_sales#160, jan_net#161, feb_net#162, mar_net#163, apr_net#164, may_net#165, jun_net#166, jul_net#167, aug_net#168, sep_net#169, oct_net#170, nov_net#171, dec_net#172] +Keys [8]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#147, year#148] +Functions [36]: [partial_sum(jan_sales#149), partial_sum(feb_sales#150), partial_sum(mar_sales#151), partial_sum(apr_sales#152), partial_sum(may_sales#153), partial_sum(jun_sales#154), partial_sum(jul_sales#155), partial_sum(aug_sales#156), partial_sum(sep_sales#157), partial_sum(oct_sales#158), partial_sum(nov_sales#159), partial_sum(dec_sales#160), partial_sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), 
partial_sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(jan_net#161), partial_sum(feb_net#162), partial_sum(mar_net#163), partial_sum(apr_net#164), partial_sum(may_net#165), partial_sum(jun_net#166), partial_sum(jul_net#167), partial_sum(aug_net#168), partial_sum(sep_net#169), partial_sum(oct_net#170), partial_sum(nov_net#171), partial_sum(dec_net#172)] +Aggregate Attributes [72]: [sum#327, isEmpty#328, sum#329, isEmpty#330, sum#331, isEmpty#332, sum#333, isEmpty#334, sum#335, isEmpty#336, sum#337, isEmpty#338, sum#339, isEmpty#340, sum#341, isEmpty#342, sum#343, isEmpty#344, sum#345, isEmpty#346, sum#347, isEmpty#348, sum#349, isEmpty#350, sum#351, isEmpty#352, sum#353, isEmpty#354, sum#355, isEmpty#356, sum#357, isEmpty#358, sum#359, isEmpty#360, sum#361, isEmpty#362, sum#363, isEmpty#364, sum#365, isEmpty#366, sum#367, isEmpty#368, sum#369, isEmpty#370, sum#371, isEmpty#372, sum#373, isEmpty#374, sum#375, isEmpty#376, sum#377, isEmpty#378, sum#379, isEmpty#380, sum#381, isEmpty#382, sum#383, isEmpty#384, sum#385, isEmpty#386, sum#387, isEmpty#388, sum#389, isEmpty#390, sum#391, isEmpty#392, sum#393, isEmpty#394, sum#395, 
isEmpty#396, sum#397, isEmpty#398] +Results [80]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#147, year#148, sum#399, isEmpty#400, sum#401, isEmpty#402, sum#403, isEmpty#404, sum#405, isEmpty#406, sum#407, isEmpty#408, sum#409, isEmpty#410, sum#411, isEmpty#412, sum#413, isEmpty#414, sum#415, isEmpty#416, sum#417, isEmpty#418, sum#419, isEmpty#420, sum#421, isEmpty#422, sum#423, isEmpty#424, sum#425, isEmpty#426, sum#427, isEmpty#428, sum#429, isEmpty#430, sum#431, isEmpty#432, sum#433, isEmpty#434, sum#435, isEmpty#436, sum#437, isEmpty#438, sum#439, isEmpty#440, sum#441, isEmpty#442, sum#443, isEmpty#444, sum#445, isEmpty#446, sum#447, isEmpty#448, sum#449, isEmpty#450, sum#451, isEmpty#452, sum#453, isEmpty#454, sum#455, isEmpty#456, sum#457, isEmpty#458, sum#459, isEmpty#460, sum#461, isEmpty#462, sum#463, isEmpty#464, sum#465, isEmpty#466, sum#467, isEmpty#468, sum#469, isEmpty#470] + +(53) Exchange +Input [80]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#147, year#148, sum#399, isEmpty#400, sum#401, isEmpty#402, sum#403, isEmpty#404, sum#405, isEmpty#406, sum#407, isEmpty#408, sum#409, isEmpty#410, sum#411, isEmpty#412, sum#413, isEmpty#414, sum#415, isEmpty#416, sum#417, isEmpty#418, sum#419, isEmpty#420, sum#421, isEmpty#422, sum#423, isEmpty#424, sum#425, isEmpty#426, sum#427, isEmpty#428, sum#429, isEmpty#430, sum#431, isEmpty#432, sum#433, isEmpty#434, sum#435, isEmpty#436, sum#437, isEmpty#438, sum#439, isEmpty#440, sum#441, isEmpty#442, sum#443, isEmpty#444, sum#445, isEmpty#446, sum#447, isEmpty#448, sum#449, isEmpty#450, sum#451, isEmpty#452, sum#453, isEmpty#454, sum#455, isEmpty#456, sum#457, isEmpty#458, sum#459, isEmpty#460, sum#461, isEmpty#462, sum#463, isEmpty#464, sum#465, isEmpty#466, sum#467, isEmpty#468, sum#469, isEmpty#470] +Arguments: hashpartitioning(w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, 
w_county#12, w_state#13, w_country#14, ship_carriers#147, year#148, 5), true, [id=#471] + +(54) HashAggregate [codegen id : 14] +Input [80]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#147, year#148, sum#399, isEmpty#400, sum#401, isEmpty#402, sum#403, isEmpty#404, sum#405, isEmpty#406, sum#407, isEmpty#408, sum#409, isEmpty#410, sum#411, isEmpty#412, sum#413, isEmpty#414, sum#415, isEmpty#416, sum#417, isEmpty#418, sum#419, isEmpty#420, sum#421, isEmpty#422, sum#423, isEmpty#424, sum#425, isEmpty#426, sum#427, isEmpty#428, sum#429, isEmpty#430, sum#431, isEmpty#432, sum#433, isEmpty#434, sum#435, isEmpty#436, sum#437, isEmpty#438, sum#439, isEmpty#440, sum#441, isEmpty#442, sum#443, isEmpty#444, sum#445, isEmpty#446, sum#447, isEmpty#448, sum#449, isEmpty#450, sum#451, isEmpty#452, sum#453, isEmpty#454, sum#455, isEmpty#456, sum#457, isEmpty#458, sum#459, isEmpty#460, sum#461, isEmpty#462, sum#463, isEmpty#464, sum#465, isEmpty#466, sum#467, isEmpty#468, sum#469, isEmpty#470] +Keys [8]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#147, year#148] +Functions [36]: [sum(jan_sales#149), sum(feb_sales#150), sum(mar_sales#151), sum(apr_sales#152), sum(may_sales#153), sum(jun_sales#154), sum(jul_sales#155), sum(aug_sales#156), sum(sep_sales#157), sum(oct_sales#158), sum(nov_sales#159), sum(dec_sales#160), sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), 
sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(jan_net#161), sum(feb_net#162), sum(mar_net#163), sum(apr_net#164), sum(may_net#165), sum(jun_net#166), sum(jul_net#167), sum(aug_net#168), sum(sep_net#169), sum(oct_net#170), sum(nov_net#171), sum(dec_net#172)] +Aggregate Attributes [36]: [sum(jan_sales#149)#472, sum(feb_sales#150)#473, sum(mar_sales#151)#474, sum(apr_sales#152)#475, sum(may_sales#153)#476, sum(jun_sales#154)#477, sum(jul_sales#155)#478, sum(aug_sales#156)#479, sum(sep_sales#157)#480, 
sum(oct_sales#158)#481, sum(nov_sales#159)#482, sum(dec_sales#160)#483, sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#484, sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#485, sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#486, sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#487, sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#488, sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#489, sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#490, sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#491, sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#492, sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#493, sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#494, 
sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#495, sum(jan_net#161)#496, sum(feb_net#162)#497, sum(mar_net#163)#498, sum(apr_net#164)#499, sum(may_net#165)#500, sum(jun_net#166)#501, sum(jul_net#167)#502, sum(aug_net#168)#503, sum(sep_net#169)#504, sum(oct_net#170)#505, sum(nov_net#171)#506, sum(dec_net#172)#507] +Results [44]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#147, year#148, sum(jan_sales#149)#472 AS jan_sales#508, sum(feb_sales#150)#473 AS feb_sales#509, sum(mar_sales#151)#474 AS mar_sales#510, sum(apr_sales#152)#475 AS apr_sales#511, sum(may_sales#153)#476 AS may_sales#512, sum(jun_sales#154)#477 AS jun_sales#513, sum(jul_sales#155)#478 AS jul_sales#514, sum(aug_sales#156)#479 AS aug_sales#515, sum(sep_sales#157)#480 AS sep_sales#516, sum(oct_sales#158)#481 AS oct_sales#517, sum(nov_sales#159)#482 AS nov_sales#518, sum(dec_sales#160)#483 AS dec_sales#519, sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#484 AS jan_sales_per_sq_foot#520, sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#485 AS feb_sales_per_sq_foot#521, sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#486 AS mar_sales_per_sq_foot#522, sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#487 AS apr_sales_per_sq_foot#523, sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) 
as decimal(28,2)))), DecimalType(38,12), true))#488 AS may_sales_per_sq_foot#524, sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#489 AS jun_sales_per_sq_foot#525, sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#490 AS jul_sales_per_sq_foot#526, sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#491 AS aug_sales_per_sq_foot#527, sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#492 AS sep_sales_per_sq_foot#528, sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#493 AS oct_sales_per_sq_foot#529, sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#494 AS nov_sales_per_sq_foot#530, sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#495 AS dec_sales_per_sq_foot#531, sum(jan_net#161)#496 AS jan_net#532, sum(feb_net#162)#497 AS feb_net#533, sum(mar_net#163)#498 AS mar_net#534, sum(apr_net#164)#499 AS apr_net#535, sum(may_net#165)#500 AS may_net#536, sum(jun_net#166)#501 AS jun_net#537, sum(jul_net#167)#502 AS jul_net#538, sum(aug_net#168)#503 AS aug_net#539, sum(sep_net#169)#504 AS sep_net#540, sum(oct_net#170)#505 AS oct_net#541, sum(nov_net#171)#506 AS nov_net#542, sum(dec_net#172)#507 AS dec_net#543] + +(55) TakeOrderedAndProject +Input [44]: 
[w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#147, year#148, jan_sales#508, feb_sales#509, mar_sales#510, apr_sales#511, may_sales#512, jun_sales#513, jul_sales#514, aug_sales#515, sep_sales#516, oct_sales#517, nov_sales#518, dec_sales#519, jan_sales_per_sq_foot#520, feb_sales_per_sq_foot#521, mar_sales_per_sq_foot#522, apr_sales_per_sq_foot#523, may_sales_per_sq_foot#524, jun_sales_per_sq_foot#525, jul_sales_per_sq_foot#526, aug_sales_per_sq_foot#527, sep_sales_per_sq_foot#528, oct_sales_per_sq_foot#529, nov_sales_per_sq_foot#530, dec_sales_per_sq_foot#531, jan_net#532, feb_net#533, mar_net#534, apr_net#535, may_net#536, jun_net#537, jul_net#538, aug_net#539, sep_net#540, oct_net#541, nov_net#542, dec_net#543] +Arguments: 100, [w_warehouse_name#9 ASC NULLS FIRST], [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#147, year#148, jan_sales#508, feb_sales#509, mar_sales#510, apr_sales#511, may_sales#512, jun_sales#513, jul_sales#514, aug_sales#515, sep_sales#516, oct_sales#517, nov_sales#518, dec_sales#519, jan_sales_per_sq_foot#520, feb_sales_per_sq_foot#521, mar_sales_per_sq_foot#522, apr_sales_per_sq_foot#523, ... 
20 more fields] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/simplified.txt new file mode 100644 index 0000000000000..2bce0e80224d2 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/simplified.txt @@ -0,0 +1,83 @@ +TakeOrderedAndProject [apr_net,apr_sales,apr_sales_per_sq_foot,aug_net,aug_sales,aug_sales_per_sq_foot,dec_net,dec_sales,dec_sales_per_sq_foot,feb_net,feb_sales,feb_sales_per_sq_foot,jan_net,jan_sales,jan_sales_per_sq_foot,jul_net,jul_sales,jul_sales_per_sq_foot,jun_net,jun_sales,jun_sales_per_sq_foot,mar_net,mar_sales,mar_sales_per_sq_foot,may_net,may_sales,may_sales_per_sq_foot,nov_net,nov_sales,nov_sales_per_sq_foot,oct_net,oct_sales,oct_sales_per_sq_foot,sep_net,sep_sales,sep_sales_per_sq_foot,ship_carriers,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft,year] + WholeStageCodegen (14) + HashAggregate [isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,ship_carriers,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft,year] 
[apr_net,apr_sales,apr_sales_per_sq_foot,aug_net,aug_sales,aug_sales_per_sq_foot,dec_net,dec_sales,dec_sales_per_sq_foot,feb_net,feb_sales,feb_sales_per_sq_foot,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,jan_net,jan_sales,jan_sales_per_sq_foot,jul_net,jul_sales,jul_sales_per_sq_foot,jun_net,jun_sales,jun_sales_per_sq_foot,mar_net,mar_sales,mar_sales_per_sq_foot,may_net,may_sales,may_sales_per_sq_foot,nov_net,nov_sales,nov_sales_per_sq_foot,oct_net,oct_sales,oct_sales_per_sq_foot,sep_net,sep_sales,sep_sales_per_sq_foot,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum(CheckOverflow((promote_precision(apr_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(aug_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(dec_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(feb_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(jan_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(jul_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(jun_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) 
as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(mar_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(may_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(nov_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(oct_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(sep_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(apr_net),sum(apr_sales),sum(aug_net),sum(aug_sales),sum(dec_net),sum(dec_sales),sum(feb_net),sum(feb_sales),sum(jan_net),sum(jan_sales),sum(jul_net),sum(jul_sales),sum(jun_net),sum(jun_sales),sum(mar_net),sum(mar_sales),sum(may_net),sum(may_sales),sum(nov_net),sum(nov_sales),sum(oct_net),sum(oct_sales),sum(sep_net),sum(sep_sales)] + InputAdapter + Exchange [ship_carriers,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft,year] #1 + WholeStageCodegen (13) + HashAggregate [apr_net,apr_sales,aug_net,aug_sales,dec_net,dec_sales,feb_net,feb_sales,jan_net,jan_sales,jul_net,jul_sales,jun_net,jun_sales,mar_net,mar_sales,may_net,may_sales,nov_net,nov_sales,oct_net,oct_sales,sep_net,sep_sales,ship_carriers,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft,year] 
[isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + InputAdapter + Union + WholeStageCodegen (6) + HashAggregate [d_year,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft] [apr_net,apr_sales,aug_net,aug_sales,dec_net,dec_sales,feb_net,feb_sales,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,jan_net,jan_sales,jul_net,jul_sales,jun_net,jun_sales,mar_net,mar_sales,may_net,may_sales,nov_net,nov_sales,oct_net,oct_sales,sep_net,sep_sales,ship_carriers,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as 
decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * 
promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN 
CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),year] + InputAdapter + Exchange [d_year,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft] #2 + WholeStageCodegen (5) + HashAggregate [d_moy,d_year,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft,ws_ext_sales_price,ws_net_paid,ws_quantity] 
[isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [d_moy,d_year,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft,ws_ext_sales_price,ws_net_paid,ws_quantity] + BroadcastHashJoin [sm_ship_mode_sk,ws_ship_mode_sk] + Project [d_moy,d_year,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft,ws_ext_sales_price,ws_net_paid,ws_quantity,ws_ship_mode_sk] + BroadcastHashJoin [t_time_sk,ws_sold_time_sk] + Project [d_moy,d_year,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft,ws_ext_sales_price,ws_net_paid,ws_quantity,ws_ship_mode_sk,ws_sold_time_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft,ws_ext_sales_price,ws_net_paid,ws_quantity,ws_ship_mode_sk,ws_sold_date_sk,ws_sold_time_sk] + BroadcastHashJoin [w_warehouse_sk,ws_warehouse_sk] + Filter [ws_ship_mode_sk,ws_sold_date_sk,ws_sold_time_sk,ws_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_sales_price,ws_net_paid,ws_quantity,ws_ship_mode_sk,ws_sold_date_sk,ws_sold_time_sk,ws_warehouse_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.warehouse [w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sk,w_warehouse_sq_ft] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [d_date_sk,d_year] + 
ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [t_time_sk] + Filter [t_time,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_time,t_time_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + Project [sm_ship_mode_sk] + Filter [sm_carrier,sm_ship_mode_sk] + ColumnarToRow + InputAdapter + Scan parquet default.ship_mode [sm_carrier,sm_ship_mode_sk] + WholeStageCodegen (12) + HashAggregate [d_year,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft] [apr_net,apr_sales,aug_net,aug_sales,dec_net,dec_sales,feb_net,feb_sales,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,jan_net,jan_sales,jul_net,jul_sales,jun_net,jun_sales,mar_net,mar_sales,may_net,may_sales,nov_net,nov_sales,oct_net,oct_sales,sep_net,sep_sales,ship_carriers,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * 
promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN 
CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 
END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),year] + InputAdapter + Exchange [d_year,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft] #7 + WholeStageCodegen (11) + HashAggregate [cs_net_paid_inc_tax,cs_quantity,cs_sales_price,d_moy,d_year,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft] [isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [cs_net_paid_inc_tax,cs_quantity,cs_sales_price,d_moy,d_year,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft] + BroadcastHashJoin [cs_ship_mode_sk,sm_ship_mode_sk] + Project 
[cs_net_paid_inc_tax,cs_quantity,cs_sales_price,cs_ship_mode_sk,d_moy,d_year,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft] + BroadcastHashJoin [cs_sold_time_sk,t_time_sk] + Project [cs_net_paid_inc_tax,cs_quantity,cs_sales_price,cs_ship_mode_sk,cs_sold_time_sk,d_moy,d_year,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_net_paid_inc_tax,cs_quantity,cs_sales_price,cs_ship_mode_sk,cs_sold_date_sk,cs_sold_time_sk,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft] + BroadcastHashJoin [cs_warehouse_sk,w_warehouse_sk] + Filter [cs_ship_mode_sk,cs_sold_date_sk,cs_sold_time_sk,cs_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_net_paid_inc_tax,cs_quantity,cs_sales_price,cs_ship_mode_sk,cs_sold_date_sk,cs_sold_time_sk,cs_warehouse_sk] + InputAdapter + ReusedExchange [w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sk,w_warehouse_sq_ft] #3 + InputAdapter + ReusedExchange [d_date_sk,d_moy,d_year] #4 + InputAdapter + ReusedExchange [t_time_sk] #5 + InputAdapter + ReusedExchange [sm_ship_mode_sk] #6 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.sf100/explain.txt new file mode 100644 index 0000000000000..06890e80266c9 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.sf100/explain.txt @@ -0,0 +1,190 @@ +== Physical Plan == +TakeOrderedAndProject (34) ++- * Filter (33) + +- Window (32) + +- * Sort (31) + +- Exchange (30) + +- * HashAggregate (29) + +- Exchange (28) + +- * HashAggregate (27) + +- * Expand (26) + +- * Project (25) + +- * SortMergeJoin Inner (24) + :- * Sort (18) + : +- Exchange (17) + : +- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner 
BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.store_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.date_dim (4) + : +- BroadcastExchange (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet default.store (11) + +- * Sort (23) + +- Exchange (22) + +- * Filter (21) + +- * ColumnarToRow (20) + +- Scan parquet default.item (19) + + +(1) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5] + +(3) Filter [codegen id : 3] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5] +Condition : ((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_item_sk#2)) + +(4) Scan parquet default.date_dim +Output [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] + +(6) Filter [codegen id : 1] +Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] +Condition : (((isnotnull(d_month_seq#7) AND (d_month_seq#7 >= 
1200)) AND (d_month_seq#7 <= 1211)) AND isnotnull(d_date_sk#6)) + +(7) Project [codegen id : 1] +Output [4]: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] +Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] + +(8) BroadcastExchange +Input [4]: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(10) Project [codegen id : 3] +Output [7]: [ss_item_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, d_year#8, d_moy#9, d_qoy#10] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] + +(11) Scan parquet default.store +Output [2]: [s_store_sk#12, s_store_id#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#12, s_store_id#13] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#12, s_store_id#13] +Condition : isnotnull(s_store_sk#12) + +(14) BroadcastExchange +Input [2]: [s_store_sk#12, s_store_id#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#12] +Join condition: None + +(16) Project [codegen id : 3] +Output [7]: [ss_item_sk#2, ss_quantity#4, ss_sales_price#5, d_year#8, d_moy#9, d_qoy#10, s_store_id#13] +Input [9]: [ss_item_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, d_year#8, d_moy#9, d_qoy#10, s_store_sk#12, s_store_id#13] + +(17) Exchange +Input [7]: [ss_item_sk#2, ss_quantity#4, ss_sales_price#5, d_year#8, d_moy#9, d_qoy#10, 
s_store_id#13] +Arguments: hashpartitioning(ss_item_sk#2, 5), true, [id=#15] + +(18) Sort [codegen id : 4] +Input [7]: [ss_item_sk#2, ss_quantity#4, ss_sales_price#5, d_year#8, d_moy#9, d_qoy#10, s_store_id#13] +Arguments: [ss_item_sk#2 ASC NULLS FIRST], false, 0 + +(19) Scan parquet default.item +Output [5]: [i_item_sk#16, i_brand#17, i_class#18, i_category#19, i_product_name#20] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 5] +Input [5]: [i_item_sk#16, i_brand#17, i_class#18, i_category#19, i_product_name#20] + +(21) Filter [codegen id : 5] +Input [5]: [i_item_sk#16, i_brand#17, i_class#18, i_category#19, i_product_name#20] +Condition : isnotnull(i_item_sk#16) + +(22) Exchange +Input [5]: [i_item_sk#16, i_brand#17, i_class#18, i_category#19, i_product_name#20] +Arguments: hashpartitioning(i_item_sk#16, 5), true, [id=#21] + +(23) Sort [codegen id : 6] +Input [5]: [i_item_sk#16, i_brand#17, i_class#18, i_category#19, i_product_name#20] +Arguments: [i_item_sk#16 ASC NULLS FIRST], false, 0 + +(24) SortMergeJoin [codegen id : 7] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#16] +Join condition: None + +(25) Project [codegen id : 7] +Output [10]: [ss_quantity#4, ss_sales_price#5, i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13] +Input [12]: [ss_item_sk#2, ss_quantity#4, ss_sales_price#5, d_year#8, d_moy#9, d_qoy#10, s_store_id#13, i_item_sk#16, i_brand#17, i_class#18, i_category#19, i_product_name#20] + +(26) Expand [codegen id : 7] +Input [10]: [ss_quantity#4, ss_sales_price#5, i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13] +Arguments: [List(ss_quantity#4, ss_sales_price#5, i_category#19, i_class#18, i_brand#17, i_product_name#20, 
d_year#8, d_qoy#10, d_moy#9, s_store_id#13, 0), List(ss_quantity#4, ss_sales_price#5, i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, null, 1), List(ss_quantity#4, ss_sales_price#5, i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, null, null, 3), List(ss_quantity#4, ss_sales_price#5, i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, null, null, null, 7), List(ss_quantity#4, ss_sales_price#5, i_category#19, i_class#18, i_brand#17, i_product_name#20, null, null, null, null, 15), List(ss_quantity#4, ss_sales_price#5, i_category#19, i_class#18, i_brand#17, null, null, null, null, null, 31), List(ss_quantity#4, ss_sales_price#5, i_category#19, i_class#18, null, null, null, null, null, null, 63), List(ss_quantity#4, ss_sales_price#5, i_category#19, null, null, null, null, null, null, null, 127), List(ss_quantity#4, ss_sales_price#5, null, null, null, null, null, null, null, null, 255)], [ss_quantity#4, ss_sales_price#5, i_category#22, i_class#23, i_brand#24, i_product_name#25, d_year#26, d_qoy#27, d_moy#28, s_store_id#29, spark_grouping_id#30] + +(27) HashAggregate [codegen id : 7] +Input [11]: [ss_quantity#4, ss_sales_price#5, i_category#22, i_class#23, i_brand#24, i_product_name#25, d_year#26, d_qoy#27, d_moy#28, s_store_id#29, spark_grouping_id#30] +Keys [9]: [i_category#22, i_class#23, i_brand#24, i_product_name#25, d_year#26, d_qoy#27, d_moy#28, s_store_id#29, spark_grouping_id#30] +Functions [1]: [partial_sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [2]: [sum#31, isEmpty#32] +Results [11]: [i_category#22, i_class#23, i_brand#24, i_product_name#25, d_year#26, d_qoy#27, d_moy#28, s_store_id#29, spark_grouping_id#30, sum#33, isEmpty#34] + +(28) Exchange +Input [11]: [i_category#22, i_class#23, i_brand#24, 
i_product_name#25, d_year#26, d_qoy#27, d_moy#28, s_store_id#29, spark_grouping_id#30, sum#33, isEmpty#34] +Arguments: hashpartitioning(i_category#22, i_class#23, i_brand#24, i_product_name#25, d_year#26, d_qoy#27, d_moy#28, s_store_id#29, spark_grouping_id#30, 5), true, [id=#35] + +(29) HashAggregate [codegen id : 8] +Input [11]: [i_category#22, i_class#23, i_brand#24, i_product_name#25, d_year#26, d_qoy#27, d_moy#28, s_store_id#29, spark_grouping_id#30, sum#33, isEmpty#34] +Keys [9]: [i_category#22, i_class#23, i_brand#24, i_product_name#25, d_year#26, d_qoy#27, d_moy#28, s_store_id#29, spark_grouping_id#30] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#36] +Results [9]: [i_category#22, i_class#23, i_brand#24, i_product_name#25, d_year#26, d_qoy#27, d_moy#28, s_store_id#29, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#36 AS sumsales#37] + +(30) Exchange +Input [9]: [i_category#22, i_class#23, i_brand#24, i_product_name#25, d_year#26, d_qoy#27, d_moy#28, s_store_id#29, sumsales#37] +Arguments: hashpartitioning(i_category#22, 5), true, [id=#38] + +(31) Sort [codegen id : 9] +Input [9]: [i_category#22, i_class#23, i_brand#24, i_product_name#25, d_year#26, d_qoy#27, d_moy#28, s_store_id#29, sumsales#37] +Arguments: [i_category#22 ASC NULLS FIRST, sumsales#37 DESC NULLS LAST], false, 0 + +(32) Window +Input [9]: [i_category#22, i_class#23, i_brand#24, i_product_name#25, d_year#26, d_qoy#27, d_moy#28, s_store_id#29, 
sumsales#37] +Arguments: [rank(sumsales#37) windowspecdefinition(i_category#22, sumsales#37 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#39], [i_category#22], [sumsales#37 DESC NULLS LAST] + +(33) Filter [codegen id : 10] +Input [10]: [i_category#22, i_class#23, i_brand#24, i_product_name#25, d_year#26, d_qoy#27, d_moy#28, s_store_id#29, sumsales#37, rk#39] +Condition : (isnotnull(rk#39) AND (rk#39 <= 100)) + +(34) TakeOrderedAndProject +Input [10]: [i_category#22, i_class#23, i_brand#24, i_product_name#25, d_year#26, d_qoy#27, d_moy#28, s_store_id#29, sumsales#37, rk#39] +Arguments: 100, [i_category#22 ASC NULLS FIRST, i_class#23 ASC NULLS FIRST, i_brand#24 ASC NULLS FIRST, i_product_name#25 ASC NULLS FIRST, d_year#26 ASC NULLS FIRST, d_qoy#27 ASC NULLS FIRST, d_moy#28 ASC NULLS FIRST, s_store_id#29 ASC NULLS FIRST, sumsales#37 ASC NULLS FIRST, rk#39 ASC NULLS FIRST], [i_category#22, i_class#23, i_brand#24, i_product_name#25, d_year#26, d_qoy#27, d_moy#28, s_store_id#29, sumsales#37, rk#39] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.sf100/simplified.txt new file mode 100644 index 0000000000000..bba681c658162 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.sf100/simplified.txt @@ -0,0 +1,57 @@ +TakeOrderedAndProject [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,rk,s_store_id,sumsales] + WholeStageCodegen (10) + Filter [rk] + InputAdapter + Window [i_category,sumsales] + WholeStageCodegen (9) + Sort [i_category,sumsales] + InputAdapter + Exchange [i_category] #1 + WholeStageCodegen (8) + HashAggregate [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,spark_grouping_id,sum] [isEmpty,sum,sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * 
promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales] + InputAdapter + Exchange [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,s_store_id,spark_grouping_id] #2 + WholeStageCodegen (7) + HashAggregate [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,s_store_id,spark_grouping_id,ss_quantity,ss_sales_price] [isEmpty,isEmpty,sum,sum] + Expand [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,s_store_id,ss_quantity,ss_sales_price] + Project [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,s_store_id,ss_quantity,ss_sales_price] + SortMergeJoin [i_item_sk,ss_item_sk] + InputAdapter + WholeStageCodegen (4) + Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #3 + WholeStageCodegen (3) + Project [d_moy,d_qoy,d_year,s_store_id,ss_item_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_moy,d_qoy,d_year,ss_item_sk,ss_quantity,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk,d_moy,d_qoy,d_year] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq,d_moy,d_qoy,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_id,s_store_sk] + InputAdapter + WholeStageCodegen (6) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #6 + WholeStageCodegen (5) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_category,i_class,i_item_sk,i_product_name] diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/explain.txt new file mode 100644 index 0000000000000..21070074a3111 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/explain.txt @@ -0,0 +1,175 @@ +== Physical Plan == +TakeOrderedAndProject (31) ++- * Filter (30) + +- Window (29) + +- * Sort (28) + +- Exchange (27) + +- * HashAggregate (26) + +- Exchange (25) + +- * HashAggregate (24) + +- * Expand (23) + +- * Project (22) + +- * BroadcastHashJoin Inner BuildRight (21) + :- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.store_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.date_dim (4) + : +- BroadcastExchange (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet default.store (11) + +- BroadcastExchange (20) + +- * Filter (19) + +- * ColumnarToRow (18) + +- Scan parquet default.item (17) + + +(1) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5] + +(3) Filter [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5] +Condition : ((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#3)) 
AND isnotnull(ss_item_sk#2)) + +(4) Scan parquet default.date_dim +Output [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] + +(6) Filter [codegen id : 1] +Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] +Condition : (((isnotnull(d_month_seq#7) AND (d_month_seq#7 >= 1200)) AND (d_month_seq#7 <= 1211)) AND isnotnull(d_date_sk#6)) + +(7) Project [codegen id : 1] +Output [4]: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] +Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] + +(8) BroadcastExchange +Input [4]: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(10) Project [codegen id : 4] +Output [7]: [ss_item_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, d_year#8, d_moy#9, d_qoy#10] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] + +(11) Scan parquet default.store +Output [2]: [s_store_sk#12, s_store_id#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#12, s_store_id#13] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#12, s_store_id#13] 
+Condition : isnotnull(s_store_sk#12) + +(14) BroadcastExchange +Input [2]: [s_store_sk#12, s_store_id#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#12] +Join condition: None + +(16) Project [codegen id : 4] +Output [7]: [ss_item_sk#2, ss_quantity#4, ss_sales_price#5, d_year#8, d_moy#9, d_qoy#10, s_store_id#13] +Input [9]: [ss_item_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, d_year#8, d_moy#9, d_qoy#10, s_store_sk#12, s_store_id#13] + +(17) Scan parquet default.item +Output [5]: [i_item_sk#15, i_brand#16, i_class#17, i_category#18, i_product_name#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [5]: [i_item_sk#15, i_brand#16, i_class#17, i_category#18, i_product_name#19] + +(19) Filter [codegen id : 3] +Input [5]: [i_item_sk#15, i_brand#16, i_class#17, i_category#18, i_product_name#19] +Condition : isnotnull(i_item_sk#15) + +(20) BroadcastExchange +Input [5]: [i_item_sk#15, i_brand#16, i_class#17, i_category#18, i_product_name#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#20] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#15] +Join condition: None + +(22) Project [codegen id : 4] +Output [10]: [ss_quantity#4, ss_sales_price#5, i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13] +Input [12]: [ss_item_sk#2, ss_quantity#4, ss_sales_price#5, d_year#8, d_moy#9, d_qoy#10, s_store_id#13, i_item_sk#15, i_brand#16, i_class#17, i_category#18, i_product_name#19] + +(23) Expand [codegen id : 4] +Input [10]: [ss_quantity#4, 
ss_sales_price#5, i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13] +Arguments: [List(ss_quantity#4, ss_sales_price#5, i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, 0), List(ss_quantity#4, ss_sales_price#5, i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, null, 1), List(ss_quantity#4, ss_sales_price#5, i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, null, null, 3), List(ss_quantity#4, ss_sales_price#5, i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, null, null, null, 7), List(ss_quantity#4, ss_sales_price#5, i_category#18, i_class#17, i_brand#16, i_product_name#19, null, null, null, null, 15), List(ss_quantity#4, ss_sales_price#5, i_category#18, i_class#17, i_brand#16, null, null, null, null, null, 31), List(ss_quantity#4, ss_sales_price#5, i_category#18, i_class#17, null, null, null, null, null, null, 63), List(ss_quantity#4, ss_sales_price#5, i_category#18, null, null, null, null, null, null, null, 127), List(ss_quantity#4, ss_sales_price#5, null, null, null, null, null, null, null, null, 255)], [ss_quantity#4, ss_sales_price#5, i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29] + +(24) HashAggregate [codegen id : 4] +Input [11]: [ss_quantity#4, ss_sales_price#5, i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29] +Keys [9]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29] +Functions [1]: [partial_sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [2]: [sum#30, isEmpty#31] 
+Results [11]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29, sum#32, isEmpty#33] + +(25) Exchange +Input [11]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29, sum#32, isEmpty#33] +Arguments: hashpartitioning(i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29, 5), true, [id=#34] + +(26) HashAggregate [codegen id : 5] +Input [11]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29, sum#32, isEmpty#33] +Keys [9]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#35] +Results [9]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#35 AS sumsales#36] + +(27) Exchange +Input [9]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, sumsales#36] +Arguments: hashpartitioning(i_category#21, 5), true, [id=#37] + +(28) Sort [codegen id : 6] +Input [9]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, 
s_store_id#28, sumsales#36] +Arguments: [i_category#21 ASC NULLS FIRST, sumsales#36 DESC NULLS LAST], false, 0 + +(29) Window +Input [9]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, sumsales#36] +Arguments: [rank(sumsales#36) windowspecdefinition(i_category#21, sumsales#36 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#38], [i_category#21], [sumsales#36 DESC NULLS LAST] + +(30) Filter [codegen id : 7] +Input [10]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, sumsales#36, rk#38] +Condition : (isnotnull(rk#38) AND (rk#38 <= 100)) + +(31) TakeOrderedAndProject +Input [10]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, sumsales#36, rk#38] +Arguments: 100, [i_category#21 ASC NULLS FIRST, i_class#22 ASC NULLS FIRST, i_brand#23 ASC NULLS FIRST, i_product_name#24 ASC NULLS FIRST, d_year#25 ASC NULLS FIRST, d_qoy#26 ASC NULLS FIRST, d_moy#27 ASC NULLS FIRST, s_store_id#28 ASC NULLS FIRST, sumsales#36 ASC NULLS FIRST, rk#38 ASC NULLS FIRST], [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, sumsales#36, rk#38] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/simplified.txt new file mode 100644 index 0000000000000..faf21736c3f60 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/simplified.txt @@ -0,0 +1,48 @@ +TakeOrderedAndProject [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,rk,s_store_id,sumsales] + WholeStageCodegen (7) + Filter [rk] + InputAdapter + Window [i_category,sumsales] + WholeStageCodegen (6) + Sort [i_category,sumsales] + InputAdapter + Exchange [i_category] #1 + WholeStageCodegen (5) + HashAggregate 
[d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,spark_grouping_id,sum] [isEmpty,sum,sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales] + InputAdapter + Exchange [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,s_store_id,spark_grouping_id] #2 + WholeStageCodegen (4) + HashAggregate [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,s_store_id,spark_grouping_id,ss_quantity,ss_sales_price] [isEmpty,isEmpty,sum,sum] + Expand [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,s_store_id,ss_quantity,ss_sales_price] + Project [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,s_store_id,ss_quantity,ss_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [d_moy,d_qoy,d_year,s_store_id,ss_item_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_moy,d_qoy,d_year,ss_item_sk,ss_quantity,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk,d_moy,d_qoy,d_year] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq,d_moy,d_qoy,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_id,s_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_category,i_class,i_item_sk,i_product_name] diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68.sf100/explain.txt new file mode 100644 index 0000000000000..ef8dc7fc917e7 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68.sf100/explain.txt @@ -0,0 +1,281 @@ +== Physical Plan == +TakeOrderedAndProject (51) ++- * Project (50) + +- * SortMergeJoin Inner (49) + :- * Sort (14) + : +- Exchange (13) + : +- * Project (12) + : +- * SortMergeJoin Inner (11) + : :- * Sort (5) + : : +- Exchange (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.customer (1) + : +- * Sort (10) + : +- Exchange (9) + : +- * Filter (8) + : +- * ColumnarToRow (7) + : +- Scan parquet default.customer_address (6) + +- * Sort (48) + +- Exchange (47) + +- * HashAggregate (46) + +- * HashAggregate (45) + +- * Project (44) + +- * SortMergeJoin Inner (43) + :- * Sort (40) + : +- Exchange (39) + : +- * Project (38) + : +- * BroadcastHashJoin Inner BuildRight (37) + : :- * Project (31) + : : +- * BroadcastHashJoin Inner BuildRight (30) + : : :- * Project (24) + : : : +- * BroadcastHashJoin Inner BuildRight (23) + : : : :- * Filter (17) + : : : : +- * ColumnarToRow (16) + : : : : +- Scan parquet default.store_sales (15) + : : : +- BroadcastExchange (22) + : : : +- * Project (21) + : : : +- * Filter (20) + : : : +- * ColumnarToRow (19) + : : : +- Scan parquet default.date_dim (18) + : : +- BroadcastExchange (29) + : : +- * Project (28) + : : +- * Filter (27) + : : +- * ColumnarToRow (26) + : : +- Scan parquet default.store (25) + : +- BroadcastExchange (36) + : +- * Project (35) + : +- * Filter (34) + : +- * ColumnarToRow (33) + : +- Scan parquet default.household_demographics (32) + +- * Sort (42) + +- ReusedExchange (41) + + +(1) Scan parquet default.customer +Output [4]: [c_customer_sk#1, c_current_addr_sk#2, c_first_name#3, c_last_name#4] +Batched: true 
+Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [4]: [c_customer_sk#1, c_current_addr_sk#2, c_first_name#3, c_last_name#4] + +(3) Filter [codegen id : 1] +Input [4]: [c_customer_sk#1, c_current_addr_sk#2, c_first_name#3, c_last_name#4] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_current_addr_sk#2)) + +(4) Exchange +Input [4]: [c_customer_sk#1, c_current_addr_sk#2, c_first_name#3, c_last_name#4] +Arguments: hashpartitioning(c_current_addr_sk#2, 5), true, [id=#5] + +(5) Sort [codegen id : 2] +Input [4]: [c_customer_sk#1, c_current_addr_sk#2, c_first_name#3, c_last_name#4] +Arguments: [c_current_addr_sk#2 ASC NULLS FIRST], false, 0 + +(6) Scan parquet default.customer_address +Output [2]: [ca_address_sk#6, ca_city#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 3] +Input [2]: [ca_address_sk#6, ca_city#7] + +(8) Filter [codegen id : 3] +Input [2]: [ca_address_sk#6, ca_city#7] +Condition : (isnotnull(ca_address_sk#6) AND isnotnull(ca_city#7)) + +(9) Exchange +Input [2]: [ca_address_sk#6, ca_city#7] +Arguments: hashpartitioning(ca_address_sk#6, 5), true, [id=#8] + +(10) Sort [codegen id : 4] +Input [2]: [ca_address_sk#6, ca_city#7] +Arguments: [ca_address_sk#6 ASC NULLS FIRST], false, 0 + +(11) SortMergeJoin [codegen id : 5] +Left keys [1]: [c_current_addr_sk#2] +Right keys [1]: [ca_address_sk#6] +Join condition: None + +(12) Project [codegen id : 5] +Output [4]: [c_customer_sk#1, c_first_name#3, c_last_name#4, ca_city#7] +Input [6]: [c_customer_sk#1, 
c_current_addr_sk#2, c_first_name#3, c_last_name#4, ca_address_sk#6, ca_city#7] + +(13) Exchange +Input [4]: [c_customer_sk#1, c_first_name#3, c_last_name#4, ca_city#7] +Arguments: hashpartitioning(c_customer_sk#1, 5), true, [id=#9] + +(14) Sort [codegen id : 6] +Input [4]: [c_customer_sk#1, c_first_name#3, c_last_name#4, ca_city#7] +Arguments: [c_customer_sk#1 ASC NULLS FIRST], false, 0 + +(15) Scan parquet default.store_sales +Output [9]: [ss_sold_date_sk#10, ss_customer_sk#11, ss_hdemo_sk#12, ss_addr_sk#13, ss_store_sk#14, ss_ticket_number#15, ss_ext_sales_price#16, ss_ext_list_price#17, ss_ext_tax#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 10] +Input [9]: [ss_sold_date_sk#10, ss_customer_sk#11, ss_hdemo_sk#12, ss_addr_sk#13, ss_store_sk#14, ss_ticket_number#15, ss_ext_sales_price#16, ss_ext_list_price#17, ss_ext_tax#18] + +(17) Filter [codegen id : 10] +Input [9]: [ss_sold_date_sk#10, ss_customer_sk#11, ss_hdemo_sk#12, ss_addr_sk#13, ss_store_sk#14, ss_ticket_number#15, ss_ext_sales_price#16, ss_ext_list_price#17, ss_ext_tax#18] +Condition : ((((isnotnull(ss_sold_date_sk#10) AND isnotnull(ss_store_sk#14)) AND isnotnull(ss_hdemo_sk#12)) AND isnotnull(ss_addr_sk#13)) AND isnotnull(ss_customer_sk#11)) + +(18) Scan parquet default.date_dim +Output [3]: [d_date_sk#19, d_year#20, d_dom#21] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(19) 
ColumnarToRow [codegen id : 7] +Input [3]: [d_date_sk#19, d_year#20, d_dom#21] + +(20) Filter [codegen id : 7] +Input [3]: [d_date_sk#19, d_year#20, d_dom#21] +Condition : ((((isnotnull(d_dom#21) AND (d_dom#21 >= 1)) AND (d_dom#21 <= 2)) AND d_year#20 IN (1999,2000,2001)) AND isnotnull(d_date_sk#19)) + +(21) Project [codegen id : 7] +Output [1]: [d_date_sk#19] +Input [3]: [d_date_sk#19, d_year#20, d_dom#21] + +(22) BroadcastExchange +Input [1]: [d_date_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] + +(23) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ss_sold_date_sk#10] +Right keys [1]: [d_date_sk#19] +Join condition: None + +(24) Project [codegen id : 10] +Output [8]: [ss_customer_sk#11, ss_hdemo_sk#12, ss_addr_sk#13, ss_store_sk#14, ss_ticket_number#15, ss_ext_sales_price#16, ss_ext_list_price#17, ss_ext_tax#18] +Input [10]: [ss_sold_date_sk#10, ss_customer_sk#11, ss_hdemo_sk#12, ss_addr_sk#13, ss_store_sk#14, ss_ticket_number#15, ss_ext_sales_price#16, ss_ext_list_price#17, ss_ext_tax#18, d_date_sk#19] + +(25) Scan parquet default.store +Output [2]: [s_store_sk#23, s_city#24] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: [In(s_city, [Midway,Fairview]), IsNotNull(s_store_sk)] +ReadSchema: struct + +(26) ColumnarToRow [codegen id : 8] +Input [2]: [s_store_sk#23, s_city#24] + +(27) Filter [codegen id : 8] +Input [2]: [s_store_sk#23, s_city#24] +Condition : (s_city#24 IN (Midway,Fairview) AND isnotnull(s_store_sk#23)) + +(28) Project [codegen id : 8] +Output [1]: [s_store_sk#23] +Input [2]: [s_store_sk#23, s_city#24] + +(29) BroadcastExchange +Input [1]: [s_store_sk#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] + +(30) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ss_store_sk#14] +Right keys [1]: 
[s_store_sk#23] +Join condition: None + +(31) Project [codegen id : 10] +Output [7]: [ss_customer_sk#11, ss_hdemo_sk#12, ss_addr_sk#13, ss_ticket_number#15, ss_ext_sales_price#16, ss_ext_list_price#17, ss_ext_tax#18] +Input [9]: [ss_customer_sk#11, ss_hdemo_sk#12, ss_addr_sk#13, ss_store_sk#14, ss_ticket_number#15, ss_ext_sales_price#16, ss_ext_list_price#17, ss_ext_tax#18, s_store_sk#23] + +(32) Scan parquet default.household_demographics +Output [3]: [hd_demo_sk#26, hd_dep_count#27, hd_vehicle_count#28] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/household_demographics] +PushedFilters: [Or(EqualTo(hd_dep_count,4),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(33) ColumnarToRow [codegen id : 9] +Input [3]: [hd_demo_sk#26, hd_dep_count#27, hd_vehicle_count#28] + +(34) Filter [codegen id : 9] +Input [3]: [hd_demo_sk#26, hd_dep_count#27, hd_vehicle_count#28] +Condition : (((hd_dep_count#27 = 4) OR (hd_vehicle_count#28 = 3)) AND isnotnull(hd_demo_sk#26)) + +(35) Project [codegen id : 9] +Output [1]: [hd_demo_sk#26] +Input [3]: [hd_demo_sk#26, hd_dep_count#27, hd_vehicle_count#28] + +(36) BroadcastExchange +Input [1]: [hd_demo_sk#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#29] + +(37) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ss_hdemo_sk#12] +Right keys [1]: [hd_demo_sk#26] +Join condition: None + +(38) Project [codegen id : 10] +Output [6]: [ss_customer_sk#11, ss_addr_sk#13, ss_ticket_number#15, ss_ext_sales_price#16, ss_ext_list_price#17, ss_ext_tax#18] +Input [8]: [ss_customer_sk#11, ss_hdemo_sk#12, ss_addr_sk#13, ss_ticket_number#15, ss_ext_sales_price#16, ss_ext_list_price#17, ss_ext_tax#18, hd_demo_sk#26] + +(39) Exchange +Input [6]: [ss_customer_sk#11, ss_addr_sk#13, ss_ticket_number#15, ss_ext_sales_price#16, ss_ext_list_price#17, ss_ext_tax#18] 
+Arguments: hashpartitioning(ss_addr_sk#13, 5), true, [id=#30] + +(40) Sort [codegen id : 11] +Input [6]: [ss_customer_sk#11, ss_addr_sk#13, ss_ticket_number#15, ss_ext_sales_price#16, ss_ext_list_price#17, ss_ext_tax#18] +Arguments: [ss_addr_sk#13 ASC NULLS FIRST], false, 0 + +(41) ReusedExchange [Reuses operator id: 9] +Output [2]: [ca_address_sk#6, ca_city#7] + +(42) Sort [codegen id : 13] +Input [2]: [ca_address_sk#6, ca_city#7] +Arguments: [ca_address_sk#6 ASC NULLS FIRST], false, 0 + +(43) SortMergeJoin [codegen id : 14] +Left keys [1]: [ss_addr_sk#13] +Right keys [1]: [ca_address_sk#6] +Join condition: None + +(44) Project [codegen id : 14] +Output [7]: [ss_customer_sk#11, ss_addr_sk#13, ss_ticket_number#15, ss_ext_sales_price#16, ss_ext_list_price#17, ss_ext_tax#18, ca_city#7] +Input [8]: [ss_customer_sk#11, ss_addr_sk#13, ss_ticket_number#15, ss_ext_sales_price#16, ss_ext_list_price#17, ss_ext_tax#18, ca_address_sk#6, ca_city#7] + +(45) HashAggregate [codegen id : 14] +Input [7]: [ss_customer_sk#11, ss_addr_sk#13, ss_ticket_number#15, ss_ext_sales_price#16, ss_ext_list_price#17, ss_ext_tax#18, ca_city#7] +Keys [4]: [ss_ticket_number#15, ss_customer_sk#11, ss_addr_sk#13, ca_city#7] +Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#16)), partial_sum(UnscaledValue(ss_ext_list_price#17)), partial_sum(UnscaledValue(ss_ext_tax#18))] +Aggregate Attributes [3]: [sum#31, sum#32, sum#33] +Results [7]: [ss_ticket_number#15, ss_customer_sk#11, ss_addr_sk#13, ca_city#7, sum#34, sum#35, sum#36] + +(46) HashAggregate [codegen id : 14] +Input [7]: [ss_ticket_number#15, ss_customer_sk#11, ss_addr_sk#13, ca_city#7, sum#34, sum#35, sum#36] +Keys [4]: [ss_ticket_number#15, ss_customer_sk#11, ss_addr_sk#13, ca_city#7] +Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#16)), sum(UnscaledValue(ss_ext_list_price#17)), sum(UnscaledValue(ss_ext_tax#18))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#16))#37, 
sum(UnscaledValue(ss_ext_list_price#17))#38, sum(UnscaledValue(ss_ext_tax#18))#39] +Results [6]: [ss_ticket_number#15, ss_customer_sk#11, ca_city#7 AS bought_city#40, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#16))#37,17,2) AS extended_price#41, MakeDecimal(sum(UnscaledValue(ss_ext_list_price#17))#38,17,2) AS list_price#42, MakeDecimal(sum(UnscaledValue(ss_ext_tax#18))#39,17,2) AS extended_tax#43] + +(47) Exchange +Input [6]: [ss_ticket_number#15, ss_customer_sk#11, bought_city#40, extended_price#41, list_price#42, extended_tax#43] +Arguments: hashpartitioning(ss_customer_sk#11, 5), true, [id=#44] + +(48) Sort [codegen id : 15] +Input [6]: [ss_ticket_number#15, ss_customer_sk#11, bought_city#40, extended_price#41, list_price#42, extended_tax#43] +Arguments: [ss_customer_sk#11 ASC NULLS FIRST], false, 0 + +(49) SortMergeJoin [codegen id : 16] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#11] +Join condition: NOT (ca_city#7 = bought_city#40) + +(50) Project [codegen id : 16] +Output [8]: [c_last_name#4, c_first_name#3, ca_city#7, bought_city#40, ss_ticket_number#15, extended_price#41, extended_tax#43, list_price#42] +Input [10]: [c_customer_sk#1, c_first_name#3, c_last_name#4, ca_city#7, ss_ticket_number#15, ss_customer_sk#11, bought_city#40, extended_price#41, list_price#42, extended_tax#43] + +(51) TakeOrderedAndProject +Input [8]: [c_last_name#4, c_first_name#3, ca_city#7, bought_city#40, ss_ticket_number#15, extended_price#41, extended_tax#43, list_price#42] +Arguments: 100, [c_last_name#4 ASC NULLS FIRST, ss_ticket_number#15 ASC NULLS FIRST], [c_last_name#4, c_first_name#3, ca_city#7, bought_city#40, ss_ticket_number#15, extended_price#41, extended_tax#43, list_price#42] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68.sf100/simplified.txt new file mode 100644 index 0000000000000..0108f810f7b49 --- 
/dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68.sf100/simplified.txt @@ -0,0 +1,87 @@ +TakeOrderedAndProject [bought_city,c_first_name,c_last_name,ca_city,extended_price,extended_tax,list_price,ss_ticket_number] + WholeStageCodegen (16) + Project [bought_city,c_first_name,c_last_name,ca_city,extended_price,extended_tax,list_price,ss_ticket_number] + SortMergeJoin [bought_city,c_customer_sk,ca_city,ss_customer_sk] + InputAdapter + WholeStageCodegen (6) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #1 + WholeStageCodegen (5) + Project [c_customer_sk,c_first_name,c_last_name,ca_city] + SortMergeJoin [c_current_addr_sk,ca_address_sk] + InputAdapter + WholeStageCodegen (2) + Sort [c_current_addr_sk] + InputAdapter + Exchange [c_current_addr_sk] #2 + WholeStageCodegen (1) + Filter [c_current_addr_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_customer_sk,c_first_name,c_last_name] + InputAdapter + WholeStageCodegen (4) + Sort [ca_address_sk] + InputAdapter + Exchange [ca_address_sk] #3 + WholeStageCodegen (3) + Filter [ca_address_sk,ca_city] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_city] + InputAdapter + WholeStageCodegen (15) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #4 + WholeStageCodegen (14) + HashAggregate [ca_city,ss_addr_sk,ss_customer_sk,ss_ticket_number,sum,sum,sum] [bought_city,extended_price,extended_tax,list_price,sum,sum,sum,sum(UnscaledValue(ss_ext_list_price)),sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_ext_tax))] + HashAggregate [ca_city,ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_ticket_number] [sum,sum,sum,sum,sum,sum] + Project [ca_city,ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_ticket_number] + SortMergeJoin [ca_address_sk,ss_addr_sk] + InputAdapter + WholeStageCodegen (11) + Sort 
[ss_addr_sk] + InputAdapter + Exchange [ss_addr_sk] #5 + WholeStageCodegen (10) + Project [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_ticket_number] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_hdemo_sk,ss_ticket_number] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_hdemo_sk,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_addr_sk,ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Project [d_date_sk] + Filter [d_date_sk,d_dom,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_dom,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (8) + Project [s_store_sk] + Filter [s_city,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_city,s_store_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (9) + Project [hd_demo_sk] + Filter [hd_demo_sk,hd_dep_count,hd_vehicle_count] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + WholeStageCodegen (13) + Sort [ca_address_sk] + InputAdapter + ReusedExchange [ca_address_sk,ca_city] #3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/explain.txt new file mode 100644 index 0000000000000..98af6d9af75b8 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/explain.txt @@ -0,0 +1,241 @@ +== Physical Plan 
== +TakeOrderedAndProject (43) ++- * Project (42) + +- * BroadcastHashJoin Inner BuildRight (41) + :- * Project (39) + : +- * BroadcastHashJoin Inner BuildRight (38) + : :- * HashAggregate (33) + : : +- Exchange (32) + : : +- * HashAggregate (31) + : : +- * Project (30) + : : +- * BroadcastHashJoin Inner BuildRight (29) + : : :- * Project (24) + : : : +- * BroadcastHashJoin Inner BuildRight (23) + : : : :- * Project (17) + : : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : : :- * Project (10) + : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : :- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet default.store_sales (1) + : : : : : +- BroadcastExchange (8) + : : : : : +- * Project (7) + : : : : : +- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.date_dim (4) + : : : : +- BroadcastExchange (15) + : : : : +- * Project (14) + : : : : +- * Filter (13) + : : : : +- * ColumnarToRow (12) + : : : : +- Scan parquet default.store (11) + : : : +- BroadcastExchange (22) + : : : +- * Project (21) + : : : +- * Filter (20) + : : : +- * ColumnarToRow (19) + : : : +- Scan parquet default.household_demographics (18) + : : +- BroadcastExchange (28) + : : +- * Filter (27) + : : +- * ColumnarToRow (26) + : : +- Scan parquet default.customer_address (25) + : +- BroadcastExchange (37) + : +- * Filter (36) + : +- * ColumnarToRow (35) + : +- Scan parquet default.customer (34) + +- ReusedExchange (40) + + +(1) Scan parquet default.store_sales +Output [9]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk), 
IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [9]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9] + +(3) Filter [codegen id : 5] +Input [9]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9] +Condition : ((((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#5)) AND isnotnull(ss_hdemo_sk#3)) AND isnotnull(ss_addr_sk#4)) AND isnotnull(ss_customer_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#10, d_year#11, d_dom#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#10, d_year#11, d_dom#12] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#10, d_year#11, d_dom#12] +Condition : ((((isnotnull(d_dom#12) AND (d_dom#12 >= 1)) AND (d_dom#12 <= 2)) AND d_year#11 IN (1999,2000,2001)) AND isnotnull(d_date_sk#10)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#10] +Input [3]: [d_date_sk#10, d_year#11, d_dom#12] + +(8) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(10) Project [codegen id : 5] +Output [8]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9] +Input [10]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, 
ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9, d_date_sk#10] + +(11) Scan parquet default.store +Output [2]: [s_store_sk#14, s_city#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [In(s_city, [Midway,Fairview]), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#14, s_city#15] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#14, s_city#15] +Condition : (s_city#15 IN (Midway,Fairview) AND isnotnull(s_store_sk#14)) + +(14) Project [codegen id : 2] +Output [1]: [s_store_sk#14] +Input [2]: [s_store_sk#14, s_city#15] + +(15) BroadcastExchange +Input [1]: [s_store_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_store_sk#5] +Right keys [1]: [s_store_sk#14] +Join condition: None + +(17) Project [codegen id : 5] +Output [7]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9] +Input [9]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9, s_store_sk#14] + +(18) Scan parquet default.household_demographics +Output [3]: [hd_demo_sk#17, hd_dep_count#18, hd_vehicle_count#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/household_demographics] +PushedFilters: [Or(EqualTo(hd_dep_count,4),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [3]: [hd_demo_sk#17, hd_dep_count#18, hd_vehicle_count#19] + +(20) Filter [codegen id : 3] +Input [3]: [hd_demo_sk#17, hd_dep_count#18, 
hd_vehicle_count#19] +Condition : (((hd_dep_count#18 = 4) OR (hd_vehicle_count#19 = 3)) AND isnotnull(hd_demo_sk#17)) + +(21) Project [codegen id : 3] +Output [1]: [hd_demo_sk#17] +Input [3]: [hd_demo_sk#17, hd_dep_count#18, hd_vehicle_count#19] + +(22) BroadcastExchange +Input [1]: [hd_demo_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(23) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_hdemo_sk#3] +Right keys [1]: [hd_demo_sk#17] +Join condition: None + +(24) Project [codegen id : 5] +Output [6]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9] +Input [8]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9, hd_demo_sk#17] + +(25) Scan parquet default.customer_address +Output [2]: [ca_address_sk#21, ca_city#22] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)] +ReadSchema: struct + +(26) ColumnarToRow [codegen id : 4] +Input [2]: [ca_address_sk#21, ca_city#22] + +(27) Filter [codegen id : 4] +Input [2]: [ca_address_sk#21, ca_city#22] +Condition : (isnotnull(ca_address_sk#21) AND isnotnull(ca_city#22)) + +(28) BroadcastExchange +Input [2]: [ca_address_sk#21, ca_city#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] + +(29) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_addr_sk#4] +Right keys [1]: [ca_address_sk#21] +Join condition: None + +(30) Project [codegen id : 5] +Output [7]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9, ca_city#22] +Input [8]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, 
ss_ext_tax#9, ca_address_sk#21, ca_city#22] + +(31) HashAggregate [codegen id : 5] +Input [7]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9, ca_city#22] +Keys [4]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#22] +Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#7)), partial_sum(UnscaledValue(ss_ext_list_price#8)), partial_sum(UnscaledValue(ss_ext_tax#9))] +Aggregate Attributes [3]: [sum#24, sum#25, sum#26] +Results [7]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#22, sum#27, sum#28, sum#29] + +(32) Exchange +Input [7]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#22, sum#27, sum#28, sum#29] +Arguments: hashpartitioning(ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#22, 5), true, [id=#30] + +(33) HashAggregate [codegen id : 8] +Input [7]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#22, sum#27, sum#28, sum#29] +Keys [4]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#22] +Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#7)), sum(UnscaledValue(ss_ext_list_price#8)), sum(UnscaledValue(ss_ext_tax#9))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#7))#31, sum(UnscaledValue(ss_ext_list_price#8))#32, sum(UnscaledValue(ss_ext_tax#9))#33] +Results [6]: [ss_ticket_number#6, ss_customer_sk#2, ca_city#22 AS bought_city#34, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#7))#31,17,2) AS extended_price#35, MakeDecimal(sum(UnscaledValue(ss_ext_list_price#8))#32,17,2) AS list_price#36, MakeDecimal(sum(UnscaledValue(ss_ext_tax#9))#33,17,2) AS extended_tax#37] + +(34) Scan parquet default.customer +Output [4]: [c_customer_sk#38, c_current_addr_sk#39, c_first_name#40, c_last_name#41] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), 
IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 6] +Input [4]: [c_customer_sk#38, c_current_addr_sk#39, c_first_name#40, c_last_name#41] + +(36) Filter [codegen id : 6] +Input [4]: [c_customer_sk#38, c_current_addr_sk#39, c_first_name#40, c_last_name#41] +Condition : (isnotnull(c_customer_sk#38) AND isnotnull(c_current_addr_sk#39)) + +(37) BroadcastExchange +Input [4]: [c_customer_sk#38, c_current_addr_sk#39, c_first_name#40, c_last_name#41] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#42] + +(38) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#38] +Join condition: None + +(39) Project [codegen id : 8] +Output [8]: [ss_ticket_number#6, bought_city#34, extended_price#35, list_price#36, extended_tax#37, c_current_addr_sk#39, c_first_name#40, c_last_name#41] +Input [10]: [ss_ticket_number#6, ss_customer_sk#2, bought_city#34, extended_price#35, list_price#36, extended_tax#37, c_customer_sk#38, c_current_addr_sk#39, c_first_name#40, c_last_name#41] + +(40) ReusedExchange [Reuses operator id: 28] +Output [2]: [ca_address_sk#21, ca_city#22] + +(41) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [c_current_addr_sk#39] +Right keys [1]: [ca_address_sk#21] +Join condition: NOT (ca_city#22 = bought_city#34) + +(42) Project [codegen id : 8] +Output [8]: [c_last_name#41, c_first_name#40, ca_city#22, bought_city#34, ss_ticket_number#6, extended_price#35, extended_tax#37, list_price#36] +Input [10]: [ss_ticket_number#6, bought_city#34, extended_price#35, list_price#36, extended_tax#37, c_current_addr_sk#39, c_first_name#40, c_last_name#41, ca_address_sk#21, ca_city#22] + +(43) TakeOrderedAndProject +Input [8]: [c_last_name#41, c_first_name#40, ca_city#22, bought_city#34, ss_ticket_number#6, extended_price#35, extended_tax#37, list_price#36] +Arguments: 100, [c_last_name#41 ASC NULLS FIRST, ss_ticket_number#6 ASC NULLS FIRST], 
[c_last_name#41, c_first_name#40, ca_city#22, bought_city#34, ss_ticket_number#6, extended_price#35, extended_tax#37, list_price#36] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/simplified.txt new file mode 100644 index 0000000000000..4c2d24b06c709 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/simplified.txt @@ -0,0 +1,63 @@ +TakeOrderedAndProject [bought_city,c_first_name,c_last_name,ca_city,extended_price,extended_tax,list_price,ss_ticket_number] + WholeStageCodegen (8) + Project [bought_city,c_first_name,c_last_name,ca_city,extended_price,extended_tax,list_price,ss_ticket_number] + BroadcastHashJoin [bought_city,c_current_addr_sk,ca_address_sk,ca_city] + Project [bought_city,c_current_addr_sk,c_first_name,c_last_name,extended_price,extended_tax,list_price,ss_ticket_number] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + HashAggregate [ca_city,ss_addr_sk,ss_customer_sk,ss_ticket_number,sum,sum,sum] [bought_city,extended_price,extended_tax,list_price,sum,sum,sum,sum(UnscaledValue(ss_ext_list_price)),sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_ext_tax))] + InputAdapter + Exchange [ca_city,ss_addr_sk,ss_customer_sk,ss_ticket_number] #1 + WholeStageCodegen (5) + HashAggregate [ca_city,ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_ticket_number] [sum,sum,sum,sum,sum,sum] + Project [ca_city,ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_ticket_number] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] + Project [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_ticket_number] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_hdemo_sk,ss_ticket_number] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project 
[ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_hdemo_sk,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_addr_sk,ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_dom,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_dom,d_year] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [s_store_sk] + Filter [s_city,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_city,s_store_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [hd_demo_sk] + Filter [hd_demo_sk,hd_dep_count,hd_vehicle_count] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [ca_address_sk,ca_city] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_city] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (6) + Filter [c_current_addr_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_customer_sk,c_first_name,c_last_name] + InputAdapter + ReusedExchange [ca_address_sk,ca_city] #5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69.sf100/explain.txt new file mode 100644 index 0000000000000..e54aecbc37c22 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69.sf100/explain.txt @@ -0,0 +1,299 @@ +== Physical Plan == +TakeOrderedAndProject 
(54) ++- * HashAggregate (53) + +- Exchange (52) + +- * HashAggregate (51) + +- * Project (50) + +- * BroadcastHashJoin Inner BuildLeft (49) + :- BroadcastExchange (45) + : +- * Project (44) + : +- * BroadcastHashJoin Inner BuildRight (43) + : :- * Project (37) + : : +- SortMergeJoin LeftAnti (36) + : : :- SortMergeJoin LeftAnti (27) + : : : :- SortMergeJoin LeftSemi (18) + : : : : :- * Sort (5) + : : : : : +- Exchange (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.customer (1) + : : : : +- * Sort (17) + : : : : +- Exchange (16) + : : : : +- * Project (15) + : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : :- * Filter (8) + : : : : : +- * ColumnarToRow (7) + : : : : : +- Scan parquet default.store_sales (6) + : : : : +- BroadcastExchange (13) + : : : : +- * Project (12) + : : : : +- * Filter (11) + : : : : +- * ColumnarToRow (10) + : : : : +- Scan parquet default.date_dim (9) + : : : +- * Sort (26) + : : : +- Exchange (25) + : : : +- * Project (24) + : : : +- * BroadcastHashJoin Inner BuildRight (23) + : : : :- * Filter (21) + : : : : +- * ColumnarToRow (20) + : : : : +- Scan parquet default.web_sales (19) + : : : +- ReusedExchange (22) + : : +- * Sort (35) + : : +- Exchange (34) + : : +- * Project (33) + : : +- * BroadcastHashJoin Inner BuildRight (32) + : : :- * Filter (30) + : : : +- * ColumnarToRow (29) + : : : +- Scan parquet default.catalog_sales (28) + : : +- ReusedExchange (31) + : +- BroadcastExchange (42) + : +- * Project (41) + : +- * Filter (40) + : +- * ColumnarToRow (39) + : +- Scan parquet default.customer_address (38) + +- * Filter (48) + +- * ColumnarToRow (47) + +- Scan parquet default.customer_demographics (46) + + +(1) Scan parquet default.customer +Output [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(3) Filter [codegen id : 1] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Condition : (isnotnull(c_current_addr_sk#3) AND isnotnull(c_current_cdemo_sk#2)) + +(4) Exchange +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Arguments: hashpartitioning(c_customer_sk#1, 5), true, [id=#4] + +(5) Sort [codegen id : 2] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Arguments: [c_customer_sk#1 ASC NULLS FIRST], false, 0 + +(6) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#5, ss_customer_sk#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 4] +Input [2]: [ss_sold_date_sk#5, ss_customer_sk#6] + +(8) Filter [codegen id : 4] +Input [2]: [ss_sold_date_sk#5, ss_customer_sk#6] +Condition : isnotnull(ss_sold_date_sk#5) + +(9) Scan parquet default.date_dim +Output [3]: [d_date_sk#7, d_year#8, d_moy#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,6), IsNotNull(d_date_sk)] +ReadSchema: struct + +(10) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#7, d_year#8, d_moy#9] + +(11) Filter [codegen id : 3] +Input [3]: [d_date_sk#7, d_year#8, d_moy#9] +Condition : 
(((((isnotnull(d_year#8) AND isnotnull(d_moy#9)) AND (d_year#8 = 2001)) AND (d_moy#9 >= 4)) AND (d_moy#9 <= 6)) AND isnotnull(d_date_sk#7)) + +(12) Project [codegen id : 3] +Output [1]: [d_date_sk#7] +Input [3]: [d_date_sk#7, d_year#8, d_moy#9] + +(13) BroadcastExchange +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(15) Project [codegen id : 4] +Output [1]: [ss_customer_sk#6] +Input [3]: [ss_sold_date_sk#5, ss_customer_sk#6, d_date_sk#7] + +(16) Exchange +Input [1]: [ss_customer_sk#6] +Arguments: hashpartitioning(ss_customer_sk#6, 5), true, [id=#11] + +(17) Sort [codegen id : 5] +Input [1]: [ss_customer_sk#6] +Arguments: [ss_customer_sk#6 ASC NULLS FIRST], false, 0 + +(18) SortMergeJoin +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#6] +Join condition: None + +(19) Scan parquet default.web_sales +Output [2]: [ws_sold_date_sk#12, ws_bill_customer_sk#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 7] +Input [2]: [ws_sold_date_sk#12, ws_bill_customer_sk#13] + +(21) Filter [codegen id : 7] +Input [2]: [ws_sold_date_sk#12, ws_bill_customer_sk#13] +Condition : isnotnull(ws_sold_date_sk#12) + +(22) ReusedExchange [Reuses operator id: 13] +Output [1]: [d_date_sk#7] + +(23) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ws_sold_date_sk#12] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(24) Project [codegen id : 7] +Output [1]: [ws_bill_customer_sk#13] +Input [3]: [ws_sold_date_sk#12, ws_bill_customer_sk#13, d_date_sk#7] + +(25) Exchange +Input [1]: [ws_bill_customer_sk#13] +Arguments: 
hashpartitioning(ws_bill_customer_sk#13, 5), true, [id=#14] + +(26) Sort [codegen id : 8] +Input [1]: [ws_bill_customer_sk#13] +Arguments: [ws_bill_customer_sk#13 ASC NULLS FIRST], false, 0 + +(27) SortMergeJoin +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ws_bill_customer_sk#13] +Join condition: None + +(28) Scan parquet default.catalog_sales +Output [2]: [cs_sold_date_sk#15, cs_ship_customer_sk#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 10] +Input [2]: [cs_sold_date_sk#15, cs_ship_customer_sk#16] + +(30) Filter [codegen id : 10] +Input [2]: [cs_sold_date_sk#15, cs_ship_customer_sk#16] +Condition : isnotnull(cs_sold_date_sk#15) + +(31) ReusedExchange [Reuses operator id: 13] +Output [1]: [d_date_sk#7] + +(32) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_sold_date_sk#15] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(33) Project [codegen id : 10] +Output [1]: [cs_ship_customer_sk#16] +Input [3]: [cs_sold_date_sk#15, cs_ship_customer_sk#16, d_date_sk#7] + +(34) Exchange +Input [1]: [cs_ship_customer_sk#16] +Arguments: hashpartitioning(cs_ship_customer_sk#16, 5), true, [id=#17] + +(35) Sort [codegen id : 11] +Input [1]: [cs_ship_customer_sk#16] +Arguments: [cs_ship_customer_sk#16 ASC NULLS FIRST], false, 0 + +(36) SortMergeJoin +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [cs_ship_customer_sk#16] +Join condition: None + +(37) Project [codegen id : 13] +Output [2]: [c_current_cdemo_sk#2, c_current_addr_sk#3] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(38) Scan parquet default.customer_address +Output [2]: [ca_address_sk#18, ca_state#19] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [In(ca_state, [KY,GA,NM]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(39) ColumnarToRow [codegen id : 12] +Input [2]: [ca_address_sk#18, ca_state#19] + +(40) Filter [codegen id : 12] +Input [2]: [ca_address_sk#18, ca_state#19] +Condition : (ca_state#19 IN (KY,GA,NM) AND isnotnull(ca_address_sk#18)) + +(41) Project [codegen id : 12] +Output [1]: [ca_address_sk#18] +Input [2]: [ca_address_sk#18, ca_state#19] + +(42) BroadcastExchange +Input [1]: [ca_address_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(43) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [c_current_addr_sk#3] +Right keys [1]: [ca_address_sk#18] +Join condition: None + +(44) Project [codegen id : 13] +Output [1]: [c_current_cdemo_sk#2] +Input [3]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#18] + +(45) BroadcastExchange +Input [1]: [c_current_cdemo_sk#2] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] + +(46) Scan parquet default.customer_demographics +Output [6]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(47) ColumnarToRow +Input [6]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27] + +(48) Filter +Input [6]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27] +Condition : isnotnull(cd_demo_sk#22) + +(49) BroadcastHashJoin [codegen id : 14] 
+Left keys [1]: [c_current_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#22] +Join condition: None + +(50) Project [codegen id : 14] +Output [5]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27] +Input [7]: [c_current_cdemo_sk#2, cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27] + +(51) HashAggregate [codegen id : 14] +Input [5]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27] +Keys [5]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#28] +Results [6]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, count#29] + +(52) Exchange +Input [6]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, count#29] +Arguments: hashpartitioning(cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, 5), true, [id=#30] + +(53) HashAggregate [codegen id : 15] +Input [6]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, count#29] +Keys [5]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#31] +Results [8]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, count(1)#31 AS cnt1#32, cd_purchase_estimate#26, count(1)#31 AS cnt2#33, cd_credit_rating#27, count(1)#31 AS cnt3#34] + +(54) TakeOrderedAndProject +Input [8]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cnt1#32, cd_purchase_estimate#26, cnt2#33, cd_credit_rating#27, cnt3#34] +Arguments: 100, [cd_gender#23 ASC NULLS FIRST, cd_marital_status#24 ASC NULLS FIRST, 
cd_education_status#25 ASC NULLS FIRST, cd_purchase_estimate#26 ASC NULLS FIRST, cd_credit_rating#27 ASC NULLS FIRST], [cd_gender#23, cd_marital_status#24, cd_education_status#25, cnt1#32, cd_purchase_estimate#26, cnt2#33, cd_credit_rating#27, cnt3#34] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69.sf100/simplified.txt new file mode 100644 index 0000000000000..0d637a4674e69 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69.sf100/simplified.txt @@ -0,0 +1,85 @@ +TakeOrderedAndProject [cd_credit_rating,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate,cnt1,cnt2,cnt3] + WholeStageCodegen (15) + HashAggregate [cd_credit_rating,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate,count] [cnt1,cnt2,cnt3,count,count(1)] + InputAdapter + Exchange [cd_credit_rating,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] #1 + WholeStageCodegen (14) + HashAggregate [cd_credit_rating,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] [count,count] + Project [cd_credit_rating,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (13) + Project [c_current_cdemo_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_addr_sk,c_current_cdemo_sk] + InputAdapter + SortMergeJoin [c_customer_sk,cs_ship_customer_sk] + SortMergeJoin [c_customer_sk,ws_bill_customer_sk] + SortMergeJoin [c_customer_sk,ss_customer_sk] + WholeStageCodegen (2) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #3 + WholeStageCodegen (1) + Filter [c_current_addr_sk,c_current_cdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + 
WholeStageCodegen (5) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #4 + WholeStageCodegen (4) + Project [ss_customer_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + WholeStageCodegen (8) + Sort [ws_bill_customer_sk] + InputAdapter + Exchange [ws_bill_customer_sk] #6 + WholeStageCodegen (7) + Project [ws_bill_customer_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 + WholeStageCodegen (11) + Sort [cs_ship_customer_sk] + InputAdapter + Exchange [cs_ship_customer_sk] #7 + WholeStageCodegen (10) + Project [cs_ship_customer_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (12) + Project [ca_address_sk] + Filter [ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state] + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_credit_rating,cd_demo_sk,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/explain.txt new file mode 100644 index 0000000000000..a5448b18bd4d8 --- /dev/null +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/explain.txt @@ -0,0 +1,274 @@ +== Physical Plan == +TakeOrderedAndProject (49) ++- * HashAggregate (48) + +- Exchange (47) + +- * HashAggregate (46) + +- * Project (45) + +- * BroadcastHashJoin Inner BuildRight (44) + :- * Project (39) + : +- * BroadcastHashJoin Inner BuildRight (38) + : :- * Project (32) + : : +- * BroadcastHashJoin LeftAnti BuildRight (31) + : : :- * BroadcastHashJoin LeftAnti BuildRight (23) + : : : :- * BroadcastHashJoin LeftSemi BuildRight (15) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.customer (1) + : : : : +- BroadcastExchange (14) + : : : : +- * Project (13) + : : : : +- * BroadcastHashJoin Inner BuildRight (12) + : : : : :- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.store_sales (4) + : : : : +- BroadcastExchange (11) + : : : : +- * Project (10) + : : : : +- * Filter (9) + : : : : +- * ColumnarToRow (8) + : : : : +- Scan parquet default.date_dim (7) + : : : +- BroadcastExchange (22) + : : : +- * Project (21) + : : : +- * BroadcastHashJoin Inner BuildRight (20) + : : : :- * Filter (18) + : : : : +- * ColumnarToRow (17) + : : : : +- Scan parquet default.web_sales (16) + : : : +- ReusedExchange (19) + : : +- BroadcastExchange (30) + : : +- * Project (29) + : : +- * BroadcastHashJoin Inner BuildRight (28) + : : :- * Filter (26) + : : : +- * ColumnarToRow (25) + : : : +- Scan parquet default.catalog_sales (24) + : : +- ReusedExchange (27) + : +- BroadcastExchange (37) + : +- * Project (36) + : +- * Filter (35) + : +- * ColumnarToRow (34) + : +- Scan parquet default.customer_address (33) + +- BroadcastExchange (43) + +- * Filter (42) + +- * ColumnarToRow (41) + +- Scan parquet default.customer_demographics (40) + + +(1) Scan parquet default.customer +Output [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 9] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(3) Filter [codegen id : 9] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Condition : (isnotnull(c_current_addr_sk#3) AND isnotnull(c_current_cdemo_sk#2)) + +(4) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#4, ss_customer_sk#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 2] +Input [2]: [ss_sold_date_sk#4, ss_customer_sk#5] + +(6) Filter [codegen id : 2] +Input [2]: [ss_sold_date_sk#4, ss_customer_sk#5] +Condition : isnotnull(ss_sold_date_sk#4) + +(7) Scan parquet default.date_dim +Output [3]: [d_date_sk#6, d_year#7, d_moy#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_year,2001), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,6), IsNotNull(d_date_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_moy#8] + +(9) Filter [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_moy#8] +Condition : (((((isnotnull(d_moy#8) AND isnotnull(d_year#7)) AND (d_year#7 = 2001)) AND (d_moy#8 >= 4)) AND (d_moy#8 <= 6)) AND isnotnull(d_date_sk#6)) + +(10) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [3]: [d_date_sk#6, d_year#7, d_moy#8] + +(11) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: 
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + +(12) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#4] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(13) Project [codegen id : 2] +Output [1]: [ss_customer_sk#5] +Input [3]: [ss_sold_date_sk#4, ss_customer_sk#5, d_date_sk#6] + +(14) BroadcastExchange +Input [1]: [ss_customer_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] + +(15) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#5] +Join condition: None + +(16) Scan parquet default.web_sales +Output [2]: [ws_sold_date_sk#11, ws_bill_customer_sk#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 4] +Input [2]: [ws_sold_date_sk#11, ws_bill_customer_sk#12] + +(18) Filter [codegen id : 4] +Input [2]: [ws_sold_date_sk#11, ws_bill_customer_sk#12] +Condition : isnotnull(ws_sold_date_sk#11) + +(19) ReusedExchange [Reuses operator id: 11] +Output [1]: [d_date_sk#6] + +(20) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_sold_date_sk#11] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(21) Project [codegen id : 4] +Output [1]: [ws_bill_customer_sk#12] +Input [3]: [ws_sold_date_sk#11, ws_bill_customer_sk#12, d_date_sk#6] + +(22) BroadcastExchange +Input [1]: [ws_bill_customer_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(23) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ws_bill_customer_sk#12] +Join condition: None + +(24) Scan parquet default.catalog_sales +Output [2]: [cs_sold_date_sk#14, cs_ship_customer_sk#15] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 6] +Input [2]: [cs_sold_date_sk#14, cs_ship_customer_sk#15] + +(26) Filter [codegen id : 6] +Input [2]: [cs_sold_date_sk#14, cs_ship_customer_sk#15] +Condition : isnotnull(cs_sold_date_sk#14) + +(27) ReusedExchange [Reuses operator id: 11] +Output [1]: [d_date_sk#6] + +(28) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_sold_date_sk#14] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(29) Project [codegen id : 6] +Output [1]: [cs_ship_customer_sk#15] +Input [3]: [cs_sold_date_sk#14, cs_ship_customer_sk#15, d_date_sk#6] + +(30) BroadcastExchange +Input [1]: [cs_ship_customer_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(31) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [cs_ship_customer_sk#15] +Join condition: None + +(32) Project [codegen id : 9] +Output [2]: [c_current_cdemo_sk#2, c_current_addr_sk#3] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(33) Scan parquet default.customer_address +Output [2]: [ca_address_sk#17, ca_state#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_address] +PushedFilters: [In(ca_state, [KY,GA,NM]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 7] +Input [2]: [ca_address_sk#17, ca_state#18] + +(35) Filter [codegen id : 7] +Input [2]: [ca_address_sk#17, ca_state#18] +Condition : (ca_state#18 IN (KY,GA,NM) AND isnotnull(ca_address_sk#17)) + +(36) Project [codegen id : 7] +Output [1]: [ca_address_sk#17] +Input [2]: [ca_address_sk#17, ca_state#18] + +(37) BroadcastExchange +Input [1]: [ca_address_sk#17] 
+Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#19] + +(38) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_addr_sk#3] +Right keys [1]: [ca_address_sk#17] +Join condition: None + +(39) Project [codegen id : 9] +Output [1]: [c_current_cdemo_sk#2] +Input [3]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#17] + +(40) Scan parquet default.customer_demographics +Output [6]: [cd_demo_sk#20, cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 8] +Input [6]: [cd_demo_sk#20, cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25] + +(42) Filter [codegen id : 8] +Input [6]: [cd_demo_sk#20, cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25] +Condition : isnotnull(cd_demo_sk#20) + +(43) BroadcastExchange +Input [6]: [cd_demo_sk#20, cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#26] + +(44) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#20] +Join condition: None + +(45) Project [codegen id : 9] +Output [5]: [cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25] +Input [7]: [c_current_cdemo_sk#2, cd_demo_sk#20, cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25] + +(46) HashAggregate [codegen id : 9] +Input [5]: [cd_gender#21, cd_marital_status#22, 
cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25] +Keys [5]: [cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#27] +Results [6]: [cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25, count#28] + +(47) Exchange +Input [6]: [cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25, count#28] +Arguments: hashpartitioning(cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25, 5), true, [id=#29] + +(48) HashAggregate [codegen id : 10] +Input [6]: [cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25, count#28] +Keys [5]: [cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#30] +Results [8]: [cd_gender#21, cd_marital_status#22, cd_education_status#23, count(1)#30 AS cnt1#31, cd_purchase_estimate#24, count(1)#30 AS cnt2#32, cd_credit_rating#25, count(1)#30 AS cnt3#33] + +(49) TakeOrderedAndProject +Input [8]: [cd_gender#21, cd_marital_status#22, cd_education_status#23, cnt1#31, cd_purchase_estimate#24, cnt2#32, cd_credit_rating#25, cnt3#33] +Arguments: 100, [cd_gender#21 ASC NULLS FIRST, cd_marital_status#22 ASC NULLS FIRST, cd_education_status#23 ASC NULLS FIRST, cd_purchase_estimate#24 ASC NULLS FIRST, cd_credit_rating#25 ASC NULLS FIRST], [cd_gender#21, cd_marital_status#22, cd_education_status#23, cnt1#31, cd_purchase_estimate#24, cnt2#32, cd_credit_rating#25, cnt3#33] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/simplified.txt new file mode 100644 index 
0000000000000..765e6a7524dea --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/simplified.txt @@ -0,0 +1,73 @@ +TakeOrderedAndProject [cd_credit_rating,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate,cnt1,cnt2,cnt3] + WholeStageCodegen (10) + HashAggregate [cd_credit_rating,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate,count] [cnt1,cnt2,cnt3,count,count(1)] + InputAdapter + Exchange [cd_credit_rating,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] #1 + WholeStageCodegen (9) + HashAggregate [cd_credit_rating,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] [count,count] + Project [cd_credit_rating,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_addr_sk,c_current_cdemo_sk] + BroadcastHashJoin [c_customer_sk,cs_ship_customer_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_current_addr_sk,c_current_cdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (2) + Project [ss_customer_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (4) + Project [ws_bill_customer_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan 
parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (6) + Project [cs_ship_customer_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Project [ca_address_sk] + Filter [ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (8) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_credit_rating,cd_demo_sk,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7.sf100/explain.txt new file mode 100644 index 0000000000000..d497558b628d7 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7.sf100/explain.txt @@ -0,0 +1,193 @@ +== Physical Plan == +TakeOrderedAndProject (34) ++- * HashAggregate (33) + +- Exchange (32) + +- * HashAggregate (31) + +- * Project (30) + +- * BroadcastHashJoin Inner BuildRight (29) + :- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.customer_demographics (4) + : : +- BroadcastExchange 
(15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.promotion (11) + : +- BroadcastExchange (22) + : +- * Project (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.date_dim (18) + +- BroadcastExchange (28) + +- * Filter (27) + +- * ColumnarToRow (26) + +- Scan parquet default.item (25) + + +(1) Scan parquet default.store_sales +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_promo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] + +(3) Filter [codegen id : 5] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Condition : (((isnotnull(ss_cdemo_sk#3) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_promo_sk#4)) + +(4) Scan parquet default.customer_demographics +Output [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_demographics] +PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_education_status), IsNotNull(cd_marital_status), EqualTo(cd_gender,M), EqualTo(cd_marital_status,S), EqualTo(cd_education_status,College), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cd_demo_sk#9, 
cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(6) Filter [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Condition : ((((((isnotnull(cd_gender#10) AND isnotnull(cd_education_status#12)) AND isnotnull(cd_marital_status#11)) AND (cd_gender#10 = M)) AND (cd_marital_status#11 = S)) AND (cd_education_status#12 = College)) AND isnotnull(cd_demo_sk#9)) + +(7) Project [codegen id : 1] +Output [1]: [cd_demo_sk#9] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(8) BroadcastExchange +Input [1]: [cd_demo_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_cdemo_sk#3] +Right keys [1]: [cd_demo_sk#9] +Join condition: None + +(10) Project [codegen id : 5] +Output [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, cd_demo_sk#9] + +(11) Scan parquet default.promotion +Output [3]: [p_promo_sk#14, p_channel_email#15, p_channel_event#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/promotion] +PushedFilters: [Or(EqualTo(p_channel_email,N),EqualTo(p_channel_event,N)), IsNotNull(p_promo_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [p_promo_sk#14, p_channel_email#15, p_channel_event#16] + +(13) Filter [codegen id : 2] +Input [3]: [p_promo_sk#14, p_channel_email#15, p_channel_event#16] +Condition : (((p_channel_email#15 = N) OR (p_channel_event#16 = N)) AND isnotnull(p_promo_sk#14)) + +(14) Project [codegen id : 2] +Output [1]: [p_promo_sk#14] +Input [3]: [p_promo_sk#14, p_channel_email#15, 
p_channel_event#16] + +(15) BroadcastExchange +Input [1]: [p_promo_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_promo_sk#4] +Right keys [1]: [p_promo_sk#14] +Join condition: None + +(17) Project [codegen id : 5] +Output [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, p_promo_sk#14] + +(18) Scan parquet default.date_dim +Output [2]: [d_date_sk#18, d_year#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#18, d_year#19] + +(20) Filter [codegen id : 3] +Input [2]: [d_date_sk#18, d_year#19] +Condition : ((isnotnull(d_year#19) AND (d_year#19 = 2000)) AND isnotnull(d_date_sk#18)) + +(21) Project [codegen id : 3] +Output [1]: [d_date_sk#18] +Input [2]: [d_date_sk#18, d_year#19] + +(22) BroadcastExchange +Input [1]: [d_date_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(23) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#18] +Join condition: None + +(24) Project [codegen id : 5] +Output [5]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, d_date_sk#18] + +(25) Scan parquet default.item +Output [2]: [i_item_sk#21, i_item_id#22] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(26) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#21, i_item_id#22] + +(27) Filter [codegen id : 4] +Input [2]: [i_item_sk#21, i_item_id#22] +Condition : isnotnull(i_item_sk#21) + +(28) BroadcastExchange +Input [2]: [i_item_sk#21, i_item_id#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] + +(29) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#21] +Join condition: None + +(30) Project [codegen id : 5] +Output [5]: [ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#22] +Input [7]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_sk#21, i_item_id#22] + +(31) HashAggregate [codegen id : 5] +Input [5]: [ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#22] +Keys [1]: [i_item_id#22] +Functions [4]: [partial_avg(cast(ss_quantity#5 as bigint)), partial_avg(UnscaledValue(ss_list_price#6)), partial_avg(UnscaledValue(ss_coupon_amt#8)), partial_avg(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [8]: [sum#24, count#25, sum#26, count#27, sum#28, count#29, sum#30, count#31] +Results [9]: [i_item_id#22, sum#32, count#33, sum#34, count#35, sum#36, count#37, sum#38, count#39] + +(32) Exchange +Input [9]: [i_item_id#22, sum#32, count#33, sum#34, count#35, sum#36, count#37, sum#38, count#39] +Arguments: hashpartitioning(i_item_id#22, 5), true, [id=#40] + +(33) HashAggregate [codegen id : 6] +Input [9]: [i_item_id#22, sum#32, count#33, sum#34, count#35, sum#36, count#37, sum#38, count#39] +Keys [1]: [i_item_id#22] +Functions [4]: [avg(cast(ss_quantity#5 as bigint)), avg(UnscaledValue(ss_list_price#6)), avg(UnscaledValue(ss_coupon_amt#8)), avg(UnscaledValue(ss_sales_price#7))] +Aggregate 
Attributes [4]: [avg(cast(ss_quantity#5 as bigint))#41, avg(UnscaledValue(ss_list_price#6))#42, avg(UnscaledValue(ss_coupon_amt#8))#43, avg(UnscaledValue(ss_sales_price#7))#44] +Results [5]: [i_item_id#22, avg(cast(ss_quantity#5 as bigint))#41 AS agg1#45, cast((avg(UnscaledValue(ss_list_price#6))#42 / 100.0) as decimal(11,6)) AS agg2#46, cast((avg(UnscaledValue(ss_coupon_amt#8))#43 / 100.0) as decimal(11,6)) AS agg3#47, cast((avg(UnscaledValue(ss_sales_price#7))#44 / 100.0) as decimal(11,6)) AS agg4#48] + +(34) TakeOrderedAndProject +Input [5]: [i_item_id#22, agg1#45, agg2#46, agg3#47, agg4#48] +Arguments: 100, [i_item_id#22 ASC NULLS FIRST], [i_item_id#22, agg1#45, agg2#46, agg3#47, agg4#48] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7.sf100/simplified.txt new file mode 100644 index 0000000000000..533447867c5aa --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7.sf100/simplified.txt @@ -0,0 +1,50 @@ +TakeOrderedAndProject [agg1,agg2,agg3,agg4,i_item_id] + WholeStageCodegen (6) + HashAggregate [count,count,count,count,i_item_id,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_list_price)),avg(UnscaledValue(ss_sales_price)),avg(cast(ss_quantity as bigint)),count,count,count,count,sum,sum,sum,sum] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen (5) + HashAggregate [i_item_id,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] [count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] + Project [i_item_id,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project 
[ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [p_promo_sk,ss_promo_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_promo_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] + Filter [ss_cdemo_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_promo_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [cd_demo_sk] + Filter [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [p_promo_sk] + Filter [p_channel_email,p_channel_event,p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.promotion [p_channel_email,p_channel_event,p_promo_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_id,i_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/explain.txt new file mode 100644 index 0000000000000..34ec2e61198f6 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/explain.txt @@ -0,0 +1,193 @@ +== Physical Plan == +TakeOrderedAndProject (34) ++- * HashAggregate (33) + +- Exchange (32) + +- * HashAggregate (31) + +- * Project (30) + +- * BroadcastHashJoin Inner BuildRight (29) + :- * Project (23) + : +- * 
BroadcastHashJoin Inner BuildRight (22) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.customer_demographics (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.date_dim (11) + : +- BroadcastExchange (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.item (18) + +- BroadcastExchange (28) + +- * Project (27) + +- * Filter (26) + +- * ColumnarToRow (25) + +- Scan parquet default.promotion (24) + + +(1) Scan parquet default.store_sales +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_promo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] + +(3) Filter [codegen id : 5] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Condition : (((isnotnull(ss_cdemo_sk#3) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_promo_sk#4)) + +(4) Scan parquet default.customer_demographics +Output [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Batched: 
true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_demographics] +PushedFilters: [IsNotNull(cd_marital_status), IsNotNull(cd_gender), IsNotNull(cd_education_status), EqualTo(cd_gender,M), EqualTo(cd_marital_status,S), EqualTo(cd_education_status,College), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(6) Filter [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Condition : ((((((isnotnull(cd_marital_status#11) AND isnotnull(cd_gender#10)) AND isnotnull(cd_education_status#12)) AND (cd_gender#10 = M)) AND (cd_marital_status#11 = S)) AND (cd_education_status#12 = College)) AND isnotnull(cd_demo_sk#9)) + +(7) Project [codegen id : 1] +Output [1]: [cd_demo_sk#9] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(8) BroadcastExchange +Input [1]: [cd_demo_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_cdemo_sk#3] +Right keys [1]: [cd_demo_sk#9] +Join condition: None + +(10) Project [codegen id : 5] +Output [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, cd_demo_sk#9] + +(11) Scan parquet default.date_dim +Output [2]: [d_date_sk#14, d_year#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) 
ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#14, d_year#15] + +(13) Filter [codegen id : 2] +Input [2]: [d_date_sk#14, d_year#15] +Condition : ((isnotnull(d_year#15) AND (d_year#15 = 2000)) AND isnotnull(d_date_sk#14)) + +(14) Project [codegen id : 2] +Output [1]: [d_date_sk#14] +Input [2]: [d_date_sk#14, d_year#15] + +(15) BroadcastExchange +Input [1]: [d_date_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#14] +Join condition: None + +(17) Project [codegen id : 5] +Output [6]: [ss_item_sk#2, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, d_date_sk#14] + +(18) Scan parquet default.item +Output [2]: [i_item_sk#17, i_item_id#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [i_item_sk#17, i_item_id#18] + +(20) Filter [codegen id : 3] +Input [2]: [i_item_sk#17, i_item_id#18] +Condition : isnotnull(i_item_sk#17) + +(21) BroadcastExchange +Input [2]: [i_item_sk#17, i_item_id#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] + +(22) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#17] +Join condition: None + +(23) Project [codegen id : 5] +Output [6]: [ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#18] +Input [8]: [ss_item_sk#2, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_sk#17, i_item_id#18] + +(24) Scan parquet default.promotion 
+Output [3]: [p_promo_sk#20, p_channel_email#21, p_channel_event#22] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/promotion] +PushedFilters: [Or(EqualTo(p_channel_email,N),EqualTo(p_channel_event,N)), IsNotNull(p_promo_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 4] +Input [3]: [p_promo_sk#20, p_channel_email#21, p_channel_event#22] + +(26) Filter [codegen id : 4] +Input [3]: [p_promo_sk#20, p_channel_email#21, p_channel_event#22] +Condition : (((p_channel_email#21 = N) OR (p_channel_event#22 = N)) AND isnotnull(p_promo_sk#20)) + +(27) Project [codegen id : 4] +Output [1]: [p_promo_sk#20] +Input [3]: [p_promo_sk#20, p_channel_email#21, p_channel_event#22] + +(28) BroadcastExchange +Input [1]: [p_promo_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] + +(29) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_promo_sk#4] +Right keys [1]: [p_promo_sk#20] +Join condition: None + +(30) Project [codegen id : 5] +Output [5]: [ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#18] +Input [7]: [ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#18, p_promo_sk#20] + +(31) HashAggregate [codegen id : 5] +Input [5]: [ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#18] +Keys [1]: [i_item_id#18] +Functions [4]: [partial_avg(cast(ss_quantity#5 as bigint)), partial_avg(UnscaledValue(ss_list_price#6)), partial_avg(UnscaledValue(ss_coupon_amt#8)), partial_avg(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [8]: [sum#24, count#25, sum#26, count#27, sum#28, count#29, sum#30, count#31] +Results [9]: [i_item_id#18, sum#32, count#33, sum#34, count#35, sum#36, count#37, sum#38, count#39] + +(32) Exchange +Input [9]: [i_item_id#18, sum#32, count#33, sum#34, count#35, sum#36, count#37, sum#38, count#39] 
+Arguments: hashpartitioning(i_item_id#18, 5), true, [id=#40] + +(33) HashAggregate [codegen id : 6] +Input [9]: [i_item_id#18, sum#32, count#33, sum#34, count#35, sum#36, count#37, sum#38, count#39] +Keys [1]: [i_item_id#18] +Functions [4]: [avg(cast(ss_quantity#5 as bigint)), avg(UnscaledValue(ss_list_price#6)), avg(UnscaledValue(ss_coupon_amt#8)), avg(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [4]: [avg(cast(ss_quantity#5 as bigint))#41, avg(UnscaledValue(ss_list_price#6))#42, avg(UnscaledValue(ss_coupon_amt#8))#43, avg(UnscaledValue(ss_sales_price#7))#44] +Results [5]: [i_item_id#18, avg(cast(ss_quantity#5 as bigint))#41 AS agg1#45, cast((avg(UnscaledValue(ss_list_price#6))#42 / 100.0) as decimal(11,6)) AS agg2#46, cast((avg(UnscaledValue(ss_coupon_amt#8))#43 / 100.0) as decimal(11,6)) AS agg3#47, cast((avg(UnscaledValue(ss_sales_price#7))#44 / 100.0) as decimal(11,6)) AS agg4#48] + +(34) TakeOrderedAndProject +Input [5]: [i_item_id#18, agg1#45, agg2#46, agg3#47, agg4#48] +Arguments: 100, [i_item_id#18 ASC NULLS FIRST], [i_item_id#18, agg1#45, agg2#46, agg3#47, agg4#48] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/simplified.txt new file mode 100644 index 0000000000000..db56467a0218d --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/simplified.txt @@ -0,0 +1,50 @@ +TakeOrderedAndProject [agg1,agg2,agg3,agg4,i_item_id] + WholeStageCodegen (6) + HashAggregate [count,count,count,count,i_item_id,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_list_price)),avg(UnscaledValue(ss_sales_price)),avg(cast(ss_quantity as bigint)),count,count,count,count,sum,sum,sum,sum] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen (5) + HashAggregate [i_item_id,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] 
[count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] + Project [i_item_id,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [p_promo_sk,ss_promo_sk] + Project [i_item_id,ss_coupon_amt,ss_list_price,ss_promo_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_promo_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_promo_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] + Filter [ss_cdemo_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_promo_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [cd_demo_sk] + Filter [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_id,i_item_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [p_promo_sk] + Filter [p_channel_email,p_channel_event,p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.promotion [p_channel_email,p_channel_event,p_promo_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70.sf100/explain.txt new file mode 100644 index 
0000000000000..abca0d859fd17 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70.sf100/explain.txt @@ -0,0 +1,264 @@ +== Physical Plan == +TakeOrderedAndProject (47) ++- * Project (46) + +- Window (45) + +- * Sort (44) + +- Exchange (43) + +- * HashAggregate (42) + +- Exchange (41) + +- * HashAggregate (40) + +- * Expand (39) + +- * Project (38) + +- * BroadcastHashJoin Inner BuildRight (37) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.store_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.date_dim (4) + +- BroadcastExchange (36) + +- * BroadcastHashJoin LeftSemi BuildRight (35) + :- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet default.store (11) + +- BroadcastExchange (34) + +- * Project (33) + +- * Filter (32) + +- Window (31) + +- * Sort (30) + +- Exchange (29) + +- * HashAggregate (28) + +- Exchange (27) + +- * HashAggregate (26) + +- * Project (25) + +- * BroadcastHashJoin Inner BuildRight (24) + :- * Project (19) + : +- * BroadcastHashJoin Inner BuildRight (18) + : :- * Filter (16) + : : +- * ColumnarToRow (15) + : : +- Scan parquet default.store_sales (14) + : +- ReusedExchange (17) + +- BroadcastExchange (23) + +- * Filter (22) + +- * ColumnarToRow (21) + +- Scan parquet default.store (20) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 9] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] + +(3) Filter [codegen id : 9] +Input [3]: 
[ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#2)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#4, d_month_seq#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(9) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(10) Project [codegen id : 9] +Output [2]: [ss_store_sk#2, ss_net_profit#3] +Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3, d_date_sk#4] + +(11) Scan parquet default.store +Output [3]: [s_store_sk#7, s_county#8, s_state#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 8] +Input [3]: [s_store_sk#7, s_county#8, s_state#9] + +(13) Filter [codegen id : 8] +Input [3]: [s_store_sk#7, s_county#8, s_state#9] +Condition : isnotnull(s_store_sk#7) + +(14) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] +Batched: true 
+Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 4] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] + +(16) Filter [codegen id : 4] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] +Condition : (isnotnull(ss_store_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(17) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#4] + +(18) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(19) Project [codegen id : 4] +Output [2]: [ss_store_sk#2, ss_net_profit#3] +Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3, d_date_sk#4] + +(20) Scan parquet default.store +Output [2]: [s_store_sk#7, s_state#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 3] +Input [2]: [s_store_sk#7, s_state#9] + +(22) Filter [codegen id : 3] +Input [2]: [s_store_sk#7, s_state#9] +Condition : isnotnull(s_store_sk#7) + +(23) BroadcastExchange +Input [2]: [s_store_sk#7, s_state#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] + +(24) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#7] +Join condition: None + +(25) Project [codegen id : 4] +Output [2]: [ss_net_profit#3, s_state#9] +Input [4]: [ss_store_sk#2, ss_net_profit#3, s_store_sk#7, s_state#9] + +(26) HashAggregate [codegen id : 4] +Input [2]: [ss_net_profit#3, s_state#9] +Keys [1]: [s_state#9] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#3))] +Aggregate 
Attributes [1]: [sum#11] +Results [2]: [s_state#9, sum#12] + +(27) Exchange +Input [2]: [s_state#9, sum#12] +Arguments: hashpartitioning(s_state#9, 5), true, [id=#13] + +(28) HashAggregate [codegen id : 5] +Input [2]: [s_state#9, sum#12] +Keys [1]: [s_state#9] +Functions [1]: [sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#3))#14] +Results [3]: [s_state#9 AS s_state#15, s_state#9, MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#14,17,2) AS _w2#16] + +(29) Exchange +Input [3]: [s_state#15, s_state#9, _w2#16] +Arguments: hashpartitioning(s_state#9, 5), true, [id=#17] + +(30) Sort [codegen id : 6] +Input [3]: [s_state#15, s_state#9, _w2#16] +Arguments: [s_state#9 ASC NULLS FIRST, _w2#16 DESC NULLS LAST], false, 0 + +(31) Window +Input [3]: [s_state#15, s_state#9, _w2#16] +Arguments: [rank(_w2#16) windowspecdefinition(s_state#9, _w2#16 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS ranking#18], [s_state#9], [_w2#16 DESC NULLS LAST] + +(32) Filter [codegen id : 7] +Input [4]: [s_state#15, s_state#9, _w2#16, ranking#18] +Condition : (isnotnull(ranking#18) AND (ranking#18 <= 5)) + +(33) Project [codegen id : 7] +Output [1]: [s_state#15] +Input [4]: [s_state#15, s_state#9, _w2#16, ranking#18] + +(34) BroadcastExchange +Input [1]: [s_state#15] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#19] + +(35) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [s_state#9] +Right keys [1]: [s_state#15] +Join condition: None + +(36) BroadcastExchange +Input [3]: [s_store_sk#7, s_county#8, s_state#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#20] + +(37) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#7] +Join condition: None + +(38) Project [codegen id : 9] +Output [3]: [ss_net_profit#3, s_state#9, s_county#8] +Input [5]: [ss_store_sk#2, ss_net_profit#3, 
s_store_sk#7, s_county#8, s_state#9] + +(39) Expand [codegen id : 9] +Input [3]: [ss_net_profit#3, s_state#9, s_county#8] +Arguments: [List(ss_net_profit#3, s_state#9, s_county#8, 0), List(ss_net_profit#3, s_state#9, null, 1), List(ss_net_profit#3, null, null, 3)], [ss_net_profit#3, s_state#21, s_county#22, spark_grouping_id#23] + +(40) HashAggregate [codegen id : 9] +Input [4]: [ss_net_profit#3, s_state#21, s_county#22, spark_grouping_id#23] +Keys [3]: [s_state#21, s_county#22, spark_grouping_id#23] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [sum#24] +Results [4]: [s_state#21, s_county#22, spark_grouping_id#23, sum#25] + +(41) Exchange +Input [4]: [s_state#21, s_county#22, spark_grouping_id#23, sum#25] +Arguments: hashpartitioning(s_state#21, s_county#22, spark_grouping_id#23, 5), true, [id=#26] + +(42) HashAggregate [codegen id : 10] +Input [4]: [s_state#21, s_county#22, spark_grouping_id#23, sum#25] +Keys [3]: [s_state#21, s_county#22, spark_grouping_id#23] +Functions [1]: [sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#3))#27] +Results [7]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#27,17,2) AS total_sum#28, s_state#21, s_county#22, (cast((shiftright(spark_grouping_id#23, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#23, 0) & 1) as tinyint)) AS lochierarchy#29, (cast((shiftright(spark_grouping_id#23, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#23, 0) & 1) as tinyint)) AS _w1#30, CASE WHEN (cast(cast((shiftright(spark_grouping_id#23, 0) & 1) as tinyint) as int) = 0) THEN s_state#21 END AS _w2#31, MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#27,17,2) AS _w3#32] + +(43) Exchange +Input [7]: [total_sum#28, s_state#21, s_county#22, lochierarchy#29, _w1#30, _w2#31, _w3#32] +Arguments: hashpartitioning(_w1#30, _w2#31, 5), true, [id=#33] + +(44) Sort [codegen id : 11] +Input [7]: [total_sum#28, s_state#21, s_county#22, lochierarchy#29, 
_w1#30, _w2#31, _w3#32] +Arguments: [_w1#30 ASC NULLS FIRST, _w2#31 ASC NULLS FIRST, _w3#32 DESC NULLS LAST], false, 0 + +(45) Window +Input [7]: [total_sum#28, s_state#21, s_county#22, lochierarchy#29, _w1#30, _w2#31, _w3#32] +Arguments: [rank(_w3#32) windowspecdefinition(_w1#30, _w2#31, _w3#32 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#34], [_w1#30, _w2#31], [_w3#32 DESC NULLS LAST] + +(46) Project [codegen id : 12] +Output [5]: [total_sum#28, s_state#21, s_county#22, lochierarchy#29, rank_within_parent#34] +Input [8]: [total_sum#28, s_state#21, s_county#22, lochierarchy#29, _w1#30, _w2#31, _w3#32, rank_within_parent#34] + +(47) TakeOrderedAndProject +Input [5]: [total_sum#28, s_state#21, s_county#22, lochierarchy#29, rank_within_parent#34] +Arguments: 100, [lochierarchy#29 DESC NULLS LAST, CASE WHEN (cast(lochierarchy#29 as int) = 0) THEN s_state#21 END ASC NULLS FIRST, rank_within_parent#34 ASC NULLS FIRST], [total_sum#28, s_state#21, s_county#22, lochierarchy#29, rank_within_parent#34] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70.sf100/simplified.txt new file mode 100644 index 0000000000000..1b14fd24d1aee --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70.sf100/simplified.txt @@ -0,0 +1,74 @@ +TakeOrderedAndProject [lochierarchy,rank_within_parent,s_county,s_state,total_sum] + WholeStageCodegen (12) + Project [lochierarchy,rank_within_parent,s_county,s_state,total_sum] + InputAdapter + Window [_w1,_w2,_w3] + WholeStageCodegen (11) + Sort [_w1,_w2,_w3] + InputAdapter + Exchange [_w1,_w2] #1 + WholeStageCodegen (10) + HashAggregate [s_county,s_state,spark_grouping_id,sum] [_w1,_w2,_w3,lochierarchy,sum,sum(UnscaledValue(ss_net_profit)),total_sum] + InputAdapter + Exchange [s_county,s_state,spark_grouping_id] #2 + 
WholeStageCodegen (9) + HashAggregate [s_county,s_state,spark_grouping_id,ss_net_profit] [sum,sum] + Expand [s_county,s_state,ss_net_profit] + Project [s_county,s_state,ss_net_profit] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_net_profit,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_net_profit,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (8) + BroadcastHashJoin [s_state,s_state] + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_county,s_state,s_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (7) + Project [s_state] + Filter [ranking] + InputAdapter + Window [_w2,s_state] + WholeStageCodegen (6) + Sort [_w2,s_state] + InputAdapter + Exchange [s_state] #6 + WholeStageCodegen (5) + HashAggregate [s_state,sum] [_w2,s_state,sum,sum(UnscaledValue(ss_net_profit))] + InputAdapter + Exchange [s_state] #7 + WholeStageCodegen (4) + HashAggregate [s_state,ss_net_profit] [sum,sum] + Project [s_state,ss_net_profit] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_net_profit,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_net_profit,ss_sold_date_sk,ss_store_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (3) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_state,s_store_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/explain.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/explain.txt new file mode 100644 index 0000000000000..2e6b9ebdd0226 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/explain.txt @@ -0,0 +1,264 @@ +== Physical Plan == +TakeOrderedAndProject (47) ++- * Project (46) + +- Window (45) + +- * Sort (44) + +- Exchange (43) + +- * HashAggregate (42) + +- Exchange (41) + +- * HashAggregate (40) + +- * Expand (39) + +- * Project (38) + +- * BroadcastHashJoin Inner BuildRight (37) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.store_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.date_dim (4) + +- BroadcastExchange (36) + +- * BroadcastHashJoin LeftSemi BuildRight (35) + :- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet default.store (11) + +- BroadcastExchange (34) + +- * Project (33) + +- * Filter (32) + +- Window (31) + +- * Sort (30) + +- Exchange (29) + +- * HashAggregate (28) + +- Exchange (27) + +- * HashAggregate (26) + +- * Project (25) + +- * BroadcastHashJoin Inner BuildRight (24) + :- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Filter (16) + : : +- * ColumnarToRow (15) + : : +- Scan parquet default.store_sales (14) + : +- BroadcastExchange (20) + : +- * Filter (19) + : +- * ColumnarToRow (18) + : +- Scan parquet default.store (17) + +- ReusedExchange (23) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 9] +Input [3]: 
[ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] + +(3) Filter [codegen id : 9] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#2)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#4, d_month_seq#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(9) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(10) Project [codegen id : 9] +Output [2]: [ss_store_sk#2, ss_net_profit#3] +Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3, d_date_sk#4] + +(11) Scan parquet default.store +Output [3]: [s_store_sk#7, s_county#8, s_state#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 8] +Input [3]: [s_store_sk#7, s_county#8, s_state#9] + +(13) Filter [codegen id : 8] +Input [3]: [s_store_sk#7, s_county#8, s_state#9] +Condition : isnotnull(s_store_sk#7) + +(14) Scan parquet default.store_sales 
+Output [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 4] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] + +(16) Filter [codegen id : 4] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] +Condition : (isnotnull(ss_store_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(17) Scan parquet default.store +Output [2]: [s_store_sk#7, s_state#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#7, s_state#9] + +(19) Filter [codegen id : 2] +Input [2]: [s_store_sk#7, s_state#9] +Condition : isnotnull(s_store_sk#7) + +(20) BroadcastExchange +Input [2]: [s_store_sk#7, s_state#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#7] +Join condition: None + +(22) Project [codegen id : 4] +Output [3]: [ss_sold_date_sk#1, ss_net_profit#3, s_state#9] +Input [5]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3, s_store_sk#7, s_state#9] + +(23) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#4] + +(24) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(25) Project [codegen id : 4] +Output [2]: [ss_net_profit#3, s_state#9] +Input [4]: [ss_sold_date_sk#1, ss_net_profit#3, s_state#9, d_date_sk#4] + +(26) HashAggregate [codegen id : 4] +Input [2]: [ss_net_profit#3, s_state#9] +Keys [1]: 
[s_state#9] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [sum#11] +Results [2]: [s_state#9, sum#12] + +(27) Exchange +Input [2]: [s_state#9, sum#12] +Arguments: hashpartitioning(s_state#9, 5), true, [id=#13] + +(28) HashAggregate [codegen id : 5] +Input [2]: [s_state#9, sum#12] +Keys [1]: [s_state#9] +Functions [1]: [sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#3))#14] +Results [3]: [s_state#9 AS s_state#15, s_state#9, MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#14,17,2) AS _w2#16] + +(29) Exchange +Input [3]: [s_state#15, s_state#9, _w2#16] +Arguments: hashpartitioning(s_state#9, 5), true, [id=#17] + +(30) Sort [codegen id : 6] +Input [3]: [s_state#15, s_state#9, _w2#16] +Arguments: [s_state#9 ASC NULLS FIRST, _w2#16 DESC NULLS LAST], false, 0 + +(31) Window +Input [3]: [s_state#15, s_state#9, _w2#16] +Arguments: [rank(_w2#16) windowspecdefinition(s_state#9, _w2#16 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS ranking#18], [s_state#9], [_w2#16 DESC NULLS LAST] + +(32) Filter [codegen id : 7] +Input [4]: [s_state#15, s_state#9, _w2#16, ranking#18] +Condition : (isnotnull(ranking#18) AND (ranking#18 <= 5)) + +(33) Project [codegen id : 7] +Output [1]: [s_state#15] +Input [4]: [s_state#15, s_state#9, _w2#16, ranking#18] + +(34) BroadcastExchange +Input [1]: [s_state#15] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#19] + +(35) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [s_state#9] +Right keys [1]: [s_state#15] +Join condition: None + +(36) BroadcastExchange +Input [3]: [s_store_sk#7, s_county#8, s_state#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#20] + +(37) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#7] +Join condition: None + +(38) Project [codegen id : 9] +Output [3]: 
[ss_net_profit#3, s_state#9, s_county#8] +Input [5]: [ss_store_sk#2, ss_net_profit#3, s_store_sk#7, s_county#8, s_state#9] + +(39) Expand [codegen id : 9] +Input [3]: [ss_net_profit#3, s_state#9, s_county#8] +Arguments: [List(ss_net_profit#3, s_state#9, s_county#8, 0), List(ss_net_profit#3, s_state#9, null, 1), List(ss_net_profit#3, null, null, 3)], [ss_net_profit#3, s_state#21, s_county#22, spark_grouping_id#23] + +(40) HashAggregate [codegen id : 9] +Input [4]: [ss_net_profit#3, s_state#21, s_county#22, spark_grouping_id#23] +Keys [3]: [s_state#21, s_county#22, spark_grouping_id#23] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [sum#24] +Results [4]: [s_state#21, s_county#22, spark_grouping_id#23, sum#25] + +(41) Exchange +Input [4]: [s_state#21, s_county#22, spark_grouping_id#23, sum#25] +Arguments: hashpartitioning(s_state#21, s_county#22, spark_grouping_id#23, 5), true, [id=#26] + +(42) HashAggregate [codegen id : 10] +Input [4]: [s_state#21, s_county#22, spark_grouping_id#23, sum#25] +Keys [3]: [s_state#21, s_county#22, spark_grouping_id#23] +Functions [1]: [sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#3))#27] +Results [7]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#27,17,2) AS total_sum#28, s_state#21, s_county#22, (cast((shiftright(spark_grouping_id#23, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#23, 0) & 1) as tinyint)) AS lochierarchy#29, (cast((shiftright(spark_grouping_id#23, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#23, 0) & 1) as tinyint)) AS _w1#30, CASE WHEN (cast(cast((shiftright(spark_grouping_id#23, 0) & 1) as tinyint) as int) = 0) THEN s_state#21 END AS _w2#31, MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#27,17,2) AS _w3#32] + +(43) Exchange +Input [7]: [total_sum#28, s_state#21, s_county#22, lochierarchy#29, _w1#30, _w2#31, _w3#32] +Arguments: hashpartitioning(_w1#30, _w2#31, 5), true, [id=#33] + +(44) Sort 
[codegen id : 11] +Input [7]: [total_sum#28, s_state#21, s_county#22, lochierarchy#29, _w1#30, _w2#31, _w3#32] +Arguments: [_w1#30 ASC NULLS FIRST, _w2#31 ASC NULLS FIRST, _w3#32 DESC NULLS LAST], false, 0 + +(45) Window +Input [7]: [total_sum#28, s_state#21, s_county#22, lochierarchy#29, _w1#30, _w2#31, _w3#32] +Arguments: [rank(_w3#32) windowspecdefinition(_w1#30, _w2#31, _w3#32 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#34], [_w1#30, _w2#31], [_w3#32 DESC NULLS LAST] + +(46) Project [codegen id : 12] +Output [5]: [total_sum#28, s_state#21, s_county#22, lochierarchy#29, rank_within_parent#34] +Input [8]: [total_sum#28, s_state#21, s_county#22, lochierarchy#29, _w1#30, _w2#31, _w3#32, rank_within_parent#34] + +(47) TakeOrderedAndProject +Input [5]: [total_sum#28, s_state#21, s_county#22, lochierarchy#29, rank_within_parent#34] +Arguments: 100, [lochierarchy#29 DESC NULLS LAST, CASE WHEN (cast(lochierarchy#29 as int) = 0) THEN s_state#21 END ASC NULLS FIRST, rank_within_parent#34 ASC NULLS FIRST], [total_sum#28, s_state#21, s_county#22, lochierarchy#29, rank_within_parent#34] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/simplified.txt new file mode 100644 index 0000000000000..1587213842374 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/simplified.txt @@ -0,0 +1,74 @@ +TakeOrderedAndProject [lochierarchy,rank_within_parent,s_county,s_state,total_sum] + WholeStageCodegen (12) + Project [lochierarchy,rank_within_parent,s_county,s_state,total_sum] + InputAdapter + Window [_w1,_w2,_w3] + WholeStageCodegen (11) + Sort [_w1,_w2,_w3] + InputAdapter + Exchange [_w1,_w2] #1 + WholeStageCodegen (10) + HashAggregate [s_county,s_state,spark_grouping_id,sum] [_w1,_w2,_w3,lochierarchy,sum,sum(UnscaledValue(ss_net_profit)),total_sum] + 
InputAdapter + Exchange [s_county,s_state,spark_grouping_id] #2 + WholeStageCodegen (9) + HashAggregate [s_county,s_state,spark_grouping_id,ss_net_profit] [sum,sum] + Expand [s_county,s_state,ss_net_profit] + Project [s_county,s_state,ss_net_profit] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_net_profit,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_net_profit,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (8) + BroadcastHashJoin [s_state,s_state] + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_county,s_state,s_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (7) + Project [s_state] + Filter [ranking] + InputAdapter + Window [_w2,s_state] + WholeStageCodegen (6) + Sort [_w2,s_state] + InputAdapter + Exchange [s_state] #6 + WholeStageCodegen (5) + HashAggregate [s_state,sum] [_w2,s_state,sum,sum(UnscaledValue(ss_net_profit))] + InputAdapter + Exchange [s_state] #7 + WholeStageCodegen (4) + HashAggregate [s_state,ss_net_profit] [sum,sum] + Project [s_state,ss_net_profit] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [s_state,ss_net_profit,ss_sold_date_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_net_profit,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_state,s_store_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71.sf100/explain.txt new file mode 100644 index 0000000000000..3513c8f42a2e1 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71.sf100/explain.txt @@ -0,0 +1,232 @@ +== Physical Plan == +* Sort (42) ++- Exchange (41) + +- * HashAggregate (40) + +- Exchange (39) + +- * HashAggregate (38) + +- * Project (37) + +- * BroadcastHashJoin Inner BuildRight (36) + :- * Project (30) + : +- * BroadcastHashJoin Inner BuildLeft (29) + : :- BroadcastExchange (5) + : : +- * Project (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.item (1) + : +- Union (28) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Filter (8) + : : : +- * ColumnarToRow (7) + : : : +- Scan parquet default.web_sales (6) + : : +- BroadcastExchange (13) + : : +- * Project (12) + : : +- * Filter (11) + : : +- * ColumnarToRow (10) + : : +- Scan parquet default.date_dim (9) + : :- * Project (21) + : : +- * BroadcastHashJoin Inner BuildRight (20) + : : :- * Filter (18) + : : : +- * ColumnarToRow (17) + : : : +- Scan parquet default.catalog_sales (16) + : : +- ReusedExchange (19) + : +- * Project (27) + : +- * BroadcastHashJoin Inner BuildRight (26) + : :- * Filter (24) + : : +- * ColumnarToRow (23) + : : +- Scan parquet default.store_sales (22) + : +- ReusedExchange (25) + +- BroadcastExchange (35) + +- * Project (34) + +- * Filter (33) + +- * ColumnarToRow (32) + +- Scan parquet default.time_dim (31) + + +(1) Scan parquet default.item +Output [4]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manager_id#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), 
IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [4]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manager_id#4] + +(3) Filter [codegen id : 1] +Input [4]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manager_id#4] +Condition : ((isnotnull(i_manager_id#4) AND (i_manager_id#4 = 1)) AND isnotnull(i_item_sk#1)) + +(4) Project [codegen id : 1] +Output [3]: [i_item_sk#1, i_brand_id#2, i_brand#3] +Input [4]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manager_id#4] + +(5) BroadcastExchange +Input [3]: [i_item_sk#1, i_brand_id#2, i_brand#3] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#5] + +(6) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#6, ws_sold_time_sk#7, ws_item_sk#8, ws_ext_sales_price#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk), IsNotNull(ws_sold_time_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 3] +Input [4]: [ws_sold_date_sk#6, ws_sold_time_sk#7, ws_item_sk#8, ws_ext_sales_price#9] + +(8) Filter [codegen id : 3] +Input [4]: [ws_sold_date_sk#6, ws_sold_time_sk#7, ws_item_sk#8, ws_ext_sales_price#9] +Condition : ((isnotnull(ws_sold_date_sk#6) AND isnotnull(ws_item_sk#8)) AND isnotnull(ws_sold_time_sk#7)) + +(9) Scan parquet default.date_dim +Output [3]: [d_date_sk#10, d_year#11, d_moy#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(10) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] + +(11) Filter [codegen id : 2] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] 
+Condition : ((((isnotnull(d_moy#12) AND isnotnull(d_year#11)) AND (d_moy#12 = 11)) AND (d_year#11 = 1999)) AND isnotnull(d_date_sk#10)) + +(12) Project [codegen id : 2] +Output [1]: [d_date_sk#10] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] + +(13) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_sold_date_sk#6] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(15) Project [codegen id : 3] +Output [3]: [ws_ext_sales_price#9 AS ext_price#14, ws_item_sk#8 AS sold_item_sk#15, ws_sold_time_sk#7 AS time_sk#16] +Input [5]: [ws_sold_date_sk#6, ws_sold_time_sk#7, ws_item_sk#8, ws_ext_sales_price#9, d_date_sk#10] + +(16) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#17, cs_sold_time_sk#18, cs_item_sk#19, cs_ext_sales_price#20] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_time_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 5] +Input [4]: [cs_sold_date_sk#17, cs_sold_time_sk#18, cs_item_sk#19, cs_ext_sales_price#20] + +(18) Filter [codegen id : 5] +Input [4]: [cs_sold_date_sk#17, cs_sold_time_sk#18, cs_item_sk#19, cs_ext_sales_price#20] +Condition : ((isnotnull(cs_sold_date_sk#17) AND isnotnull(cs_item_sk#19)) AND isnotnull(cs_sold_time_sk#18)) + +(19) ReusedExchange [Reuses operator id: 13] +Output [1]: [d_date_sk#10] + +(20) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_sold_date_sk#17] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(21) Project [codegen id : 5] +Output [3]: [cs_ext_sales_price#20 AS ext_price#21, cs_item_sk#19 AS sold_item_sk#22, cs_sold_time_sk#18 AS time_sk#23] +Input [5]: [cs_sold_date_sk#17, cs_sold_time_sk#18, 
cs_item_sk#19, cs_ext_sales_price#20, d_date_sk#10] + +(22) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#24, ss_sold_time_sk#25, ss_item_sk#26, ss_ext_sales_price#27] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_sold_time_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 7] +Input [4]: [ss_sold_date_sk#24, ss_sold_time_sk#25, ss_item_sk#26, ss_ext_sales_price#27] + +(24) Filter [codegen id : 7] +Input [4]: [ss_sold_date_sk#24, ss_sold_time_sk#25, ss_item_sk#26, ss_ext_sales_price#27] +Condition : ((isnotnull(ss_sold_date_sk#24) AND isnotnull(ss_item_sk#26)) AND isnotnull(ss_sold_time_sk#25)) + +(25) ReusedExchange [Reuses operator id: 13] +Output [1]: [d_date_sk#10] + +(26) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_sold_date_sk#24] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(27) Project [codegen id : 7] +Output [3]: [ss_ext_sales_price#27 AS ext_price#28, ss_item_sk#26 AS sold_item_sk#29, ss_sold_time_sk#25 AS time_sk#30] +Input [5]: [ss_sold_date_sk#24, ss_sold_time_sk#25, ss_item_sk#26, ss_ext_sales_price#27, d_date_sk#10] + +(28) Union + +(29) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [sold_item_sk#15] +Join condition: None + +(30) Project [codegen id : 9] +Output [4]: [i_brand_id#2, i_brand#3, ext_price#14, time_sk#16] +Input [6]: [i_item_sk#1, i_brand_id#2, i_brand#3, ext_price#14, sold_item_sk#15, time_sk#16] + +(31) Scan parquet default.time_dim +Output [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/time_dim] +PushedFilters: 
[Or(EqualTo(t_meal_time,breakfast),EqualTo(t_meal_time,dinner)), IsNotNull(t_time_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 8] +Input [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] + +(33) Filter [codegen id : 8] +Input [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] +Condition : (((t_meal_time#34 = breakfast) OR (t_meal_time#34 = dinner)) AND isnotnull(t_time_sk#31)) + +(34) Project [codegen id : 8] +Output [3]: [t_time_sk#31, t_hour#32, t_minute#33] +Input [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] + +(35) BroadcastExchange +Input [3]: [t_time_sk#31, t_hour#32, t_minute#33] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#35] + +(36) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [time_sk#16] +Right keys [1]: [t_time_sk#31] +Join condition: None + +(37) Project [codegen id : 9] +Output [5]: [i_brand_id#2, i_brand#3, ext_price#14, t_hour#32, t_minute#33] +Input [7]: [i_brand_id#2, i_brand#3, ext_price#14, time_sk#16, t_time_sk#31, t_hour#32, t_minute#33] + +(38) HashAggregate [codegen id : 9] +Input [5]: [i_brand_id#2, i_brand#3, ext_price#14, t_hour#32, t_minute#33] +Keys [4]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33] +Functions [1]: [partial_sum(UnscaledValue(ext_price#14))] +Aggregate Attributes [1]: [sum#36] +Results [5]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, sum#37] + +(39) Exchange +Input [5]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, sum#37] +Arguments: hashpartitioning(i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, 5), true, [id=#38] + +(40) HashAggregate [codegen id : 10] +Input [5]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, sum#37] +Keys [4]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33] +Functions [1]: [sum(UnscaledValue(ext_price#14))] +Aggregate Attributes [1]: [sum(UnscaledValue(ext_price#14))#39] +Results [5]: [i_brand_id#2 AS brand_id#40, i_brand#3 AS brand#41, t_hour#32, t_minute#33, 
MakeDecimal(sum(UnscaledValue(ext_price#14))#39,17,2) AS ext_price#42] + +(41) Exchange +Input [5]: [brand_id#40, brand#41, t_hour#32, t_minute#33, ext_price#42] +Arguments: rangepartitioning(ext_price#42 DESC NULLS LAST, brand_id#40 ASC NULLS FIRST, 5), true, [id=#43] + +(42) Sort [codegen id : 11] +Input [5]: [brand_id#40, brand#41, t_hour#32, t_minute#33, ext_price#42] +Arguments: [ext_price#42 DESC NULLS LAST, brand_id#40 ASC NULLS FIRST], true, 0 + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71.sf100/simplified.txt new file mode 100644 index 0000000000000..cca51b9457474 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71.sf100/simplified.txt @@ -0,0 +1,65 @@ +WholeStageCodegen (11) + Sort [brand_id,ext_price] + InputAdapter + Exchange [brand_id,ext_price] #1 + WholeStageCodegen (10) + HashAggregate [i_brand,i_brand_id,sum,t_hour,t_minute] [brand,brand_id,ext_price,sum,sum(UnscaledValue(ext_price))] + InputAdapter + Exchange [i_brand,i_brand_id,t_hour,t_minute] #2 + WholeStageCodegen (9) + HashAggregate [ext_price,i_brand,i_brand_id,t_hour,t_minute] [sum,sum] + Project [ext_price,i_brand,i_brand_id,t_hour,t_minute] + BroadcastHashJoin [t_time_sk,time_sk] + Project [ext_price,i_brand,i_brand_id,time_sk] + BroadcastHashJoin [i_item_sk,sold_item_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [i_brand,i_brand_id,i_item_sk] + Filter [i_item_sk,i_manager_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_brand_id,i_item_sk,i_manager_id] + InputAdapter + Union + WholeStageCodegen (3) + Project [ws_ext_sales_price,ws_item_sk,ws_sold_time_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_item_sk,ws_sold_date_sk,ws_sold_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales 
[ws_ext_sales_price,ws_item_sk,ws_sold_date_sk,ws_sold_time_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + WholeStageCodegen (5) + Project [cs_ext_sales_price,cs_item_sk,cs_sold_time_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_item_sk,cs_sold_date_sk,cs_sold_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk,cs_sold_time_sk] + InputAdapter + ReusedExchange [d_date_sk] #4 + WholeStageCodegen (7) + Project [ss_ext_sales_price,ss_item_sk,ss_sold_time_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk,ss_sold_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_sold_time_sk] + InputAdapter + ReusedExchange [d_date_sk] #4 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (8) + Project [t_hour,t_minute,t_time_sk] + Filter [t_meal_time,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_hour,t_meal_time,t_minute,t_time_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/explain.txt new file mode 100644 index 0000000000000..11046e192f86a --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/explain.txt @@ -0,0 +1,232 @@ +== Physical Plan == +* Sort (42) ++- Exchange (41) + +- * HashAggregate (40) + +- Exchange (39) + +- * HashAggregate (38) + +- * Project (37) + +- * BroadcastHashJoin Inner BuildRight (36) + :- * Project (30) + : +- * BroadcastHashJoin Inner BuildLeft (29) + : :- BroadcastExchange (5) + : : +- * Project (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.item (1) + : 
+- Union (28) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Filter (8) + : : : +- * ColumnarToRow (7) + : : : +- Scan parquet default.web_sales (6) + : : +- BroadcastExchange (13) + : : +- * Project (12) + : : +- * Filter (11) + : : +- * ColumnarToRow (10) + : : +- Scan parquet default.date_dim (9) + : :- * Project (21) + : : +- * BroadcastHashJoin Inner BuildRight (20) + : : :- * Filter (18) + : : : +- * ColumnarToRow (17) + : : : +- Scan parquet default.catalog_sales (16) + : : +- ReusedExchange (19) + : +- * Project (27) + : +- * BroadcastHashJoin Inner BuildRight (26) + : :- * Filter (24) + : : +- * ColumnarToRow (23) + : : +- Scan parquet default.store_sales (22) + : +- ReusedExchange (25) + +- BroadcastExchange (35) + +- * Project (34) + +- * Filter (33) + +- * ColumnarToRow (32) + +- Scan parquet default.time_dim (31) + + +(1) Scan parquet default.item +Output [4]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manager_id#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [4]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manager_id#4] + +(3) Filter [codegen id : 1] +Input [4]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manager_id#4] +Condition : ((isnotnull(i_manager_id#4) AND (i_manager_id#4 = 1)) AND isnotnull(i_item_sk#1)) + +(4) Project [codegen id : 1] +Output [3]: [i_item_sk#1, i_brand_id#2, i_brand#3] +Input [4]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manager_id#4] + +(5) BroadcastExchange +Input [3]: [i_item_sk#1, i_brand_id#2, i_brand#3] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#5] + +(6) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#6, ws_sold_time_sk#7, ws_item_sk#8, ws_ext_sales_price#9] 
+Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk), IsNotNull(ws_sold_time_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 3] +Input [4]: [ws_sold_date_sk#6, ws_sold_time_sk#7, ws_item_sk#8, ws_ext_sales_price#9] + +(8) Filter [codegen id : 3] +Input [4]: [ws_sold_date_sk#6, ws_sold_time_sk#7, ws_item_sk#8, ws_ext_sales_price#9] +Condition : ((isnotnull(ws_sold_date_sk#6) AND isnotnull(ws_item_sk#8)) AND isnotnull(ws_sold_time_sk#7)) + +(9) Scan parquet default.date_dim +Output [3]: [d_date_sk#10, d_year#11, d_moy#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(10) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] + +(11) Filter [codegen id : 2] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] +Condition : ((((isnotnull(d_moy#12) AND isnotnull(d_year#11)) AND (d_moy#12 = 11)) AND (d_year#11 = 1999)) AND isnotnull(d_date_sk#10)) + +(12) Project [codegen id : 2] +Output [1]: [d_date_sk#10] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] + +(13) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_sold_date_sk#6] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(15) Project [codegen id : 3] +Output [3]: [ws_ext_sales_price#9 AS ext_price#14, ws_item_sk#8 AS sold_item_sk#15, ws_sold_time_sk#7 AS time_sk#16] +Input [5]: [ws_sold_date_sk#6, ws_sold_time_sk#7, ws_item_sk#8, ws_ext_sales_price#9, d_date_sk#10] + +(16) Scan parquet 
default.catalog_sales +Output [4]: [cs_sold_date_sk#17, cs_sold_time_sk#18, cs_item_sk#19, cs_ext_sales_price#20] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_time_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 5] +Input [4]: [cs_sold_date_sk#17, cs_sold_time_sk#18, cs_item_sk#19, cs_ext_sales_price#20] + +(18) Filter [codegen id : 5] +Input [4]: [cs_sold_date_sk#17, cs_sold_time_sk#18, cs_item_sk#19, cs_ext_sales_price#20] +Condition : ((isnotnull(cs_sold_date_sk#17) AND isnotnull(cs_item_sk#19)) AND isnotnull(cs_sold_time_sk#18)) + +(19) ReusedExchange [Reuses operator id: 13] +Output [1]: [d_date_sk#10] + +(20) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_sold_date_sk#17] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(21) Project [codegen id : 5] +Output [3]: [cs_ext_sales_price#20 AS ext_price#21, cs_item_sk#19 AS sold_item_sk#22, cs_sold_time_sk#18 AS time_sk#23] +Input [5]: [cs_sold_date_sk#17, cs_sold_time_sk#18, cs_item_sk#19, cs_ext_sales_price#20, d_date_sk#10] + +(22) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#24, ss_sold_time_sk#25, ss_item_sk#26, ss_ext_sales_price#27] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_sold_time_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 7] +Input [4]: [ss_sold_date_sk#24, ss_sold_time_sk#25, ss_item_sk#26, ss_ext_sales_price#27] + +(24) Filter [codegen id : 7] +Input [4]: [ss_sold_date_sk#24, ss_sold_time_sk#25, ss_item_sk#26, ss_ext_sales_price#27] +Condition : ((isnotnull(ss_sold_date_sk#24) AND isnotnull(ss_item_sk#26)) AND 
isnotnull(ss_sold_time_sk#25)) + +(25) ReusedExchange [Reuses operator id: 13] +Output [1]: [d_date_sk#10] + +(26) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_sold_date_sk#24] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(27) Project [codegen id : 7] +Output [3]: [ss_ext_sales_price#27 AS ext_price#28, ss_item_sk#26 AS sold_item_sk#29, ss_sold_time_sk#25 AS time_sk#30] +Input [5]: [ss_sold_date_sk#24, ss_sold_time_sk#25, ss_item_sk#26, ss_ext_sales_price#27, d_date_sk#10] + +(28) Union + +(29) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [sold_item_sk#15] +Join condition: None + +(30) Project [codegen id : 9] +Output [4]: [i_brand_id#2, i_brand#3, ext_price#14, time_sk#16] +Input [6]: [i_item_sk#1, i_brand_id#2, i_brand#3, ext_price#14, sold_item_sk#15, time_sk#16] + +(31) Scan parquet default.time_dim +Output [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/time_dim] +PushedFilters: [Or(EqualTo(t_meal_time,breakfast),EqualTo(t_meal_time,dinner)), IsNotNull(t_time_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 8] +Input [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] + +(33) Filter [codegen id : 8] +Input [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] +Condition : (((t_meal_time#34 = breakfast) OR (t_meal_time#34 = dinner)) AND isnotnull(t_time_sk#31)) + +(34) Project [codegen id : 8] +Output [3]: [t_time_sk#31, t_hour#32, t_minute#33] +Input [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] + +(35) BroadcastExchange +Input [3]: [t_time_sk#31, t_hour#32, t_minute#33] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#35] + +(36) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [time_sk#16] +Right keys [1]: [t_time_sk#31] +Join condition: None + 
+(37) Project [codegen id : 9] +Output [5]: [i_brand_id#2, i_brand#3, ext_price#14, t_hour#32, t_minute#33] +Input [7]: [i_brand_id#2, i_brand#3, ext_price#14, time_sk#16, t_time_sk#31, t_hour#32, t_minute#33] + +(38) HashAggregate [codegen id : 9] +Input [5]: [i_brand_id#2, i_brand#3, ext_price#14, t_hour#32, t_minute#33] +Keys [4]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33] +Functions [1]: [partial_sum(UnscaledValue(ext_price#14))] +Aggregate Attributes [1]: [sum#36] +Results [5]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, sum#37] + +(39) Exchange +Input [5]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, sum#37] +Arguments: hashpartitioning(i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, 5), true, [id=#38] + +(40) HashAggregate [codegen id : 10] +Input [5]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, sum#37] +Keys [4]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33] +Functions [1]: [sum(UnscaledValue(ext_price#14))] +Aggregate Attributes [1]: [sum(UnscaledValue(ext_price#14))#39] +Results [5]: [i_brand_id#2 AS brand_id#40, i_brand#3 AS brand#41, t_hour#32, t_minute#33, MakeDecimal(sum(UnscaledValue(ext_price#14))#39,17,2) AS ext_price#42] + +(41) Exchange +Input [5]: [brand_id#40, brand#41, t_hour#32, t_minute#33, ext_price#42] +Arguments: rangepartitioning(ext_price#42 DESC NULLS LAST, brand_id#40 ASC NULLS FIRST, 5), true, [id=#43] + +(42) Sort [codegen id : 11] +Input [5]: [brand_id#40, brand#41, t_hour#32, t_minute#33, ext_price#42] +Arguments: [ext_price#42 DESC NULLS LAST, brand_id#40 ASC NULLS FIRST], true, 0 + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/simplified.txt new file mode 100644 index 0000000000000..cca51b9457474 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/simplified.txt @@ -0,0 +1,65 @@ +WholeStageCodegen (11) + Sort [brand_id,ext_price] + 
InputAdapter + Exchange [brand_id,ext_price] #1 + WholeStageCodegen (10) + HashAggregate [i_brand,i_brand_id,sum,t_hour,t_minute] [brand,brand_id,ext_price,sum,sum(UnscaledValue(ext_price))] + InputAdapter + Exchange [i_brand,i_brand_id,t_hour,t_minute] #2 + WholeStageCodegen (9) + HashAggregate [ext_price,i_brand,i_brand_id,t_hour,t_minute] [sum,sum] + Project [ext_price,i_brand,i_brand_id,t_hour,t_minute] + BroadcastHashJoin [t_time_sk,time_sk] + Project [ext_price,i_brand,i_brand_id,time_sk] + BroadcastHashJoin [i_item_sk,sold_item_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [i_brand,i_brand_id,i_item_sk] + Filter [i_item_sk,i_manager_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_brand_id,i_item_sk,i_manager_id] + InputAdapter + Union + WholeStageCodegen (3) + Project [ws_ext_sales_price,ws_item_sk,ws_sold_time_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_item_sk,ws_sold_date_sk,ws_sold_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk,ws_sold_time_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + WholeStageCodegen (5) + Project [cs_ext_sales_price,cs_item_sk,cs_sold_time_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_item_sk,cs_sold_date_sk,cs_sold_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk,cs_sold_time_sk] + InputAdapter + ReusedExchange [d_date_sk] #4 + WholeStageCodegen (7) + Project [ss_ext_sales_price,ss_item_sk,ss_sold_time_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk,ss_sold_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales 
[ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_sold_time_sk] + InputAdapter + ReusedExchange [d_date_sk] #4 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (8) + Project [t_hour,t_minute,t_time_sk] + Filter [t_meal_time,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_hour,t_meal_time,t_minute,t_time_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/explain.txt new file mode 100644 index 0000000000000..50422e7949201 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/explain.txt @@ -0,0 +1,436 @@ +== Physical Plan == +TakeOrderedAndProject (79) ++- * HashAggregate (78) + +- Exchange (77) + +- * HashAggregate (76) + +- * Project (75) + +- SortMergeJoin LeftOuter (74) + :- * Sort (68) + : +- Exchange (67) + : +- * Project (66) + : +- * BroadcastHashJoin LeftOuter BuildRight (65) + : :- * Project (60) + : : +- * SortMergeJoin Inner (59) + : : :- * Sort (47) + : : : +- Exchange (46) + : : : +- * Project (45) + : : : +- * BroadcastHashJoin Inner BuildRight (44) + : : : :- * Project (32) + : : : : +- * SortMergeJoin Inner (31) + : : : : :- * Sort (25) + : : : : : +- Exchange (24) + : : : : : +- * Project (23) + : : : : : +- * BroadcastHashJoin Inner BuildRight (22) + : : : : : :- * Project (17) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : : : : :- * Project (10) + : : : : : : : +- * BroadcastHashJoin Inner BuildLeft (9) + : : : : : : : :- BroadcastExchange (5) + : : : : : : : : +- * Project (4) + : : : : : : : : +- * Filter (3) + : : : : : : : : +- * ColumnarToRow (2) + : : : : : : : : +- Scan parquet default.household_demographics (1) + : : : : : : : +- * Filter (8) + : : : : : : : +- * ColumnarToRow (7) + : : : : : : : +- Scan parquet default.catalog_sales (6) + : : : : : : +- BroadcastExchange (15) + : : : : : : +- * 
Project (14) + : : : : : : +- * Filter (13) + : : : : : : +- * ColumnarToRow (12) + : : : : : : +- Scan parquet default.customer_demographics (11) + : : : : : +- BroadcastExchange (21) + : : : : : +- * Filter (20) + : : : : : +- * ColumnarToRow (19) + : : : : : +- Scan parquet default.date_dim (18) + : : : : +- * Sort (30) + : : : : +- Exchange (29) + : : : : +- * Filter (28) + : : : : +- * ColumnarToRow (27) + : : : : +- Scan parquet default.item (26) + : : : +- BroadcastExchange (43) + : : : +- * Project (42) + : : : +- * BroadcastHashJoin Inner BuildRight (41) + : : : :- * Filter (35) + : : : : +- * ColumnarToRow (34) + : : : : +- Scan parquet default.date_dim (33) + : : : +- BroadcastExchange (40) + : : : +- * Project (39) + : : : +- * Filter (38) + : : : +- * ColumnarToRow (37) + : : : +- Scan parquet default.date_dim (36) + : : +- * Sort (58) + : : +- Exchange (57) + : : +- * Project (56) + : : +- * BroadcastHashJoin Inner BuildLeft (55) + : : :- BroadcastExchange (51) + : : : +- * Filter (50) + : : : +- * ColumnarToRow (49) + : : : +- Scan parquet default.warehouse (48) + : : +- * Filter (54) + : : +- * ColumnarToRow (53) + : : +- Scan parquet default.inventory (52) + : +- BroadcastExchange (64) + : +- * Filter (63) + : +- * ColumnarToRow (62) + : +- Scan parquet default.promotion (61) + +- * Sort (73) + +- Exchange (72) + +- * Filter (71) + +- * ColumnarToRow (70) + +- Scan parquet default.catalog_returns (69) + + +(1) Scan parquet default.household_demographics +Output [2]: [hd_demo_sk#1, hd_buy_potential#2] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/household_demographics] +PushedFilters: [IsNotNull(hd_buy_potential), EqualTo(hd_buy_potential,>10000), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [2]: [hd_demo_sk#1, hd_buy_potential#2] + +(3) Filter [codegen id : 1] +Input [2]: [hd_demo_sk#1, 
hd_buy_potential#2] +Condition : ((isnotnull(hd_buy_potential#2) AND (hd_buy_potential#2 = >10000)) AND isnotnull(hd_demo_sk#1)) + +(4) Project [codegen id : 1] +Output [1]: [hd_demo_sk#1] +Input [2]: [hd_demo_sk#1, hd_buy_potential#2] + +(5) BroadcastExchange +Input [1]: [hd_demo_sk#1] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#3] + +(6) Scan parquet default.catalog_sales +Output [8]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_bill_cdemo_sk#6, cs_bill_hdemo_sk#7, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_quantity), IsNotNull(cs_item_sk), IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_hdemo_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_ship_date_sk)] +ReadSchema: struct + +(7) ColumnarToRow +Input [8]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_bill_cdemo_sk#6, cs_bill_hdemo_sk#7, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11] + +(8) Filter +Input [8]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_bill_cdemo_sk#6, cs_bill_hdemo_sk#7, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11] +Condition : (((((isnotnull(cs_quantity#11) AND isnotnull(cs_item_sk#8)) AND isnotnull(cs_bill_cdemo_sk#6)) AND isnotnull(cs_bill_hdemo_sk#7)) AND isnotnull(cs_sold_date_sk#4)) AND isnotnull(cs_ship_date_sk#5)) + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [hd_demo_sk#1] +Right keys [1]: [cs_bill_hdemo_sk#7] +Join condition: None + +(10) Project [codegen id : 4] +Output [7]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_bill_cdemo_sk#6, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11] +Input [9]: [hd_demo_sk#1, cs_sold_date_sk#4, cs_ship_date_sk#5, cs_bill_cdemo_sk#6, cs_bill_hdemo_sk#7, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11] + 
+(11) Scan parquet default.customer_demographics +Output [2]: [cd_demo_sk#12, cd_marital_status#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_demographics] +PushedFilters: [IsNotNull(cd_marital_status), EqualTo(cd_marital_status,D), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [cd_demo_sk#12, cd_marital_status#13] + +(13) Filter [codegen id : 2] +Input [2]: [cd_demo_sk#12, cd_marital_status#13] +Condition : ((isnotnull(cd_marital_status#13) AND (cd_marital_status#13 = D)) AND isnotnull(cd_demo_sk#12)) + +(14) Project [codegen id : 2] +Output [1]: [cd_demo_sk#12] +Input [2]: [cd_demo_sk#12, cd_marital_status#13] + +(15) BroadcastExchange +Input [1]: [cd_demo_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_bill_cdemo_sk#6] +Right keys [1]: [cd_demo_sk#12] +Join condition: None + +(17) Project [codegen id : 4] +Output [6]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11] +Input [8]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_bill_cdemo_sk#6, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, cd_demo_sk#12] + +(18) Scan parquet default.date_dim +Output [2]: [d_date_sk#15, d_date#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_date)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#15, d_date#16] + +(20) Filter [codegen id : 3] +Input [2]: [d_date_sk#15, d_date#16] +Condition : (isnotnull(d_date_sk#15) AND isnotnull(d_date#16)) + +(21) BroadcastExchange +Input [2]: [d_date_sk#15, d_date#16] 
+Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] + +(22) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_ship_date_sk#5] +Right keys [1]: [d_date_sk#15] +Join condition: None + +(23) Project [codegen id : 4] +Output [6]: [cs_sold_date_sk#4, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date#16] +Input [8]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date_sk#15, d_date#16] + +(24) Exchange +Input [6]: [cs_sold_date_sk#4, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date#16] +Arguments: hashpartitioning(cs_item_sk#8, 5), true, [id=#18] + +(25) Sort [codegen id : 5] +Input [6]: [cs_sold_date_sk#4, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date#16] +Arguments: [cs_item_sk#8 ASC NULLS FIRST], false, 0 + +(26) Scan parquet default.item +Output [2]: [i_item_sk#19, i_item_desc#20] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 6] +Input [2]: [i_item_sk#19, i_item_desc#20] + +(28) Filter [codegen id : 6] +Input [2]: [i_item_sk#19, i_item_desc#20] +Condition : isnotnull(i_item_sk#19) + +(29) Exchange +Input [2]: [i_item_sk#19, i_item_desc#20] +Arguments: hashpartitioning(i_item_sk#19, 5), true, [id=#21] + +(30) Sort [codegen id : 7] +Input [2]: [i_item_sk#19, i_item_desc#20] +Arguments: [i_item_sk#19 ASC NULLS FIRST], false, 0 + +(31) SortMergeJoin [codegen id : 10] +Left keys [1]: [cs_item_sk#8] +Right keys [1]: [i_item_sk#19] +Join condition: None + +(32) Project [codegen id : 10] +Output [7]: [cs_sold_date_sk#4, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date#16, i_item_desc#20] +Input [8]: [cs_sold_date_sk#4, cs_item_sk#8, cs_promo_sk#9, 
cs_order_number#10, cs_quantity#11, d_date#16, i_item_sk#19, i_item_desc#20] + +(33) Scan parquet default.date_dim +Output [2]: [d_date_sk#22, d_week_seq#23] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_week_seq)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 9] +Input [2]: [d_date_sk#22, d_week_seq#23] + +(35) Filter [codegen id : 9] +Input [2]: [d_date_sk#22, d_week_seq#23] +Condition : (isnotnull(d_date_sk#22) AND isnotnull(d_week_seq#23)) + +(36) Scan parquet default.date_dim +Output [4]: [d_date_sk#24, d_date#25, d_week_seq#26, d_year#27] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk), IsNotNull(d_week_seq), IsNotNull(d_date)] +ReadSchema: struct + +(37) ColumnarToRow [codegen id : 8] +Input [4]: [d_date_sk#24, d_date#25, d_week_seq#26, d_year#27] + +(38) Filter [codegen id : 8] +Input [4]: [d_date_sk#24, d_date#25, d_week_seq#26, d_year#27] +Condition : ((((isnotnull(d_year#27) AND (d_year#27 = 1999)) AND isnotnull(d_date_sk#24)) AND isnotnull(d_week_seq#26)) AND isnotnull(d_date#25)) + +(39) Project [codegen id : 8] +Output [3]: [d_date_sk#24, d_date#25, d_week_seq#26] +Input [4]: [d_date_sk#24, d_date#25, d_week_seq#26, d_year#27] + +(40) BroadcastExchange +Input [3]: [d_date_sk#24, d_date#25, d_week_seq#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [id=#28] + +(41) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [d_week_seq#23] +Right keys [1]: [d_week_seq#26] +Join condition: None + +(42) Project [codegen id : 9] +Output [4]: [d_date_sk#22, d_date_sk#24, d_date#25, d_week_seq#26] +Input [5]: [d_date_sk#22, 
d_week_seq#23, d_date_sk#24, d_date#25, d_week_seq#26] + +(43) BroadcastExchange +Input [4]: [d_date_sk#22, d_date_sk#24, d_date#25, d_week_seq#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#29] + +(44) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_sold_date_sk#4] +Right keys [1]: [d_date_sk#24] +Join condition: (d_date#16 > d_date#25 + 5 days) + +(45) Project [codegen id : 10] +Output [7]: [cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, i_item_desc#20, d_date_sk#22, d_week_seq#26] +Input [11]: [cs_sold_date_sk#4, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date#16, i_item_desc#20, d_date_sk#22, d_date_sk#24, d_date#25, d_week_seq#26] + +(46) Exchange +Input [7]: [cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, i_item_desc#20, d_date_sk#22, d_week_seq#26] +Arguments: hashpartitioning(cs_item_sk#8, d_date_sk#22, 5), true, [id=#30] + +(47) Sort [codegen id : 11] +Input [7]: [cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, i_item_desc#20, d_date_sk#22, d_week_seq#26] +Arguments: [cs_item_sk#8 ASC NULLS FIRST, d_date_sk#22 ASC NULLS FIRST], false, 0 + +(48) Scan parquet default.warehouse +Output [2]: [w_warehouse_sk#31, w_warehouse_name#32] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(49) ColumnarToRow [codegen id : 12] +Input [2]: [w_warehouse_sk#31, w_warehouse_name#32] + +(50) Filter [codegen id : 12] +Input [2]: [w_warehouse_sk#31, w_warehouse_name#32] +Condition : isnotnull(w_warehouse_sk#31) + +(51) BroadcastExchange +Input [2]: [w_warehouse_sk#31, w_warehouse_name#32] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#33] + +(52) Scan parquet default.inventory +Output [4]: 
[inv_date_sk#34, inv_item_sk#35, inv_warehouse_sk#36, inv_quantity_on_hand#37] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/inventory] +PushedFilters: [IsNotNull(inv_quantity_on_hand), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)] +ReadSchema: struct + +(53) ColumnarToRow +Input [4]: [inv_date_sk#34, inv_item_sk#35, inv_warehouse_sk#36, inv_quantity_on_hand#37] + +(54) Filter +Input [4]: [inv_date_sk#34, inv_item_sk#35, inv_warehouse_sk#36, inv_quantity_on_hand#37] +Condition : (((isnotnull(inv_quantity_on_hand#37) AND isnotnull(inv_item_sk#35)) AND isnotnull(inv_warehouse_sk#36)) AND isnotnull(inv_date_sk#34)) + +(55) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [w_warehouse_sk#31] +Right keys [1]: [inv_warehouse_sk#36] +Join condition: None + +(56) Project [codegen id : 13] +Output [4]: [w_warehouse_name#32, inv_date_sk#34, inv_item_sk#35, inv_quantity_on_hand#37] +Input [6]: [w_warehouse_sk#31, w_warehouse_name#32, inv_date_sk#34, inv_item_sk#35, inv_warehouse_sk#36, inv_quantity_on_hand#37] + +(57) Exchange +Input [4]: [w_warehouse_name#32, inv_date_sk#34, inv_item_sk#35, inv_quantity_on_hand#37] +Arguments: hashpartitioning(inv_item_sk#35, inv_date_sk#34, 5), true, [id=#38] + +(58) Sort [codegen id : 14] +Input [4]: [w_warehouse_name#32, inv_date_sk#34, inv_item_sk#35, inv_quantity_on_hand#37] +Arguments: [inv_item_sk#35 ASC NULLS FIRST, inv_date_sk#34 ASC NULLS FIRST], false, 0 + +(59) SortMergeJoin [codegen id : 16] +Left keys [2]: [cs_item_sk#8, d_date_sk#22] +Right keys [2]: [inv_item_sk#35, inv_date_sk#34] +Join condition: (inv_quantity_on_hand#37 < cs_quantity#11) + +(60) Project [codegen id : 16] +Output [6]: [cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, w_warehouse_name#32, i_item_desc#20, d_week_seq#26] +Input [11]: [cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, 
i_item_desc#20, d_date_sk#22, d_week_seq#26, w_warehouse_name#32, inv_date_sk#34, inv_item_sk#35, inv_quantity_on_hand#37] + +(61) Scan parquet default.promotion +Output [1]: [p_promo_sk#39] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/promotion] +PushedFilters: [IsNotNull(p_promo_sk)] +ReadSchema: struct + +(62) ColumnarToRow [codegen id : 15] +Input [1]: [p_promo_sk#39] + +(63) Filter [codegen id : 15] +Input [1]: [p_promo_sk#39] +Condition : isnotnull(p_promo_sk#39) + +(64) BroadcastExchange +Input [1]: [p_promo_sk#39] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#40] + +(65) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [cs_promo_sk#9] +Right keys [1]: [p_promo_sk#39] +Join condition: None + +(66) Project [codegen id : 16] +Output [5]: [cs_item_sk#8, cs_order_number#10, w_warehouse_name#32, i_item_desc#20, d_week_seq#26] +Input [7]: [cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, w_warehouse_name#32, i_item_desc#20, d_week_seq#26, p_promo_sk#39] + +(67) Exchange +Input [5]: [cs_item_sk#8, cs_order_number#10, w_warehouse_name#32, i_item_desc#20, d_week_seq#26] +Arguments: hashpartitioning(cs_item_sk#8, cs_order_number#10, 5), true, [id=#41] + +(68) Sort [codegen id : 17] +Input [5]: [cs_item_sk#8, cs_order_number#10, w_warehouse_name#32, i_item_desc#20, d_week_seq#26] +Arguments: [cs_item_sk#8 ASC NULLS FIRST, cs_order_number#10 ASC NULLS FIRST], false, 0 + +(69) Scan parquet default.catalog_returns +Output [2]: [cr_item_sk#42, cr_order_number#43] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_returns] +PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(70) ColumnarToRow [codegen id : 18] +Input [2]: [cr_item_sk#42, 
cr_order_number#43] + +(71) Filter [codegen id : 18] +Input [2]: [cr_item_sk#42, cr_order_number#43] +Condition : (isnotnull(cr_order_number#43) AND isnotnull(cr_item_sk#42)) + +(72) Exchange +Input [2]: [cr_item_sk#42, cr_order_number#43] +Arguments: hashpartitioning(cr_item_sk#42, cr_order_number#43, 5), true, [id=#44] + +(73) Sort [codegen id : 19] +Input [2]: [cr_item_sk#42, cr_order_number#43] +Arguments: [cr_item_sk#42 ASC NULLS FIRST, cr_order_number#43 ASC NULLS FIRST], false, 0 + +(74) SortMergeJoin +Left keys [2]: [cs_item_sk#8, cs_order_number#10] +Right keys [2]: [cr_item_sk#42, cr_order_number#43] +Join condition: None + +(75) Project [codegen id : 20] +Output [3]: [w_warehouse_name#32, i_item_desc#20, d_week_seq#26] +Input [7]: [cs_item_sk#8, cs_order_number#10, w_warehouse_name#32, i_item_desc#20, d_week_seq#26, cr_item_sk#42, cr_order_number#43] + +(76) HashAggregate [codegen id : 20] +Input [3]: [w_warehouse_name#32, i_item_desc#20, d_week_seq#26] +Keys [3]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#45] +Results [4]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26, count#46] + +(77) Exchange +Input [4]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26, count#46] +Arguments: hashpartitioning(i_item_desc#20, w_warehouse_name#32, d_week_seq#26, 5), true, [id=#47] + +(78) HashAggregate [codegen id : 21] +Input [4]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26, count#46] +Keys [3]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#48] +Results [6]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26, count(1)#48 AS no_promo#49, count(1)#48 AS promo#50, count(1)#48 AS total_cnt#51] + +(79) TakeOrderedAndProject +Input [6]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26, no_promo#49, promo#50, total_cnt#51] +Arguments: 100, [total_cnt#51 DESC NULLS LAST, i_item_desc#20 ASC NULLS FIRST, 
w_warehouse_name#32 ASC NULLS FIRST, d_week_seq#26 ASC NULLS FIRST], [i_item_desc#20, w_warehouse_name#32, d_week_seq#26, no_promo#49, promo#50, total_cnt#51] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/simplified.txt new file mode 100644 index 0000000000000..d0b53caa76851 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/simplified.txt @@ -0,0 +1,130 @@ +TakeOrderedAndProject [d_week_seq,i_item_desc,no_promo,promo,total_cnt,w_warehouse_name] + WholeStageCodegen (21) + HashAggregate [count,d_week_seq,i_item_desc,w_warehouse_name] [count,count(1),no_promo,promo,total_cnt] + InputAdapter + Exchange [d_week_seq,i_item_desc,w_warehouse_name] #1 + WholeStageCodegen (20) + HashAggregate [d_week_seq,i_item_desc,w_warehouse_name] [count,count] + Project [d_week_seq,i_item_desc,w_warehouse_name] + InputAdapter + SortMergeJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + WholeStageCodegen (17) + Sort [cs_item_sk,cs_order_number] + InputAdapter + Exchange [cs_item_sk,cs_order_number] #2 + WholeStageCodegen (16) + Project [cs_item_sk,cs_order_number,d_week_seq,i_item_desc,w_warehouse_name] + BroadcastHashJoin [cs_promo_sk,p_promo_sk] + Project [cs_item_sk,cs_order_number,cs_promo_sk,d_week_seq,i_item_desc,w_warehouse_name] + SortMergeJoin [cs_item_sk,cs_quantity,d_date_sk,inv_date_sk,inv_item_sk,inv_quantity_on_hand] + InputAdapter + WholeStageCodegen (11) + Sort [cs_item_sk,d_date_sk] + InputAdapter + Exchange [cs_item_sk,d_date_sk] #3 + WholeStageCodegen (10) + Project [cs_item_sk,cs_order_number,cs_promo_sk,cs_quantity,d_date_sk,d_week_seq,i_item_desc] + BroadcastHashJoin [cs_sold_date_sk,d_date,d_date,d_date_sk] + Project [cs_item_sk,cs_order_number,cs_promo_sk,cs_quantity,cs_sold_date_sk,d_date,i_item_desc] + SortMergeJoin [cs_item_sk,i_item_sk] + InputAdapter + 
WholeStageCodegen (5) + Sort [cs_item_sk] + InputAdapter + Exchange [cs_item_sk] #4 + WholeStageCodegen (4) + Project [cs_item_sk,cs_order_number,cs_promo_sk,cs_quantity,cs_sold_date_sk,d_date] + BroadcastHashJoin [cs_ship_date_sk,d_date_sk] + Project [cs_item_sk,cs_order_number,cs_promo_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk] + BroadcastHashJoin [cd_demo_sk,cs_bill_cdemo_sk] + Project [cs_bill_cdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk] + BroadcastHashJoin [cs_bill_hdemo_sk,hd_demo_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [hd_demo_sk] + Filter [hd_buy_potential,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_buy_potential,hd_demo_sk] + Filter [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + Project [cd_demo_sk] + Filter [cd_demo_sk,cd_marital_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (3) + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + WholeStageCodegen (7) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #8 + WholeStageCodegen (6) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_desc,i_item_sk] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (9) + Project [d_date,d_date_sk,d_date_sk,d_week_seq] + BroadcastHashJoin [d_week_seq,d_week_seq] + Filter [d_date_sk,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_week_seq] + InputAdapter + 
BroadcastExchange #10 + WholeStageCodegen (8) + Project [d_date,d_date_sk,d_week_seq] + Filter [d_date,d_date_sk,d_week_seq,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk,d_week_seq,d_year] + InputAdapter + WholeStageCodegen (14) + Sort [inv_date_sk,inv_item_sk] + InputAdapter + Exchange [inv_date_sk,inv_item_sk] #11 + WholeStageCodegen (13) + Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,w_warehouse_name] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (12) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.warehouse [w_warehouse_name,w_warehouse_sk] + Filter [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (15) + Filter [p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.promotion [p_promo_sk] + WholeStageCodegen (19) + Sort [cr_item_sk,cr_order_number] + InputAdapter + Exchange [cr_item_sk,cr_order_number] #14 + WholeStageCodegen (18) + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/explain.txt new file mode 100644 index 0000000000000..539ad1474749b --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/explain.txt @@ -0,0 +1,391 @@ +== Physical Plan == +TakeOrderedAndProject (70) ++- * HashAggregate (69) + +- Exchange (68) + +- * HashAggregate (67) + +- * Project (66) + +- * BroadcastHashJoin LeftOuter BuildRight (65) + :- * Project (60) + : +- * BroadcastHashJoin LeftOuter BuildRight (59) + : :- * Project (54) + 
: : +- * BroadcastHashJoin Inner BuildRight (53) + : : :- * Project (48) + : : : +- * BroadcastHashJoin Inner BuildRight (47) + : : : :- * Project (42) + : : : : +- * BroadcastHashJoin Inner BuildRight (41) + : : : : :- * Project (35) + : : : : : +- * BroadcastHashJoin Inner BuildRight (34) + : : : : : :- * Project (28) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (27) + : : : : : : :- * Project (21) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (20) + : : : : : : : :- * Project (15) + : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : : : : :- * Project (9) + : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : : : : : : :- * Filter (3) + : : : : : : : : : : +- * ColumnarToRow (2) + : : : : : : : : : : +- Scan parquet default.catalog_sales (1) + : : : : : : : : : +- BroadcastExchange (7) + : : : : : : : : : +- * Filter (6) + : : : : : : : : : +- * ColumnarToRow (5) + : : : : : : : : : +- Scan parquet default.inventory (4) + : : : : : : : : +- BroadcastExchange (13) + : : : : : : : : +- * Filter (12) + : : : : : : : : +- * ColumnarToRow (11) + : : : : : : : : +- Scan parquet default.warehouse (10) + : : : : : : : +- BroadcastExchange (19) + : : : : : : : +- * Filter (18) + : : : : : : : +- * ColumnarToRow (17) + : : : : : : : +- Scan parquet default.item (16) + : : : : : : +- BroadcastExchange (26) + : : : : : : +- * Project (25) + : : : : : : +- * Filter (24) + : : : : : : +- * ColumnarToRow (23) + : : : : : : +- Scan parquet default.customer_demographics (22) + : : : : : +- BroadcastExchange (33) + : : : : : +- * Project (32) + : : : : : +- * Filter (31) + : : : : : +- * ColumnarToRow (30) + : : : : : +- Scan parquet default.household_demographics (29) + : : : : +- BroadcastExchange (40) + : : : : +- * Project (39) + : : : : +- * Filter (38) + : : : : +- * ColumnarToRow (37) + : : : : +- Scan parquet default.date_dim (36) + : : : +- BroadcastExchange (46) + : : : +- * Filter (45) + : : : +- * 
ColumnarToRow (44) + : : : +- Scan parquet default.date_dim (43) + : : +- BroadcastExchange (52) + : : +- * Filter (51) + : : +- * ColumnarToRow (50) + : : +- Scan parquet default.date_dim (49) + : +- BroadcastExchange (58) + : +- * Filter (57) + : +- * ColumnarToRow (56) + : +- Scan parquet default.promotion (55) + +- BroadcastExchange (64) + +- * Filter (63) + +- * ColumnarToRow (62) + +- Scan parquet default.catalog_returns (61) + + +(1) Scan parquet default.catalog_sales +Output [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_quantity), IsNotNull(cs_item_sk), IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_hdemo_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_ship_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 11] +Input [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8] + +(3) Filter [codegen id : 11] +Input [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8] +Condition : (((((isnotnull(cs_quantity#8) AND isnotnull(cs_item_sk#5)) AND isnotnull(cs_bill_cdemo_sk#3)) AND isnotnull(cs_bill_hdemo_sk#4)) AND isnotnull(cs_sold_date_sk#1)) AND isnotnull(cs_ship_date_sk#2)) + +(4) Scan parquet default.inventory +Output [4]: [inv_date_sk#9, inv_item_sk#10, inv_warehouse_sk#11, inv_quantity_on_hand#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/inventory] +PushedFilters: [IsNotNull(inv_quantity_on_hand), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), 
IsNotNull(inv_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [inv_date_sk#9, inv_item_sk#10, inv_warehouse_sk#11, inv_quantity_on_hand#12] + +(6) Filter [codegen id : 1] +Input [4]: [inv_date_sk#9, inv_item_sk#10, inv_warehouse_sk#11, inv_quantity_on_hand#12] +Condition : (((isnotnull(inv_quantity_on_hand#12) AND isnotnull(inv_item_sk#10)) AND isnotnull(inv_warehouse_sk#11)) AND isnotnull(inv_date_sk#9)) + +(7) BroadcastExchange +Input [4]: [inv_date_sk#9, inv_item_sk#10, inv_warehouse_sk#11, inv_quantity_on_hand#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#13] + +(8) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_item_sk#5] +Right keys [1]: [inv_item_sk#10] +Join condition: (inv_quantity_on_hand#12 < cs_quantity#8) + +(9) Project [codegen id : 11] +Output [9]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, inv_warehouse_sk#11] +Input [12]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, inv_date_sk#9, inv_item_sk#10, inv_warehouse_sk#11, inv_quantity_on_hand#12] + +(10) Scan parquet default.warehouse +Output [2]: [w_warehouse_sk#14, w_warehouse_name#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [w_warehouse_sk#14, w_warehouse_name#15] + +(12) Filter [codegen id : 2] +Input [2]: [w_warehouse_sk#14, w_warehouse_name#15] +Condition : isnotnull(w_warehouse_sk#14) + +(13) BroadcastExchange +Input [2]: [w_warehouse_sk#14, w_warehouse_name#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#16] + +(14) 
BroadcastHashJoin [codegen id : 11] +Left keys [1]: [inv_warehouse_sk#11] +Right keys [1]: [w_warehouse_sk#14] +Join condition: None + +(15) Project [codegen id : 11] +Output [9]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15] +Input [11]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, inv_warehouse_sk#11, w_warehouse_sk#14, w_warehouse_name#15] + +(16) Scan parquet default.item +Output [2]: [i_item_sk#17, i_item_desc#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [2]: [i_item_sk#17, i_item_desc#18] + +(18) Filter [codegen id : 3] +Input [2]: [i_item_sk#17, i_item_desc#18] +Condition : isnotnull(i_item_sk#17) + +(19) BroadcastExchange +Input [2]: [i_item_sk#17, i_item_desc#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] + +(20) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_item_sk#5] +Right keys [1]: [i_item_sk#17] +Join condition: None + +(21) Project [codegen id : 11] +Output [10]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15, i_item_desc#18] +Input [11]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15, i_item_sk#17, i_item_desc#18] + +(22) Scan parquet default.customer_demographics +Output [2]: [cd_demo_sk#20, cd_marital_status#21] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_demographics] +PushedFilters: [IsNotNull(cd_marital_status), EqualTo(cd_marital_status,D), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 4] +Input [2]: [cd_demo_sk#20, cd_marital_status#21] + +(24) Filter [codegen id : 4] +Input [2]: [cd_demo_sk#20, cd_marital_status#21] +Condition : ((isnotnull(cd_marital_status#21) AND (cd_marital_status#21 = D)) AND isnotnull(cd_demo_sk#20)) + +(25) Project [codegen id : 4] +Output [1]: [cd_demo_sk#20] +Input [2]: [cd_demo_sk#20, cd_marital_status#21] + +(26) BroadcastExchange +Input [1]: [cd_demo_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] + +(27) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_bill_cdemo_sk#3] +Right keys [1]: [cd_demo_sk#20] +Join condition: None + +(28) Project [codegen id : 11] +Output [9]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15, i_item_desc#18] +Input [11]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15, i_item_desc#18, cd_demo_sk#20] + +(29) Scan parquet default.household_demographics +Output [2]: [hd_demo_sk#23, hd_buy_potential#24] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/household_demographics] +PushedFilters: [IsNotNull(hd_buy_potential), EqualTo(hd_buy_potential,>10000), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 5] +Input [2]: [hd_demo_sk#23, hd_buy_potential#24] + +(31) Filter [codegen id : 5] +Input [2]: [hd_demo_sk#23, hd_buy_potential#24] +Condition : ((isnotnull(hd_buy_potential#24) AND (hd_buy_potential#24 = 
>10000)) AND isnotnull(hd_demo_sk#23)) + +(32) Project [codegen id : 5] +Output [1]: [hd_demo_sk#23] +Input [2]: [hd_demo_sk#23, hd_buy_potential#24] + +(33) BroadcastExchange +Input [1]: [hd_demo_sk#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] + +(34) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_bill_hdemo_sk#4] +Right keys [1]: [hd_demo_sk#23] +Join condition: None + +(35) Project [codegen id : 11] +Output [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15, i_item_desc#18] +Input [10]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15, i_item_desc#18, hd_demo_sk#23] + +(36) Scan parquet default.date_dim +Output [4]: [d_date_sk#26, d_date#27, d_week_seq#28, d_year#29] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk), IsNotNull(d_week_seq), IsNotNull(d_date)] +ReadSchema: struct + +(37) ColumnarToRow [codegen id : 6] +Input [4]: [d_date_sk#26, d_date#27, d_week_seq#28, d_year#29] + +(38) Filter [codegen id : 6] +Input [4]: [d_date_sk#26, d_date#27, d_week_seq#28, d_year#29] +Condition : ((((isnotnull(d_year#29) AND (d_year#29 = 1999)) AND isnotnull(d_date_sk#26)) AND isnotnull(d_week_seq#28)) AND isnotnull(d_date#27)) + +(39) Project [codegen id : 6] +Output [3]: [d_date_sk#26, d_date#27, d_week_seq#28] +Input [4]: [d_date_sk#26, d_date#27, d_week_seq#28, d_year#29] + +(40) BroadcastExchange +Input [3]: [d_date_sk#26, d_date#27, d_week_seq#28] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#30] + +(41) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: 
[d_date_sk#26] +Join condition: None + +(42) Project [codegen id : 11] +Output [9]: [cs_ship_date_sk#2, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15, i_item_desc#18, d_date#27, d_week_seq#28] +Input [11]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15, i_item_desc#18, d_date_sk#26, d_date#27, d_week_seq#28] + +(43) Scan parquet default.date_dim +Output [2]: [d_date_sk#31, d_week_seq#32] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_week_seq)] +ReadSchema: struct + +(44) ColumnarToRow [codegen id : 7] +Input [2]: [d_date_sk#31, d_week_seq#32] + +(45) Filter [codegen id : 7] +Input [2]: [d_date_sk#31, d_week_seq#32] +Condition : (isnotnull(d_date_sk#31) AND isnotnull(d_week_seq#32)) + +(46) BroadcastExchange +Input [2]: [d_date_sk#31, d_week_seq#32] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, false] as bigint), 32) | (cast(input[0, int, false] as bigint) & 4294967295))),false), [id=#33] + +(47) BroadcastHashJoin [codegen id : 11] +Left keys [2]: [d_week_seq#28, inv_date_sk#9] +Right keys [2]: [d_week_seq#32, d_date_sk#31] +Join condition: None + +(48) Project [codegen id : 11] +Output [8]: [cs_ship_date_sk#2, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#15, i_item_desc#18, d_date#27, d_week_seq#28] +Input [11]: [cs_ship_date_sk#2, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15, i_item_desc#18, d_date#27, d_week_seq#28, d_date_sk#31, d_week_seq#32] + +(49) Scan parquet default.date_dim +Output [2]: [d_date_sk#34, d_date#35] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: 
[IsNotNull(d_date_sk), IsNotNull(d_date)] +ReadSchema: struct + +(50) ColumnarToRow [codegen id : 8] +Input [2]: [d_date_sk#34, d_date#35] + +(51) Filter [codegen id : 8] +Input [2]: [d_date_sk#34, d_date#35] +Condition : (isnotnull(d_date_sk#34) AND isnotnull(d_date#35)) + +(52) BroadcastExchange +Input [2]: [d_date_sk#34, d_date#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#36] + +(53) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_ship_date_sk#2] +Right keys [1]: [d_date_sk#34] +Join condition: (d_date#35 > d_date#27 + 5 days) + +(54) Project [codegen id : 11] +Output [6]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#15, i_item_desc#18, d_week_seq#28] +Input [10]: [cs_ship_date_sk#2, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#15, i_item_desc#18, d_date#27, d_week_seq#28, d_date_sk#34, d_date#35] + +(55) Scan parquet default.promotion +Output [1]: [p_promo_sk#37] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/promotion] +PushedFilters: [IsNotNull(p_promo_sk)] +ReadSchema: struct + +(56) ColumnarToRow [codegen id : 9] +Input [1]: [p_promo_sk#37] + +(57) Filter [codegen id : 9] +Input [1]: [p_promo_sk#37] +Condition : isnotnull(p_promo_sk#37) + +(58) BroadcastExchange +Input [1]: [p_promo_sk#37] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#38] + +(59) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_promo_sk#6] +Right keys [1]: [p_promo_sk#37] +Join condition: None + +(60) Project [codegen id : 11] +Output [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#15, i_item_desc#18, d_week_seq#28] +Input [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#15, i_item_desc#18, d_week_seq#28, p_promo_sk#37] + +(61) Scan parquet default.catalog_returns +Output [2]: [cr_item_sk#39, 
cr_order_number#40] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_returns] +PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(62) ColumnarToRow [codegen id : 10] +Input [2]: [cr_item_sk#39, cr_order_number#40] + +(63) Filter [codegen id : 10] +Input [2]: [cr_item_sk#39, cr_order_number#40] +Condition : (isnotnull(cr_order_number#40) AND isnotnull(cr_item_sk#39)) + +(64) BroadcastExchange +Input [2]: [cr_item_sk#39, cr_order_number#40] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[1, int, false] as bigint) & 4294967295))),false), [id=#41] + +(65) BroadcastHashJoin [codegen id : 11] +Left keys [2]: [cs_item_sk#5, cs_order_number#7] +Right keys [2]: [cr_item_sk#39, cr_order_number#40] +Join condition: None + +(66) Project [codegen id : 11] +Output [3]: [w_warehouse_name#15, i_item_desc#18, d_week_seq#28] +Input [7]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#15, i_item_desc#18, d_week_seq#28, cr_item_sk#39, cr_order_number#40] + +(67) HashAggregate [codegen id : 11] +Input [3]: [w_warehouse_name#15, i_item_desc#18, d_week_seq#28] +Keys [3]: [i_item_desc#18, w_warehouse_name#15, d_week_seq#28] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#42] +Results [4]: [i_item_desc#18, w_warehouse_name#15, d_week_seq#28, count#43] + +(68) Exchange +Input [4]: [i_item_desc#18, w_warehouse_name#15, d_week_seq#28, count#43] +Arguments: hashpartitioning(i_item_desc#18, w_warehouse_name#15, d_week_seq#28, 5), true, [id=#44] + +(69) HashAggregate [codegen id : 12] +Input [4]: [i_item_desc#18, w_warehouse_name#15, d_week_seq#28, count#43] +Keys [3]: [i_item_desc#18, w_warehouse_name#15, d_week_seq#28] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#45] +Results [6]: [i_item_desc#18, w_warehouse_name#15, d_week_seq#28, 
count(1)#45 AS no_promo#46, count(1)#45 AS promo#47, count(1)#45 AS total_cnt#48] + +(70) TakeOrderedAndProject +Input [6]: [i_item_desc#18, w_warehouse_name#15, d_week_seq#28, no_promo#46, promo#47, total_cnt#48] +Arguments: 100, [total_cnt#48 DESC NULLS LAST, i_item_desc#18 ASC NULLS FIRST, w_warehouse_name#15 ASC NULLS FIRST, d_week_seq#28 ASC NULLS FIRST], [i_item_desc#18, w_warehouse_name#15, d_week_seq#28, no_promo#46, promo#47, total_cnt#48] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/simplified.txt new file mode 100644 index 0000000000000..1488d52118b82 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/simplified.txt @@ -0,0 +1,104 @@ +TakeOrderedAndProject [d_week_seq,i_item_desc,no_promo,promo,total_cnt,w_warehouse_name] + WholeStageCodegen (12) + HashAggregate [count,d_week_seq,i_item_desc,w_warehouse_name] [count,count(1),no_promo,promo,total_cnt] + InputAdapter + Exchange [d_week_seq,i_item_desc,w_warehouse_name] #1 + WholeStageCodegen (11) + HashAggregate [d_week_seq,i_item_desc,w_warehouse_name] [count,count] + Project [d_week_seq,i_item_desc,w_warehouse_name] + BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + Project [cs_item_sk,cs_order_number,d_week_seq,i_item_desc,w_warehouse_name] + BroadcastHashJoin [cs_promo_sk,p_promo_sk] + Project [cs_item_sk,cs_order_number,cs_promo_sk,d_week_seq,i_item_desc,w_warehouse_name] + BroadcastHashJoin [cs_ship_date_sk,d_date,d_date,d_date_sk] + Project [cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,d_date,d_week_seq,i_item_desc,w_warehouse_name] + BroadcastHashJoin [d_date_sk,d_week_seq,d_week_seq,inv_date_sk] + Project [cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,d_date,d_week_seq,i_item_desc,inv_date_sk,w_warehouse_name] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project 
[cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,i_item_desc,inv_date_sk,w_warehouse_name] + BroadcastHashJoin [cs_bill_hdemo_sk,hd_demo_sk] + Project [cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,i_item_desc,inv_date_sk,w_warehouse_name] + BroadcastHashJoin [cd_demo_sk,cs_bill_cdemo_sk] + Project [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,i_item_desc,inv_date_sk,w_warehouse_name] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,inv_date_sk,w_warehouse_name] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,inv_date_sk,inv_warehouse_sk] + BroadcastHashJoin [cs_item_sk,cs_quantity,inv_item_sk,inv_quantity_on_hand] + Filter [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.warehouse [w_warehouse_name,w_warehouse_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_desc,i_item_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [cd_demo_sk] + Filter 
[cd_demo_sk,cd_marital_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Project [hd_demo_sk] + Filter [hd_buy_potential,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_buy_potential,hd_demo_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (6) + Project [d_date,d_date_sk,d_week_seq] + Filter [d_date,d_date_sk,d_week_seq,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk,d_week_seq,d_year] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (7) + Filter [d_date_sk,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_week_seq] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (8) + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (9) + Filter [p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.promotion [p_promo_sk] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (10) + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/explain.txt new file mode 100644 index 0000000000000..426d31e6ea9a6 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/explain.txt @@ -0,0 +1,218 @@ +== Physical Plan == +* Sort (39) ++- Exchange (38) + +- * Project (37) + +- * SortMergeJoin Inner (36) + :- * Sort (30) + : +- Exchange (29) + : +- * Filter (28) + : +- * HashAggregate (27) + : +- Exchange (26) + : +- * HashAggregate (25) + : +- * Project (24) + : +- * 
BroadcastHashJoin Inner BuildRight (23) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.store (11) + : +- BroadcastExchange (22) + : +- * Project (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.household_demographics (18) + +- * Sort (35) + +- Exchange (34) + +- * Filter (33) + +- * ColumnarToRow (32) + +- Scan parquet default.customer (31) + + +(1) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] + +(3) Filter [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] +Condition : (((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#4)) AND isnotnull(ss_hdemo_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#6, d_year#7, d_dom#8] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] +Condition : ((((isnotnull(d_dom#8) AND (d_dom#8 >= 1)) AND (d_dom#8 <= 2)) AND d_year#7 IN (1999,2000,2001)) AND isnotnull(d_date_sk#6)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] + +(8) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] +Input [6]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, d_date_sk#6] + +(11) Scan parquet default.store +Output [2]: [s_store_sk#10, s_county#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: [In(s_county, [Williamson County,Franklin Parish,Bronx County,Orange County]), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#10, s_county#11] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#10, s_county#11] +Condition : (s_county#11 IN (Williamson County,Franklin Parish,Bronx County,Orange County) AND isnotnull(s_store_sk#10)) + +(14) Project [codegen id : 2] +Output [1]: [s_store_sk#10] +Input [2]: [s_store_sk#10, s_county#11] + +(15) BroadcastExchange 
+Input [1]: [s_store_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#10] +Join condition: None + +(17) Project [codegen id : 4] +Output [3]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_ticket_number#5] +Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, s_store_sk#10] + +(18) Scan parquet default.household_demographics +Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/household_demographics] +PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] + +(20) Filter [codegen id : 3] +Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] +Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.0)) AND isnotnull(hd_demo_sk#13)) + +(21) Project [codegen id : 3] +Output [1]: [hd_demo_sk#13] +Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] + +(22) BroadcastExchange +Input [1]: [hd_demo_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_hdemo_sk#3] +Right keys [1]: [hd_demo_sk#13] +Join condition: None + +(24) Project 
[codegen id : 4] +Output [2]: [ss_customer_sk#2, ss_ticket_number#5] +Input [4]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_ticket_number#5, hd_demo_sk#13] + +(25) HashAggregate [codegen id : 4] +Input [2]: [ss_customer_sk#2, ss_ticket_number#5] +Keys [2]: [ss_ticket_number#5, ss_customer_sk#2] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#18] +Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] + +(26) Exchange +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] + +(27) HashAggregate [codegen id : 5] +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] +Keys [2]: [ss_ticket_number#5, ss_customer_sk#2] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#21] +Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count(1)#21 AS cnt#22] + +(28) Filter [codegen id : 5] +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] +Condition : ((cnt#22 >= 1) AND (cnt#22 <= 5)) + +(29) Exchange +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] +Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#23] + +(30) Sort [codegen id : 6] +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] +Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 + +(31) Scan parquet default.customer +Output [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 7] +Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] + +(33) Filter [codegen id : 7] +Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] +Condition : 
isnotnull(c_customer_sk#24) + +(34) Exchange +Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] +Arguments: hashpartitioning(c_customer_sk#24, 5), true, [id=#29] + +(35) Sort [codegen id : 8] +Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] +Arguments: [c_customer_sk#24 ASC NULLS FIRST], false, 0 + +(36) SortMergeJoin [codegen id : 9] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#24] +Join condition: None + +(37) Project [codegen id : 9] +Output [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] +Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] + +(38) Exchange +Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] +Arguments: rangepartitioning(cnt#22 DESC NULLS LAST, 5), true, [id=#30] + +(39) Sort [codegen id : 10] +Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] +Arguments: [cnt#22 DESC NULLS LAST], true, 0 + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/simplified.txt new file mode 100644 index 0000000000000..57379ef90be5e --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/simplified.txt @@ -0,0 +1,63 @@ +WholeStageCodegen (10) + Sort [cnt] + InputAdapter + Exchange [cnt] #1 + WholeStageCodegen (9) + Project [c_first_name,c_last_name,c_preferred_cust_flag,c_salutation,cnt,ss_ticket_number] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (6) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #2 + 
WholeStageCodegen (5) + Filter [cnt] + HashAggregate [count,ss_customer_sk,ss_ticket_number] [cnt,count,count(1)] + InputAdapter + Exchange [ss_customer_sk,ss_ticket_number] #3 + WholeStageCodegen (4) + HashAggregate [ss_customer_sk,ss_ticket_number] [count,count] + Project [ss_customer_sk,ss_ticket_number] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_ticket_number] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_dom,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_dom,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Project [s_store_sk] + Filter [s_county,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_county,s_store_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Project [hd_demo_sk] + Filter [hd_buy_potential,hd_demo_sk,hd_dep_count,hd_vehicle_count] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_buy_potential,hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + WholeStageCodegen (8) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #7 + WholeStageCodegen (7) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_preferred_cust_flag,c_salutation] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/explain.txt new file mode 100644 index 
0000000000000..65454a045649f --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/explain.txt @@ -0,0 +1,203 @@ +== Physical Plan == +* Sort (36) ++- Exchange (35) + +- * Project (34) + +- * BroadcastHashJoin Inner BuildRight (33) + :- * Filter (28) + : +- * HashAggregate (27) + : +- Exchange (26) + : +- * HashAggregate (25) + : +- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.store (11) + : +- BroadcastExchange (22) + : +- * Project (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.household_demographics (18) + +- BroadcastExchange (32) + +- * Filter (31) + +- * ColumnarToRow (30) + +- Scan parquet default.customer (29) + + +(1) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] + +(3) Filter [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] 
+Condition : (((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#4)) AND isnotnull(ss_hdemo_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#6, d_year#7, d_dom#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] +Condition : ((((isnotnull(d_dom#8) AND (d_dom#8 >= 1)) AND (d_dom#8 <= 2)) AND d_year#7 IN (1999,2000,2001)) AND isnotnull(d_date_sk#6)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] + +(8) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] +Input [6]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, d_date_sk#6] + +(11) Scan parquet default.store +Output [2]: [s_store_sk#10, s_county#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [In(s_county, [Williamson County,Franklin Parish,Bronx County,Orange County]), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#10, s_county#11] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#10, s_county#11] 
+Condition : (s_county#11 IN (Williamson County,Franklin Parish,Bronx County,Orange County) AND isnotnull(s_store_sk#10)) + +(14) Project [codegen id : 2] +Output [1]: [s_store_sk#10] +Input [2]: [s_store_sk#10, s_county#11] + +(15) BroadcastExchange +Input [1]: [s_store_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#10] +Join condition: None + +(17) Project [codegen id : 4] +Output [3]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_ticket_number#5] +Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, s_store_sk#10] + +(18) Scan parquet default.household_demographics +Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/household_demographics] +PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] + +(20) Filter [codegen id : 3] +Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] +Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.0)) AND isnotnull(hd_demo_sk#13)) + +(21) Project [codegen id : 3] +Output [1]: [hd_demo_sk#13] +Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] + +(22) BroadcastExchange +Input [1]: [hd_demo_sk#13] 
+Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_hdemo_sk#3] +Right keys [1]: [hd_demo_sk#13] +Join condition: None + +(24) Project [codegen id : 4] +Output [2]: [ss_customer_sk#2, ss_ticket_number#5] +Input [4]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_ticket_number#5, hd_demo_sk#13] + +(25) HashAggregate [codegen id : 4] +Input [2]: [ss_customer_sk#2, ss_ticket_number#5] +Keys [2]: [ss_ticket_number#5, ss_customer_sk#2] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#18] +Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] + +(26) Exchange +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] + +(27) HashAggregate [codegen id : 6] +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] +Keys [2]: [ss_ticket_number#5, ss_customer_sk#2] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#21] +Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count(1)#21 AS cnt#22] + +(28) Filter [codegen id : 6] +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] +Condition : ((cnt#22 >= 1) AND (cnt#22 <= 5)) + +(29) Scan parquet default.customer +Output [5]: [c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 5] +Input [5]: [c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] + +(31) Filter [codegen id : 5] +Input [5]: [c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] +Condition : isnotnull(c_customer_sk#23) + +(32) BroadcastExchange 
+Input [5]: [c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#28] + +(33) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#23] +Join condition: None + +(34) Project [codegen id : 6] +Output [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] +Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] + +(35) Exchange +Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] +Arguments: rangepartitioning(cnt#22 DESC NULLS LAST, 5), true, [id=#29] + +(36) Sort [codegen id : 7] +Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] +Arguments: [cnt#22 DESC NULLS LAST], true, 0 + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/simplified.txt new file mode 100644 index 0000000000000..55312b6569a21 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/simplified.txt @@ -0,0 +1,54 @@ +WholeStageCodegen (7) + Sort [cnt] + InputAdapter + Exchange [cnt] #1 + WholeStageCodegen (6) + Project [c_first_name,c_last_name,c_preferred_cust_flag,c_salutation,cnt,ss_ticket_number] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [cnt] + HashAggregate [count,ss_customer_sk,ss_ticket_number] [cnt,count,count(1)] + InputAdapter + Exchange [ss_customer_sk,ss_ticket_number] #2 + WholeStageCodegen (4) + HashAggregate [ss_customer_sk,ss_ticket_number] [count,count] + Project [ss_customer_sk,ss_ticket_number] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + 
Project [ss_customer_sk,ss_hdemo_sk,ss_ticket_number] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_dom,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_dom,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [s_store_sk] + Filter [s_county,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_county,s_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [hd_demo_sk] + Filter [hd_buy_potential,hd_demo_sk,hd_dep_count,hd_vehicle_count] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_buy_potential,hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_preferred_cust_flag,c_salutation] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74.sf100/explain.txt new file mode 100644 index 0000000000000..70b237e0cbdd5 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74.sf100/explain.txt @@ -0,0 +1,477 @@ +== Physical Plan == +TakeOrderedAndProject (86) ++- * Project (85) + +- * SortMergeJoin Inner (84) + :- * Project (66) + : +- * SortMergeJoin Inner (65) + : :- * SortMergeJoin Inner (45) + : : :- * Sort (24) + : : : +- Exchange (23) + : : : +- * Filter (22) + : : : +- * 
HashAggregate (21) + : : : +- Exchange (20) + : : : +- * HashAggregate (19) + : : : +- * Project (18) + : : : +- * SortMergeJoin Inner (17) + : : : :- * Sort (11) + : : : : +- Exchange (10) + : : : : +- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.date_dim (4) + : : : +- * Sort (16) + : : : +- Exchange (15) + : : : +- * Filter (14) + : : : +- * ColumnarToRow (13) + : : : +- Scan parquet default.customer (12) + : : +- * Sort (44) + : : +- Exchange (43) + : : +- * HashAggregate (42) + : : +- Exchange (41) + : : +- * HashAggregate (40) + : : +- * Project (39) + : : +- * SortMergeJoin Inner (38) + : : :- * Sort (35) + : : : +- Exchange (34) + : : : +- * Project (33) + : : : +- * BroadcastHashJoin Inner BuildRight (32) + : : : :- * Filter (27) + : : : : +- * ColumnarToRow (26) + : : : : +- Scan parquet default.store_sales (25) + : : : +- BroadcastExchange (31) + : : : +- * Filter (30) + : : : +- * ColumnarToRow (29) + : : : +- Scan parquet default.date_dim (28) + : : +- * Sort (37) + : : +- ReusedExchange (36) + : +- * Sort (64) + : +- Exchange (63) + : +- * Project (62) + : +- * Filter (61) + : +- * HashAggregate (60) + : +- Exchange (59) + : +- * HashAggregate (58) + : +- * Project (57) + : +- * SortMergeJoin Inner (56) + : :- * Sort (53) + : : +- Exchange (52) + : : +- * Project (51) + : : +- * BroadcastHashJoin Inner BuildRight (50) + : : :- * Filter (48) + : : : +- * ColumnarToRow (47) + : : : +- Scan parquet default.web_sales (46) + : : +- ReusedExchange (49) + : +- * Sort (55) + : +- ReusedExchange (54) + +- * Sort (83) + +- Exchange (82) + +- * HashAggregate (81) + +- Exchange (80) + +- * HashAggregate (79) + +- * Project (78) + +- * SortMergeJoin Inner (77) + :- * Sort (74) + : +- Exchange (73) + : +- 
* Project (72) + : +- * BroadcastHashJoin Inner BuildRight (71) + : :- * Filter (69) + : : +- * ColumnarToRow (68) + : : +- Scan parquet default.web_sales (67) + : +- ReusedExchange (70) + +- * Sort (76) + +- ReusedExchange (75) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_net_paid#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_net_paid#3] + +(3) Filter [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_net_paid#3] +Condition : (isnotnull(ss_customer_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#4, d_year#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), In(d_year, [2001,2002]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#4, d_year#5] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#4, d_year#5] +Condition : (((isnotnull(d_year#5) AND (d_year#5 = 2001)) AND d_year#5 IN (2001,2002)) AND isnotnull(d_date_sk#4)) + +(7) BroadcastExchange +Input [2]: [d_date_sk#4, d_year#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#6] + +(8) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(9) Project [codegen id : 2] +Output [3]: [ss_customer_sk#2, ss_net_paid#3, d_year#5] +Input [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_net_paid#3, d_date_sk#4, d_year#5] + +(10) Exchange 
+Input [3]: [ss_customer_sk#2, ss_net_paid#3, d_year#5] +Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#7] + +(11) Sort [codegen id : 3] +Input [3]: [ss_customer_sk#2, ss_net_paid#3, d_year#5] +Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 + +(12) Scan parquet default.customer +Output [4]: [c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(13) ColumnarToRow [codegen id : 4] +Input [4]: [c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11] + +(14) Filter [codegen id : 4] +Input [4]: [c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11] +Condition : (isnotnull(c_customer_sk#8) AND isnotnull(c_customer_id#9)) + +(15) Exchange +Input [4]: [c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11] +Arguments: hashpartitioning(c_customer_sk#8, 5), true, [id=#12] + +(16) Sort [codegen id : 5] +Input [4]: [c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11] +Arguments: [c_customer_sk#8 ASC NULLS FIRST], false, 0 + +(17) SortMergeJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#8] +Join condition: None + +(18) Project [codegen id : 6] +Output [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, ss_net_paid#3, d_year#5] +Input [7]: [ss_customer_sk#2, ss_net_paid#3, d_year#5, c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11] + +(19) HashAggregate [codegen id : 6] +Input [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, ss_net_paid#3, d_year#5] +Keys [4]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#3))] +Aggregate Attributes [1]: [sum#13] +Results [5]: [c_customer_id#9, c_first_name#10, 
c_last_name#11, d_year#5, sum#14] + +(20) Exchange +Input [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5, sum#14] +Arguments: hashpartitioning(c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5, 5), true, [id=#15] + +(21) HashAggregate [codegen id : 7] +Input [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5, sum#14] +Keys [4]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5] +Functions [1]: [sum(UnscaledValue(ss_net_paid#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#3))#16] +Results [2]: [c_customer_id#9 AS customer_id#17, MakeDecimal(sum(UnscaledValue(ss_net_paid#3))#16,17,2) AS year_total#18] + +(22) Filter [codegen id : 7] +Input [2]: [customer_id#17, year_total#18] +Condition : (isnotnull(year_total#18) AND (year_total#18 > 0.00)) + +(23) Exchange +Input [2]: [customer_id#17, year_total#18] +Arguments: hashpartitioning(customer_id#17, 5), true, [id=#19] + +(24) Sort [codegen id : 8] +Input [2]: [customer_id#17, year_total#18] +Arguments: [customer_id#17 ASC NULLS FIRST], false, 0 + +(25) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_net_paid#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(26) ColumnarToRow [codegen id : 10] +Input [3]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_net_paid#3] + +(27) Filter [codegen id : 10] +Input [3]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_net_paid#3] +Condition : (isnotnull(ss_customer_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(28) Scan parquet default.date_dim +Output [2]: [d_date_sk#4, d_year#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: 
[IsNotNull(d_year), EqualTo(d_year,2002), In(d_year, [2001,2002]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 9] +Input [2]: [d_date_sk#4, d_year#5] + +(30) Filter [codegen id : 9] +Input [2]: [d_date_sk#4, d_year#5] +Condition : (((isnotnull(d_year#5) AND (d_year#5 = 2002)) AND d_year#5 IN (2001,2002)) AND isnotnull(d_date_sk#4)) + +(31) BroadcastExchange +Input [2]: [d_date_sk#4, d_year#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#20] + +(32) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(33) Project [codegen id : 10] +Output [3]: [ss_customer_sk#2, ss_net_paid#3, d_year#5] +Input [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_net_paid#3, d_date_sk#4, d_year#5] + +(34) Exchange +Input [3]: [ss_customer_sk#2, ss_net_paid#3, d_year#5] +Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#21] + +(35) Sort [codegen id : 11] +Input [3]: [ss_customer_sk#2, ss_net_paid#3, d_year#5] +Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 + +(36) ReusedExchange [Reuses operator id: 15] +Output [4]: [c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11] + +(37) Sort [codegen id : 13] +Input [4]: [c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11] +Arguments: [c_customer_sk#8 ASC NULLS FIRST], false, 0 + +(38) SortMergeJoin [codegen id : 14] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#8] +Join condition: None + +(39) Project [codegen id : 14] +Output [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, ss_net_paid#3, d_year#5] +Input [7]: [ss_customer_sk#2, ss_net_paid#3, d_year#5, c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11] + +(40) HashAggregate [codegen id : 14] +Input [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, ss_net_paid#3, d_year#5] +Keys [4]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5] 
+Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#3))] +Aggregate Attributes [1]: [sum#22] +Results [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5, sum#23] + +(41) Exchange +Input [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5, sum#23] +Arguments: hashpartitioning(c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5, 5), true, [id=#24] + +(42) HashAggregate [codegen id : 15] +Input [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5, sum#23] +Keys [4]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5] +Functions [1]: [sum(UnscaledValue(ss_net_paid#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#3))#25] +Results [4]: [c_customer_id#9 AS customer_id#26, c_first_name#10 AS customer_first_name#27, c_last_name#11 AS customer_last_name#28, MakeDecimal(sum(UnscaledValue(ss_net_paid#3))#25,17,2) AS year_total#29] + +(43) Exchange +Input [4]: [customer_id#26, customer_first_name#27, customer_last_name#28, year_total#29] +Arguments: hashpartitioning(customer_id#26, 5), true, [id=#30] + +(44) Sort [codegen id : 16] +Input [4]: [customer_id#26, customer_first_name#27, customer_last_name#28, year_total#29] +Arguments: [customer_id#26 ASC NULLS FIRST], false, 0 + +(45) SortMergeJoin [codegen id : 17] +Left keys [1]: [customer_id#17] +Right keys [1]: [customer_id#26] +Join condition: None + +(46) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#31, ws_bill_customer_sk#32, ws_net_paid#33] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(47) ColumnarToRow [codegen id : 19] +Input [3]: [ws_sold_date_sk#31, ws_bill_customer_sk#32, ws_net_paid#33] + +(48) Filter [codegen id : 19] +Input [3]: [ws_sold_date_sk#31, ws_bill_customer_sk#32, ws_net_paid#33] +Condition : 
(isnotnull(ws_bill_customer_sk#32) AND isnotnull(ws_sold_date_sk#31)) + +(49) ReusedExchange [Reuses operator id: 7] +Output [2]: [d_date_sk#4, d_year#5] + +(50) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [ws_sold_date_sk#31] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(51) Project [codegen id : 19] +Output [3]: [ws_bill_customer_sk#32, ws_net_paid#33, d_year#5] +Input [5]: [ws_sold_date_sk#31, ws_bill_customer_sk#32, ws_net_paid#33, d_date_sk#4, d_year#5] + +(52) Exchange +Input [3]: [ws_bill_customer_sk#32, ws_net_paid#33, d_year#5] +Arguments: hashpartitioning(ws_bill_customer_sk#32, 5), true, [id=#34] + +(53) Sort [codegen id : 20] +Input [3]: [ws_bill_customer_sk#32, ws_net_paid#33, d_year#5] +Arguments: [ws_bill_customer_sk#32 ASC NULLS FIRST], false, 0 + +(54) ReusedExchange [Reuses operator id: 15] +Output [4]: [c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11] + +(55) Sort [codegen id : 22] +Input [4]: [c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11] +Arguments: [c_customer_sk#8 ASC NULLS FIRST], false, 0 + +(56) SortMergeJoin [codegen id : 23] +Left keys [1]: [ws_bill_customer_sk#32] +Right keys [1]: [c_customer_sk#8] +Join condition: None + +(57) Project [codegen id : 23] +Output [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, ws_net_paid#33, d_year#5] +Input [7]: [ws_bill_customer_sk#32, ws_net_paid#33, d_year#5, c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11] + +(58) HashAggregate [codegen id : 23] +Input [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, ws_net_paid#33, d_year#5] +Keys [4]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#33))] +Aggregate Attributes [1]: [sum#35] +Results [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5, sum#36] + +(59) Exchange +Input [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5, sum#36] +Arguments: 
hashpartitioning(c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5, 5), true, [id=#37] + +(60) HashAggregate [codegen id : 24] +Input [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5, sum#36] +Keys [4]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5] +Functions [1]: [sum(UnscaledValue(ws_net_paid#33))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#33))#38] +Results [2]: [c_customer_id#9 AS customer_id#39, MakeDecimal(sum(UnscaledValue(ws_net_paid#33))#38,17,2) AS year_total#40] + +(61) Filter [codegen id : 24] +Input [2]: [customer_id#39, year_total#40] +Condition : (isnotnull(year_total#40) AND (year_total#40 > 0.00)) + +(62) Project [codegen id : 24] +Output [2]: [customer_id#39 AS customer_id#41, year_total#40 AS year_total#42] +Input [2]: [customer_id#39, year_total#40] + +(63) Exchange +Input [2]: [customer_id#41, year_total#42] +Arguments: hashpartitioning(customer_id#41, 5), true, [id=#43] + +(64) Sort [codegen id : 25] +Input [2]: [customer_id#41, year_total#42] +Arguments: [customer_id#41 ASC NULLS FIRST], false, 0 + +(65) SortMergeJoin [codegen id : 26] +Left keys [1]: [customer_id#17] +Right keys [1]: [customer_id#41] +Join condition: None + +(66) Project [codegen id : 26] +Output [7]: [customer_id#17, year_total#18, customer_id#26, customer_first_name#27, customer_last_name#28, year_total#29, year_total#42] +Input [8]: [customer_id#17, year_total#18, customer_id#26, customer_first_name#27, customer_last_name#28, year_total#29, customer_id#41, year_total#42] + +(67) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#31, ws_bill_customer_sk#32, ws_net_paid#33] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(68) ColumnarToRow [codegen id : 28] +Input [3]: 
[ws_sold_date_sk#31, ws_bill_customer_sk#32, ws_net_paid#33] + +(69) Filter [codegen id : 28] +Input [3]: [ws_sold_date_sk#31, ws_bill_customer_sk#32, ws_net_paid#33] +Condition : (isnotnull(ws_bill_customer_sk#32) AND isnotnull(ws_sold_date_sk#31)) + +(70) ReusedExchange [Reuses operator id: 31] +Output [2]: [d_date_sk#4, d_year#5] + +(71) BroadcastHashJoin [codegen id : 28] +Left keys [1]: [ws_sold_date_sk#31] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(72) Project [codegen id : 28] +Output [3]: [ws_bill_customer_sk#32, ws_net_paid#33, d_year#5] +Input [5]: [ws_sold_date_sk#31, ws_bill_customer_sk#32, ws_net_paid#33, d_date_sk#4, d_year#5] + +(73) Exchange +Input [3]: [ws_bill_customer_sk#32, ws_net_paid#33, d_year#5] +Arguments: hashpartitioning(ws_bill_customer_sk#32, 5), true, [id=#44] + +(74) Sort [codegen id : 29] +Input [3]: [ws_bill_customer_sk#32, ws_net_paid#33, d_year#5] +Arguments: [ws_bill_customer_sk#32 ASC NULLS FIRST], false, 0 + +(75) ReusedExchange [Reuses operator id: 15] +Output [4]: [c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11] + +(76) Sort [codegen id : 31] +Input [4]: [c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11] +Arguments: [c_customer_sk#8 ASC NULLS FIRST], false, 0 + +(77) SortMergeJoin [codegen id : 32] +Left keys [1]: [ws_bill_customer_sk#32] +Right keys [1]: [c_customer_sk#8] +Join condition: None + +(78) Project [codegen id : 32] +Output [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, ws_net_paid#33, d_year#5] +Input [7]: [ws_bill_customer_sk#32, ws_net_paid#33, d_year#5, c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11] + +(79) HashAggregate [codegen id : 32] +Input [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, ws_net_paid#33, d_year#5] +Keys [4]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#33))] +Aggregate Attributes [1]: [sum#45] +Results [5]: [c_customer_id#9, 
c_first_name#10, c_last_name#11, d_year#5, sum#46] + +(80) Exchange +Input [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5, sum#46] +Arguments: hashpartitioning(c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5, 5), true, [id=#47] + +(81) HashAggregate [codegen id : 33] +Input [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5, sum#46] +Keys [4]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5] +Functions [1]: [sum(UnscaledValue(ws_net_paid#33))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#33))#48] +Results [2]: [c_customer_id#9 AS customer_id#49, MakeDecimal(sum(UnscaledValue(ws_net_paid#33))#48,17,2) AS year_total#50] + +(82) Exchange +Input [2]: [customer_id#49, year_total#50] +Arguments: hashpartitioning(customer_id#49, 5), true, [id=#51] + +(83) Sort [codegen id : 34] +Input [2]: [customer_id#49, year_total#50] +Arguments: [customer_id#49 ASC NULLS FIRST], false, 0 + +(84) SortMergeJoin [codegen id : 35] +Left keys [1]: [customer_id#17] +Right keys [1]: [customer_id#49] +Join condition: (CASE WHEN (year_total#42 > 0.00) THEN CheckOverflow((promote_precision(year_total#50) / promote_precision(year_total#42)), DecimalType(37,20), true) ELSE null END > CASE WHEN (year_total#18 > 0.00) THEN CheckOverflow((promote_precision(year_total#29) / promote_precision(year_total#18)), DecimalType(37,20), true) ELSE null END) + +(85) Project [codegen id : 35] +Output [3]: [customer_id#26, customer_first_name#27, customer_last_name#28] +Input [9]: [customer_id#17, year_total#18, customer_id#26, customer_first_name#27, customer_last_name#28, year_total#29, year_total#42, customer_id#49, year_total#50] + +(86) TakeOrderedAndProject +Input [3]: [customer_id#26, customer_first_name#27, customer_last_name#28] +Arguments: 100, [customer_id#26 ASC NULLS FIRST, customer_id#26 ASC NULLS FIRST, customer_id#26 ASC NULLS FIRST], [customer_id#26, customer_first_name#27, customer_last_name#28] + diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74.sf100/simplified.txt new file mode 100644 index 0000000000000..c35e70d72eb36 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74.sf100/simplified.txt @@ -0,0 +1,157 @@ +TakeOrderedAndProject [customer_first_name,customer_id,customer_last_name] + WholeStageCodegen (35) + Project [customer_first_name,customer_id,customer_last_name] + SortMergeJoin [customer_id,customer_id,year_total,year_total,year_total,year_total] + InputAdapter + WholeStageCodegen (26) + Project [customer_first_name,customer_id,customer_id,customer_last_name,year_total,year_total,year_total] + SortMergeJoin [customer_id,customer_id] + InputAdapter + WholeStageCodegen (17) + SortMergeJoin [customer_id,customer_id] + InputAdapter + WholeStageCodegen (8) + Sort [customer_id] + InputAdapter + Exchange [customer_id] #1 + WholeStageCodegen (7) + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [customer_id,sum,sum(UnscaledValue(ss_net_paid)),year_total] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #2 + WholeStageCodegen (6) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (3) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #3 + WholeStageCodegen (2) + Project [d_year,ss_customer_sk,ss_net_paid] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_net_paid,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet 
default.date_dim [d_date_sk,d_year] + InputAdapter + WholeStageCodegen (5) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #5 + WholeStageCodegen (4) + Filter [c_customer_id,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_id,c_customer_sk,c_first_name,c_last_name] + InputAdapter + WholeStageCodegen (16) + Sort [customer_id] + InputAdapter + Exchange [customer_id] #6 + WholeStageCodegen (15) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [customer_first_name,customer_id,customer_last_name,sum,sum(UnscaledValue(ss_net_paid)),year_total] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #7 + WholeStageCodegen (14) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (11) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #8 + WholeStageCodegen (10) + Project [d_year,ss_customer_sk,ss_net_paid] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_net_paid,ss_sold_date_sk] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (9) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + WholeStageCodegen (13) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_customer_id,c_customer_sk,c_first_name,c_last_name] #5 + InputAdapter + WholeStageCodegen (25) + Sort [customer_id] + InputAdapter + Exchange [customer_id] #10 + WholeStageCodegen (24) + Project [customer_id,year_total] + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [customer_id,sum,sum(UnscaledValue(ws_net_paid)),year_total] + InputAdapter + Exchange 
[c_customer_id,c_first_name,c_last_name,d_year] #11 + WholeStageCodegen (23) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ws_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,d_year,ws_net_paid] + SortMergeJoin [c_customer_sk,ws_bill_customer_sk] + InputAdapter + WholeStageCodegen (20) + Sort [ws_bill_customer_sk] + InputAdapter + Exchange [ws_bill_customer_sk] #12 + WholeStageCodegen (19) + Project [d_year,ws_bill_customer_sk,ws_net_paid] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_bill_customer_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #4 + InputAdapter + WholeStageCodegen (22) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_customer_id,c_customer_sk,c_first_name,c_last_name] #5 + InputAdapter + WholeStageCodegen (34) + Sort [customer_id] + InputAdapter + Exchange [customer_id] #13 + WholeStageCodegen (33) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [customer_id,sum,sum(UnscaledValue(ws_net_paid)),year_total] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #14 + WholeStageCodegen (32) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ws_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,d_year,ws_net_paid] + SortMergeJoin [c_customer_sk,ws_bill_customer_sk] + InputAdapter + WholeStageCodegen (29) + Sort [ws_bill_customer_sk] + InputAdapter + Exchange [ws_bill_customer_sk] #15 + WholeStageCodegen (28) + Project [d_year,ws_bill_customer_sk,ws_net_paid] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_bill_customer_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #9 + InputAdapter + WholeStageCodegen (31) + Sort [c_customer_sk] + 
InputAdapter + ReusedExchange [c_customer_id,c_customer_sk,c_first_name,c_last_name] #5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74/explain.txt new file mode 100644 index 0000000000000..800b3a88c0d73 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74/explain.txt @@ -0,0 +1,410 @@ +== Physical Plan == +TakeOrderedAndProject (72) ++- * Project (71) + +- * BroadcastHashJoin Inner BuildRight (70) + :- * Project (56) + : +- * BroadcastHashJoin Inner BuildRight (55) + : :- * BroadcastHashJoin Inner BuildRight (36) + : : :- * Filter (19) + : : : +- * HashAggregate (18) + : : : +- Exchange (17) + : : : +- * HashAggregate (16) + : : : +- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.customer (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.store_sales (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.date_dim (10) + : : +- BroadcastExchange (35) + : : +- * HashAggregate (34) + : : +- Exchange (33) + : : +- * HashAggregate (32) + : : +- * Project (31) + : : +- * BroadcastHashJoin Inner BuildRight (30) + : : :- * Project (25) + : : : +- * BroadcastHashJoin Inner BuildRight (24) + : : : :- * Filter (22) + : : : : +- * ColumnarToRow (21) + : : : : +- Scan parquet default.customer (20) + : : : +- ReusedExchange (23) + : : +- BroadcastExchange (29) + : : +- * Filter (28) + : : +- * ColumnarToRow (27) + : : +- Scan parquet default.date_dim (26) + : +- BroadcastExchange (54) + : +- * Project (53) + : +- * Filter (52) + : +- * HashAggregate (51) + : +- Exchange 
(50) + : +- * HashAggregate (49) + : +- * Project (48) + : +- * BroadcastHashJoin Inner BuildRight (47) + : :- * Project (45) + : : +- * BroadcastHashJoin Inner BuildRight (44) + : : :- * Filter (39) + : : : +- * ColumnarToRow (38) + : : : +- Scan parquet default.customer (37) + : : +- BroadcastExchange (43) + : : +- * Filter (42) + : : +- * ColumnarToRow (41) + : : +- Scan parquet default.web_sales (40) + : +- ReusedExchange (46) + +- BroadcastExchange (69) + +- * HashAggregate (68) + +- Exchange (67) + +- * HashAggregate (66) + +- * Project (65) + +- * BroadcastHashJoin Inner BuildRight (64) + :- * Project (62) + : +- * BroadcastHashJoin Inner BuildRight (61) + : :- * Filter (59) + : : +- * ColumnarToRow (58) + : : +- Scan parquet default.customer (57) + : +- ReusedExchange (60) + +- ReusedExchange (63) + + +(1) Scan parquet default.customer +Output [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] + +(3) Filter [codegen id : 3] +Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(4) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#5, ss_customer_sk#6, ss_net_paid#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [ss_sold_date_sk#5, ss_customer_sk#6, ss_net_paid#7] + +(6) Filter [codegen id : 1] +Input 
[3]: [ss_sold_date_sk#5, ss_customer_sk#6, ss_net_paid#7] +Condition : (isnotnull(ss_customer_sk#6) AND isnotnull(ss_sold_date_sk#5)) + +(7) BroadcastExchange +Input [3]: [ss_sold_date_sk#5, ss_customer_sk#6, ss_net_paid#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#8] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#6] +Join condition: None + +(9) Project [codegen id : 3] +Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_sold_date_sk#5, ss_net_paid#7] +Input [7]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, ss_sold_date_sk#5, ss_customer_sk#6, ss_net_paid#7] + +(10) Scan parquet default.date_dim +Output [2]: [d_date_sk#9, d_year#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), In(d_year, [2001,2002]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#9, d_year#10] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#9, d_year#10] +Condition : (((isnotnull(d_year#10) AND (d_year#10 = 2001)) AND d_year#10 IN (2001,2002)) AND isnotnull(d_date_sk#9)) + +(13) BroadcastExchange +Input [2]: [d_date_sk#9, d_year#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(15) Project [codegen id : 3] +Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#7, d_year#10] +Input [7]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_sold_date_sk#5, ss_net_paid#7, d_date_sk#9, d_year#10] + +(16) HashAggregate [codegen id : 3] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#7, 
d_year#10] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#7))] +Aggregate Attributes [1]: [sum#12] +Results [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#13] + +(17) Exchange +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#13] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, 5), true, [id=#14] + +(18) HashAggregate [codegen id : 16] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#13] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10] +Functions [1]: [sum(UnscaledValue(ss_net_paid#7))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#7))#15] +Results [2]: [c_customer_id#2 AS customer_id#16, MakeDecimal(sum(UnscaledValue(ss_net_paid#7))#15,17,2) AS year_total#17] + +(19) Filter [codegen id : 16] +Input [2]: [customer_id#16, year_total#17] +Condition : (isnotnull(year_total#17) AND (year_total#17 > 0.00)) + +(20) Scan parquet default.customer +Output [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] + +(22) Filter [codegen id : 6] +Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(23) ReusedExchange [Reuses operator id: 7] +Output [3]: [ss_sold_date_sk#5, ss_customer_sk#6, ss_net_paid#7] + +(24) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#6] +Join condition: None + +(25) Project [codegen id : 6] 
+Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_sold_date_sk#5, ss_net_paid#7] +Input [7]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, ss_sold_date_sk#5, ss_customer_sk#6, ss_net_paid#7] + +(26) Scan parquet default.date_dim +Output [2]: [d_date_sk#9, d_year#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), In(d_year, [2001,2002]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#9, d_year#10] + +(28) Filter [codegen id : 5] +Input [2]: [d_date_sk#9, d_year#10] +Condition : (((isnotnull(d_year#10) AND (d_year#10 = 2002)) AND d_year#10 IN (2001,2002)) AND isnotnull(d_date_sk#9)) + +(29) BroadcastExchange +Input [2]: [d_date_sk#9, d_year#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#18] + +(30) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(31) Project [codegen id : 6] +Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#7, d_year#10] +Input [7]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_sold_date_sk#5, ss_net_paid#7, d_date_sk#9, d_year#10] + +(32) HashAggregate [codegen id : 6] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#7, d_year#10] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#7))] +Aggregate Attributes [1]: [sum#19] +Results [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#20] + +(33) Exchange +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#20] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, 5), true, [id=#21] + +(34) HashAggregate 
[codegen id : 7] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#20] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10] +Functions [1]: [sum(UnscaledValue(ss_net_paid#7))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#7))#22] +Results [4]: [c_customer_id#2 AS customer_id#23, c_first_name#3 AS customer_first_name#24, c_last_name#4 AS customer_last_name#25, MakeDecimal(sum(UnscaledValue(ss_net_paid#7))#22,17,2) AS year_total#26] + +(35) BroadcastExchange +Input [4]: [customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#27] + +(36) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#16] +Right keys [1]: [customer_id#23] +Join condition: None + +(37) Scan parquet default.customer +Output [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 10] +Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] + +(39) Filter [codegen id : 10] +Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(40) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#28, ws_bill_customer_sk#29, ws_net_paid#30] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 8] +Input [3]: [ws_sold_date_sk#28, ws_bill_customer_sk#29, ws_net_paid#30] + +(42) 
Filter [codegen id : 8] +Input [3]: [ws_sold_date_sk#28, ws_bill_customer_sk#29, ws_net_paid#30] +Condition : (isnotnull(ws_bill_customer_sk#29) AND isnotnull(ws_sold_date_sk#28)) + +(43) BroadcastExchange +Input [3]: [ws_sold_date_sk#28, ws_bill_customer_sk#29, ws_net_paid#30] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#31] + +(44) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ws_bill_customer_sk#29] +Join condition: None + +(45) Project [codegen id : 10] +Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ws_sold_date_sk#28, ws_net_paid#30] +Input [7]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, ws_sold_date_sk#28, ws_bill_customer_sk#29, ws_net_paid#30] + +(46) ReusedExchange [Reuses operator id: 13] +Output [2]: [d_date_sk#9, d_year#10] + +(47) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ws_sold_date_sk#28] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(48) Project [codegen id : 10] +Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ws_net_paid#30, d_year#10] +Input [7]: [c_customer_id#2, c_first_name#3, c_last_name#4, ws_sold_date_sk#28, ws_net_paid#30, d_date_sk#9, d_year#10] + +(49) HashAggregate [codegen id : 10] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ws_net_paid#30, d_year#10] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#30))] +Aggregate Attributes [1]: [sum#32] +Results [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#33] + +(50) Exchange +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#33] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, 5), true, [id=#34] + +(51) HashAggregate [codegen id : 11] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#33] +Keys [4]: [c_customer_id#2, 
c_first_name#3, c_last_name#4, d_year#10] +Functions [1]: [sum(UnscaledValue(ws_net_paid#30))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#30))#35] +Results [2]: [c_customer_id#2 AS customer_id#36, MakeDecimal(sum(UnscaledValue(ws_net_paid#30))#35,17,2) AS year_total#37] + +(52) Filter [codegen id : 11] +Input [2]: [customer_id#36, year_total#37] +Condition : (isnotnull(year_total#37) AND (year_total#37 > 0.00)) + +(53) Project [codegen id : 11] +Output [2]: [customer_id#36 AS customer_id#38, year_total#37 AS year_total#39] +Input [2]: [customer_id#36, year_total#37] + +(54) BroadcastExchange +Input [2]: [customer_id#38, year_total#39] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#40] + +(55) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#16] +Right keys [1]: [customer_id#38] +Join condition: None + +(56) Project [codegen id : 16] +Output [7]: [customer_id#16, year_total#17, customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26, year_total#39] +Input [8]: [customer_id#16, year_total#17, customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26, customer_id#38, year_total#39] + +(57) Scan parquet default.customer +Output [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(58) ColumnarToRow [codegen id : 14] +Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] + +(59) Filter [codegen id : 14] +Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(60) ReusedExchange [Reuses operator id: 43] +Output [3]: [ws_sold_date_sk#28, ws_bill_customer_sk#29, ws_net_paid#30] 
+ +(61) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ws_bill_customer_sk#29] +Join condition: None + +(62) Project [codegen id : 14] +Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ws_sold_date_sk#28, ws_net_paid#30] +Input [7]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, ws_sold_date_sk#28, ws_bill_customer_sk#29, ws_net_paid#30] + +(63) ReusedExchange [Reuses operator id: 29] +Output [2]: [d_date_sk#9, d_year#10] + +(64) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ws_sold_date_sk#28] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(65) Project [codegen id : 14] +Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ws_net_paid#30, d_year#10] +Input [7]: [c_customer_id#2, c_first_name#3, c_last_name#4, ws_sold_date_sk#28, ws_net_paid#30, d_date_sk#9, d_year#10] + +(66) HashAggregate [codegen id : 14] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ws_net_paid#30, d_year#10] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#30))] +Aggregate Attributes [1]: [sum#41] +Results [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#42] + +(67) Exchange +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#42] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, 5), true, [id=#43] + +(68) HashAggregate [codegen id : 15] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#42] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10] +Functions [1]: [sum(UnscaledValue(ws_net_paid#30))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#30))#44] +Results [2]: [c_customer_id#2 AS customer_id#45, MakeDecimal(sum(UnscaledValue(ws_net_paid#30))#44,17,2) AS year_total#46] + +(69) BroadcastExchange +Input [2]: [customer_id#45, year_total#46] +Arguments: 
HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#47] + +(70) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#16] +Right keys [1]: [customer_id#45] +Join condition: (CASE WHEN (year_total#39 > 0.00) THEN CheckOverflow((promote_precision(year_total#46) / promote_precision(year_total#39)), DecimalType(37,20), true) ELSE null END > CASE WHEN (year_total#17 > 0.00) THEN CheckOverflow((promote_precision(year_total#26) / promote_precision(year_total#17)), DecimalType(37,20), true) ELSE null END) + +(71) Project [codegen id : 16] +Output [3]: [customer_id#23, customer_first_name#24, customer_last_name#25] +Input [9]: [customer_id#16, year_total#17, customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26, year_total#39, customer_id#45, year_total#46] + +(72) TakeOrderedAndProject +Input [3]: [customer_id#23, customer_first_name#24, customer_last_name#25] +Arguments: 100, [customer_id#23 ASC NULLS FIRST, customer_id#23 ASC NULLS FIRST, customer_id#23 ASC NULLS FIRST], [customer_id#23, customer_first_name#24, customer_last_name#25] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74/simplified.txt new file mode 100644 index 0000000000000..68a6e7bfd91a3 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74/simplified.txt @@ -0,0 +1,107 @@ +TakeOrderedAndProject [customer_first_name,customer_id,customer_last_name] + WholeStageCodegen (16) + Project [customer_first_name,customer_id,customer_last_name] + BroadcastHashJoin [customer_id,customer_id,year_total,year_total,year_total,year_total] + Project [customer_first_name,customer_id,customer_id,customer_last_name,year_total,year_total,year_total] + BroadcastHashJoin [customer_id,customer_id] + BroadcastHashJoin [customer_id,customer_id] + Filter [year_total] + HashAggregate 
[c_customer_id,c_first_name,c_last_name,d_year,sum] [customer_id,sum,sum(UnscaledValue(ss_net_paid)),year_total] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #1 + WholeStageCodegen (3) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_customer_id,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_id,c_customer_sk,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_net_paid,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (7) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [customer_first_name,customer_id,customer_last_name,sum,sum(UnscaledValue(ss_net_paid)),year_total] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #5 + WholeStageCodegen (6) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_customer_id,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_id,c_customer_sk,c_first_name,c_last_name] + InputAdapter + ReusedExchange [ss_customer_sk,ss_net_paid,ss_sold_date_sk] #2 + 
InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (11) + Project [customer_id,year_total] + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [customer_id,sum,sum(UnscaledValue(ws_net_paid)),year_total] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #8 + WholeStageCodegen (10) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ws_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,d_year,ws_net_paid] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [c_customer_id,c_first_name,c_last_name,ws_net_paid,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Filter [c_customer_id,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_id,c_customer_sk,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (8) + Filter [ws_bill_customer_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #3 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (15) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [customer_id,sum,sum(UnscaledValue(ws_net_paid)),year_total] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #11 + WholeStageCodegen (14) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ws_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,d_year,ws_net_paid] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [c_customer_id,c_first_name,c_last_name,ws_net_paid,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Filter [c_customer_id,c_customer_sk] + ColumnarToRow + InputAdapter 
+ Scan parquet default.customer [c_customer_id,c_customer_sk,c_first_name,c_last_name] + InputAdapter + ReusedExchange [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] #9 + InputAdapter + ReusedExchange [d_date_sk,d_year] #6 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.sf100/explain.txt new file mode 100644 index 0000000000000..f797b6c7ed087 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.sf100/explain.txt @@ -0,0 +1,752 @@ +== Physical Plan == +TakeOrderedAndProject (138) ++- * Project (137) + +- * SortMergeJoin Inner (136) + :- * Sort (74) + : +- Exchange (73) + : +- * HashAggregate (72) + : +- Exchange (71) + : +- * HashAggregate (70) + : +- * HashAggregate (69) + : +- Exchange (68) + : +- * HashAggregate (67) + : +- Union (66) + : :- * HashAggregate (47) + : : +- Exchange (46) + : : +- * HashAggregate (45) + : : +- Union (44) + : : :- * Project (25) + : : : +- SortMergeJoin LeftOuter (24) + : : : :- * Sort (18) + : : : : +- Exchange (17) + : : : : +- * Project (16) + : : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : : :- * Project (10) + : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : :- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet default.catalog_sales (1) + : : : : : +- BroadcastExchange (8) + : : : : : +- * Project (7) + : : : : : +- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.item (4) + : : : : +- BroadcastExchange (14) + : : : : +- * Filter (13) + : : : : +- * ColumnarToRow (12) + : : : : +- Scan parquet default.date_dim (11) + : : : +- * Sort (23) + : : : +- Exchange (22) + : : : +- * Filter (21) + : : : +- * ColumnarToRow (20) + : : : +- Scan parquet default.catalog_returns (19) + : : +- * Project (43) + : : +- SortMergeJoin LeftOuter (42) + : : :- * Sort (36) + : : : +- 
Exchange (35) + : : : +- * Project (34) + : : : +- * BroadcastHashJoin Inner BuildRight (33) + : : : :- * Project (31) + : : : : +- * BroadcastHashJoin Inner BuildRight (30) + : : : : :- * Filter (28) + : : : : : +- * ColumnarToRow (27) + : : : : : +- Scan parquet default.store_sales (26) + : : : : +- ReusedExchange (29) + : : : +- ReusedExchange (32) + : : +- * Sort (41) + : : +- Exchange (40) + : : +- * Filter (39) + : : +- * ColumnarToRow (38) + : : +- Scan parquet default.store_returns (37) + : +- * Project (65) + : +- SortMergeJoin LeftOuter (64) + : :- * Sort (58) + : : +- Exchange (57) + : : +- * Project (56) + : : +- * BroadcastHashJoin Inner BuildRight (55) + : : :- * Project (53) + : : : +- * BroadcastHashJoin Inner BuildRight (52) + : : : :- * Filter (50) + : : : : +- * ColumnarToRow (49) + : : : : +- Scan parquet default.web_sales (48) + : : : +- ReusedExchange (51) + : : +- ReusedExchange (54) + : +- * Sort (63) + : +- Exchange (62) + : +- * Filter (61) + : +- * ColumnarToRow (60) + : +- Scan parquet default.web_returns (59) + +- * Sort (135) + +- Exchange (134) + +- * HashAggregate (133) + +- Exchange (132) + +- * HashAggregate (131) + +- * HashAggregate (130) + +- Exchange (129) + +- * HashAggregate (128) + +- Union (127) + :- * HashAggregate (111) + : +- Exchange (110) + : +- * HashAggregate (109) + : +- Union (108) + : :- * Project (92) + : : +- SortMergeJoin LeftOuter (91) + : : :- * Sort (88) + : : : +- Exchange (87) + : : : +- * Project (86) + : : : +- * BroadcastHashJoin Inner BuildRight (85) + : : : :- * Project (80) + : : : : +- * BroadcastHashJoin Inner BuildRight (79) + : : : : :- * Filter (77) + : : : : : +- * ColumnarToRow (76) + : : : : : +- Scan parquet default.catalog_sales (75) + : : : : +- ReusedExchange (78) + : : : +- BroadcastExchange (84) + : : : +- * Filter (83) + : : : +- * ColumnarToRow (82) + : : : +- Scan parquet default.date_dim (81) + : : +- * Sort (90) + : : +- ReusedExchange (89) + : +- * Project (107) + : +- 
SortMergeJoin LeftOuter (106) + : :- * Sort (103) + : : +- Exchange (102) + : : +- * Project (101) + : : +- * BroadcastHashJoin Inner BuildRight (100) + : : :- * Project (98) + : : : +- * BroadcastHashJoin Inner BuildRight (97) + : : : :- * Filter (95) + : : : : +- * ColumnarToRow (94) + : : : : +- Scan parquet default.store_sales (93) + : : : +- ReusedExchange (96) + : : +- ReusedExchange (99) + : +- * Sort (105) + : +- ReusedExchange (104) + +- * Project (126) + +- SortMergeJoin LeftOuter (125) + :- * Sort (122) + : +- Exchange (121) + : +- * Project (120) + : +- * BroadcastHashJoin Inner BuildRight (119) + : :- * Project (117) + : : +- * BroadcastHashJoin Inner BuildRight (116) + : : :- * Filter (114) + : : : +- * ColumnarToRow (113) + : : : +- Scan parquet default.web_sales (112) + : : +- ReusedExchange (115) + : +- ReusedExchange (118) + +- * Sort (124) + +- ReusedExchange (123) + + +(1) Scan parquet default.catalog_sales +Output [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] + +(3) Filter [codegen id : 3] +Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] +Condition : (isnotnull(cs_item_sk#2) AND isnotnull(cs_sold_date_sk#1)) + +(4) Scan parquet default.item +Output [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: 
[IsNotNull(i_category), EqualTo(i_category,Books), IsNotNull(i_item_sk), IsNotNull(i_manufact_id), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] + +(6) Filter [codegen id : 1] +Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] +Condition : ((((((isnotnull(i_category#10) AND (i_category#10 = Books)) AND isnotnull(i_item_sk#6)) AND isnotnull(i_manufact_id#11)) AND isnotnull(i_brand_id#7)) AND isnotnull(i_class_id#8)) AND isnotnull(i_category_id#9)) + +(7) Project [codegen id : 1] +Output [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] + +(8) BroadcastExchange +Input [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_item_sk#2] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(10) Project [codegen id : 3] +Output [9]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Input [10]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(11) Scan parquet default.date_dim +Output [2]: [d_date_sk#13, d_year#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) 
ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#13, d_year#14] + +(13) Filter [codegen id : 2] +Input [2]: [d_date_sk#13, d_year#14] +Condition : ((isnotnull(d_year#14) AND (d_year#14 = 2002)) AND isnotnull(d_date_sk#13)) + +(14) BroadcastExchange +Input [2]: [d_date_sk#13, d_year#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#15] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#13] +Join condition: None + +(16) Project [codegen id : 3] +Output [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Input [11]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#13, d_year#14] + +(17) Exchange +Input [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Arguments: hashpartitioning(cs_order_number#3, cs_item_sk#2, 5), true, [id=#16] + +(18) Sort [codegen id : 4] +Input [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Arguments: [cs_order_number#3 ASC NULLS FIRST, cs_item_sk#2 ASC NULLS FIRST], false, 0 + +(19) Scan parquet default.catalog_returns +Output [4]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_returns] +PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 5] +Input [4]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] + +(21) Filter 
[codegen id : 5] +Input [4]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] +Condition : (isnotnull(cr_order_number#18) AND isnotnull(cr_item_sk#17)) + +(22) Exchange +Input [4]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] +Arguments: hashpartitioning(cr_order_number#18, cr_item_sk#17, 5), true, [id=#21] + +(23) Sort [codegen id : 6] +Input [4]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] +Arguments: [cr_order_number#18 ASC NULLS FIRST, cr_item_sk#17 ASC NULLS FIRST], false, 0 + +(24) SortMergeJoin +Left keys [2]: [cs_order_number#3, cs_item_sk#2] +Right keys [2]: [cr_order_number#18, cr_item_sk#17] +Join condition: None + +(25) Project [codegen id : 7] +Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (cs_quantity#4 - coalesce(cr_return_quantity#19, 0)) AS sales_cnt#22, CheckOverflow((promote_precision(cast(cs_ext_sales_price#5 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#20, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#23] +Input [13]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] + +(26) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 10] +Input [5]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28] + +(28) Filter [codegen id : 10] +Input [5]: [ss_sold_date_sk#24, 
ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28] +Condition : (isnotnull(ss_item_sk#25) AND isnotnull(ss_sold_date_sk#24)) + +(29) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(30) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ss_item_sk#25] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(31) Project [codegen id : 10] +Output [9]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Input [10]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(32) ReusedExchange [Reuses operator id: 14] +Output [2]: [d_date_sk#13, d_year#14] + +(33) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ss_sold_date_sk#24] +Right keys [1]: [d_date_sk#13] +Join condition: None + +(34) Project [codegen id : 10] +Output [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Input [11]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#13, d_year#14] + +(35) Exchange +Input [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Arguments: hashpartitioning(cast(ss_ticket_number#26 as bigint), cast(ss_item_sk#25 as bigint), 5), true, [id=#29] + +(36) Sort [codegen id : 11] +Input [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Arguments: [cast(ss_ticket_number#26 as bigint) ASC NULLS FIRST, cast(ss_item_sk#25 as 
bigint) ASC NULLS FIRST], false, 0 + +(37) Scan parquet default.store_returns +Output [4]: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 12] +Input [4]: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] + +(39) Filter [codegen id : 12] +Input [4]: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] +Condition : (isnotnull(sr_ticket_number#31) AND isnotnull(sr_item_sk#30)) + +(40) Exchange +Input [4]: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] +Arguments: hashpartitioning(sr_ticket_number#31, sr_item_sk#30, 5), true, [id=#34] + +(41) Sort [codegen id : 13] +Input [4]: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] +Arguments: [sr_ticket_number#31 ASC NULLS FIRST, sr_item_sk#30 ASC NULLS FIRST], false, 0 + +(42) SortMergeJoin +Left keys [2]: [cast(ss_ticket_number#26 as bigint), cast(ss_item_sk#25 as bigint)] +Right keys [2]: [sr_ticket_number#31, sr_item_sk#30] +Join condition: None + +(43) Project [codegen id : 14] +Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (ss_quantity#27 - coalesce(sr_return_quantity#32, 0)) AS sales_cnt#35, CheckOverflow((promote_precision(cast(ss_ext_sales_price#28 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#33, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#36] +Input [13]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] + +(44) Union + 
+(45) HashAggregate [codegen id : 15] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] +Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] + +(46) Exchange +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] +Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23, 5), true, [id=#37] + +(47) HashAggregate [codegen id : 16] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] +Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] + +(48) Scan parquet default.web_sales +Output [5]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(49) ColumnarToRow [codegen id : 19] +Input [5]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42] + +(50) Filter [codegen id : 19] +Input [5]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42] +Condition : (isnotnull(ws_item_sk#39) AND isnotnull(ws_sold_date_sk#38)) + +(51) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#6, i_brand_id#7, 
i_class_id#8, i_category_id#9, i_manufact_id#11] + +(52) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [ws_item_sk#39] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(53) Project [codegen id : 19] +Output [9]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Input [10]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(54) ReusedExchange [Reuses operator id: 14] +Output [2]: [d_date_sk#13, d_year#14] + +(55) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [ws_sold_date_sk#38] +Right keys [1]: [d_date_sk#13] +Join condition: None + +(56) Project [codegen id : 19] +Output [9]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Input [11]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#13, d_year#14] + +(57) Exchange +Input [9]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Arguments: hashpartitioning(cast(ws_order_number#40 as bigint), cast(ws_item_sk#39 as bigint), 5), true, [id=#43] + +(58) Sort [codegen id : 20] +Input [9]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Arguments: [cast(ws_order_number#40 as bigint) ASC NULLS FIRST, cast(ws_item_sk#39 as bigint) ASC NULLS FIRST], false, 0 + +(59) Scan parquet default.web_returns +Output [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_returns] +PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_order_number)] +ReadSchema: struct + +(60) ColumnarToRow [codegen id : 21] +Input [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] + +(61) Filter [codegen id : 21] +Input [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] +Condition : (isnotnull(wr_item_sk#44) AND isnotnull(wr_order_number#45)) + +(62) Exchange +Input [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] +Arguments: hashpartitioning(wr_order_number#45, wr_item_sk#44, 5), true, [id=#48] + +(63) Sort [codegen id : 22] +Input [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] +Arguments: [wr_order_number#45 ASC NULLS FIRST, wr_item_sk#44 ASC NULLS FIRST], false, 0 + +(64) SortMergeJoin +Left keys [2]: [cast(ws_order_number#40 as bigint), cast(ws_item_sk#39 as bigint)] +Right keys [2]: [wr_order_number#45, wr_item_sk#44] +Join condition: None + +(65) Project [codegen id : 23] +Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (ws_quantity#41 - coalesce(wr_return_quantity#46, 0)) AS sales_cnt#49, CheckOverflow((promote_precision(cast(ws_ext_sales_price#42 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#47, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#50] +Input [13]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] + +(66) Union + +(67) HashAggregate [codegen id : 24] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] +Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, 
i_manufact_id#11, sales_cnt#22, sales_amt#23] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] + +(68) Exchange +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] +Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23, 5), true, [id=#51] + +(69) HashAggregate [codegen id : 25] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] +Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] + +(70) HashAggregate [codegen id : 25] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] +Keys [5]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Functions [2]: [partial_sum(cast(sales_cnt#22 as bigint)), partial_sum(UnscaledValue(sales_amt#23))] +Aggregate Attributes [2]: [sum#52, sum#53] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#54, sum#55] + +(71) Exchange +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#54, sum#55] +Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), true, [id=#56] + +(72) HashAggregate [codegen id : 26] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#54, sum#55] +Keys [5]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Functions [2]: [sum(cast(sales_cnt#22 as bigint)), sum(UnscaledValue(sales_amt#23))] +Aggregate Attributes [2]: 
[sum(cast(sales_cnt#22 as bigint))#57, sum(UnscaledValue(sales_amt#23))#58] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum(cast(sales_cnt#22 as bigint))#57 AS sales_cnt#59, MakeDecimal(sum(UnscaledValue(sales_amt#23))#58,18,2) AS sales_amt#60] + +(73) Exchange +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#59, sales_amt#60] +Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), true, [id=#61] + +(74) Sort [codegen id : 27] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#59, sales_amt#60] +Arguments: [i_brand_id#7 ASC NULLS FIRST, i_class_id#8 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST, i_manufact_id#11 ASC NULLS FIRST], false, 0 + +(75) Scan parquet default.catalog_sales +Output [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(76) ColumnarToRow [codegen id : 30] +Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] + +(77) Filter [codegen id : 30] +Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] +Condition : (isnotnull(cs_item_sk#2) AND isnotnull(cs_sold_date_sk#1)) + +(78) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] + +(79) BroadcastHashJoin [codegen id : 30] +Left keys [1]: [cs_item_sk#2] +Right keys [1]: [i_item_sk#62] +Join condition: None + +(80) Project [codegen id : 30] +Output [9]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, 
i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +Input [10]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] + +(81) Scan parquet default.date_dim +Output [2]: [d_date_sk#67, d_year#68] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(82) ColumnarToRow [codegen id : 29] +Input [2]: [d_date_sk#67, d_year#68] + +(83) Filter [codegen id : 29] +Input [2]: [d_date_sk#67, d_year#68] +Condition : ((isnotnull(d_year#68) AND (d_year#68 = 2001)) AND isnotnull(d_date_sk#67)) + +(84) BroadcastExchange +Input [2]: [d_date_sk#67, d_year#68] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#69] + +(85) BroadcastHashJoin [codegen id : 30] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#67] +Join condition: None + +(86) Project [codegen id : 30] +Output [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] +Input [11]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_date_sk#67, d_year#68] + +(87) Exchange +Input [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] +Arguments: hashpartitioning(cs_order_number#3, cs_item_sk#2, 5), true, [id=#70] + +(88) Sort [codegen id : 31] +Input [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] +Arguments: 
[cs_order_number#3 ASC NULLS FIRST, cs_item_sk#2 ASC NULLS FIRST], false, 0 + +(89) ReusedExchange [Reuses operator id: 22] +Output [4]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] + +(90) Sort [codegen id : 33] +Input [4]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] +Arguments: [cr_order_number#18 ASC NULLS FIRST, cr_item_sk#17 ASC NULLS FIRST], false, 0 + +(91) SortMergeJoin +Left keys [2]: [cs_order_number#3, cs_item_sk#2] +Right keys [2]: [cr_order_number#18, cr_item_sk#17] +Join condition: None + +(92) Project [codegen id : 34] +Output [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, (cs_quantity#4 - coalesce(cr_return_quantity#19, 0)) AS sales_cnt#22, CheckOverflow((promote_precision(cast(cs_ext_sales_price#5 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#20, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#23] +Input [13]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68, cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] + +(93) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(94) ColumnarToRow [codegen id : 37] +Input [5]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28] + +(95) Filter [codegen id : 37] +Input [5]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28] +Condition : (isnotnull(ss_item_sk#25) AND isnotnull(ss_sold_date_sk#24)) + +(96) 
ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] + +(97) BroadcastHashJoin [codegen id : 37] +Left keys [1]: [ss_item_sk#25] +Right keys [1]: [i_item_sk#62] +Join condition: None + +(98) Project [codegen id : 37] +Output [9]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +Input [10]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] + +(99) ReusedExchange [Reuses operator id: 84] +Output [2]: [d_date_sk#67, d_year#68] + +(100) BroadcastHashJoin [codegen id : 37] +Left keys [1]: [ss_sold_date_sk#24] +Right keys [1]: [d_date_sk#67] +Join condition: None + +(101) Project [codegen id : 37] +Output [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] +Input [11]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_date_sk#67, d_year#68] + +(102) Exchange +Input [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] +Arguments: hashpartitioning(cast(ss_ticket_number#26 as bigint), cast(ss_item_sk#25 as bigint), 5), true, [id=#71] + +(103) Sort [codegen id : 38] +Input [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] +Arguments: [cast(ss_ticket_number#26 as bigint) ASC NULLS FIRST, cast(ss_item_sk#25 as bigint) ASC NULLS FIRST], false, 0 + +(104) ReusedExchange [Reuses operator id: 40] +Output [4]: [sr_item_sk#30, 
sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] + +(105) Sort [codegen id : 40] +Input [4]: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] +Arguments: [sr_ticket_number#31 ASC NULLS FIRST, sr_item_sk#30 ASC NULLS FIRST], false, 0 + +(106) SortMergeJoin +Left keys [2]: [cast(ss_ticket_number#26 as bigint), cast(ss_item_sk#25 as bigint)] +Right keys [2]: [sr_ticket_number#31, sr_item_sk#30] +Join condition: None + +(107) Project [codegen id : 41] +Output [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, (ss_quantity#27 - coalesce(sr_return_quantity#32, 0)) AS sales_cnt#72, CheckOverflow((promote_precision(cast(ss_ext_sales_price#28 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#33, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#73] +Input [13]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68, sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] + +(108) Union + +(109) HashAggregate [codegen id : 42] +Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +Keys [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] + +(110) Exchange +Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +Arguments: hashpartitioning(d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23, 5), true, [id=#74] + +(111) HashAggregate [codegen id : 43] +Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] 
+Keys [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] + +(112) Scan parquet default.web_sales +Output [5]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(113) ColumnarToRow [codegen id : 46] +Input [5]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42] + +(114) Filter [codegen id : 46] +Input [5]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42] +Condition : (isnotnull(ws_item_sk#39) AND isnotnull(ws_sold_date_sk#38)) + +(115) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] + +(116) BroadcastHashJoin [codegen id : 46] +Left keys [1]: [ws_item_sk#39] +Right keys [1]: [i_item_sk#62] +Join condition: None + +(117) Project [codegen id : 46] +Output [9]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +Input [10]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] + +(118) ReusedExchange [Reuses operator id: 84] +Output [2]: [d_date_sk#67, d_year#68] + +(119) BroadcastHashJoin [codegen id : 46] +Left keys [1]: [ws_sold_date_sk#38] +Right keys [1]: [d_date_sk#67] +Join condition: None + +(120) Project [codegen id : 46] +Output [9]: 
[ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] +Input [11]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_date_sk#67, d_year#68] + +(121) Exchange +Input [9]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] +Arguments: hashpartitioning(cast(ws_order_number#40 as bigint), cast(ws_item_sk#39 as bigint), 5), true, [id=#75] + +(122) Sort [codegen id : 47] +Input [9]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] +Arguments: [cast(ws_order_number#40 as bigint) ASC NULLS FIRST, cast(ws_item_sk#39 as bigint) ASC NULLS FIRST], false, 0 + +(123) ReusedExchange [Reuses operator id: 62] +Output [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] + +(124) Sort [codegen id : 49] +Input [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] +Arguments: [wr_order_number#45 ASC NULLS FIRST, wr_item_sk#44 ASC NULLS FIRST], false, 0 + +(125) SortMergeJoin +Left keys [2]: [cast(ws_order_number#40 as bigint), cast(ws_item_sk#39 as bigint)] +Right keys [2]: [wr_order_number#45, wr_item_sk#44] +Join condition: None + +(126) Project [codegen id : 50] +Output [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, (ws_quantity#41 - coalesce(wr_return_quantity#46, 0)) AS sales_cnt#76, CheckOverflow((promote_precision(cast(ws_ext_sales_price#42 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#47, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#77] +Input [13]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#63, 
i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68, wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] + +(127) Union + +(128) HashAggregate [codegen id : 51] +Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +Keys [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] + +(129) Exchange +Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +Arguments: hashpartitioning(d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23, 5), true, [id=#78] + +(130) HashAggregate [codegen id : 52] +Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +Keys [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] + +(131) HashAggregate [codegen id : 52] +Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +Keys [5]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +Functions [2]: [partial_sum(cast(sales_cnt#22 as bigint)), partial_sum(UnscaledValue(sales_amt#23))] +Aggregate Attributes [2]: [sum#79, sum#80] +Results [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sum#81, sum#82] + +(132) Exchange +Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sum#81, sum#82] +Arguments: hashpartitioning(d_year#68, i_brand_id#63, 
i_class_id#64, i_category_id#65, i_manufact_id#66, 5), true, [id=#83] + +(133) HashAggregate [codegen id : 53] +Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sum#81, sum#82] +Keys [5]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +Functions [2]: [sum(cast(sales_cnt#22 as bigint)), sum(UnscaledValue(sales_amt#23))] +Aggregate Attributes [2]: [sum(cast(sales_cnt#22 as bigint))#84, sum(UnscaledValue(sales_amt#23))#85] +Results [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sum(cast(sales_cnt#22 as bigint))#84 AS sales_cnt#86, MakeDecimal(sum(UnscaledValue(sales_amt#23))#85,18,2) AS sales_amt#87] + +(134) Exchange +Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#86, sales_amt#87] +Arguments: hashpartitioning(i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, 5), true, [id=#88] + +(135) Sort [codegen id : 54] +Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#86, sales_amt#87] +Arguments: [i_brand_id#63 ASC NULLS FIRST, i_class_id#64 ASC NULLS FIRST, i_category_id#65 ASC NULLS FIRST, i_manufact_id#66 ASC NULLS FIRST], false, 0 + +(136) SortMergeJoin [codegen id : 55] +Left keys [4]: [i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Right keys [4]: [i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +Join condition: (CheckOverflow((promote_precision(cast(sales_cnt#59 as decimal(17,2))) / promote_precision(cast(sales_cnt#86 as decimal(17,2)))), DecimalType(37,20), true) < 0.90000000000000000000) + +(137) Project [codegen id : 55] +Output [10]: [d_year#68 AS prev_year#89, d_year#14 AS year#90, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#86 AS prev_yr_cnt#91, sales_cnt#59 AS curr_yr_cnt#92, (sales_cnt#59 - sales_cnt#86) AS sales_cnt_diff#93, CheckOverflow((promote_precision(cast(sales_amt#60 
as decimal(19,2))) - promote_precision(cast(sales_amt#87 as decimal(19,2)))), DecimalType(19,2), true) AS sales_amt_diff#94] +Input [14]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#59, sales_amt#60, d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#86, sales_amt#87] + +(138) TakeOrderedAndProject +Input [10]: [prev_year#89, year#90, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#91, curr_yr_cnt#92, sales_cnt_diff#93, sales_amt_diff#94] +Arguments: 100, [sales_cnt_diff#93 ASC NULLS FIRST], [prev_year#89, year#90, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#91, curr_yr_cnt#92, sales_cnt_diff#93, sales_amt_diff#94] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.sf100/simplified.txt new file mode 100644 index 0000000000000..40aa2931ad5b8 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.sf100/simplified.txt @@ -0,0 +1,237 @@ +TakeOrderedAndProject [curr_yr_cnt,i_brand_id,i_category_id,i_class_id,i_manufact_id,prev_year,prev_yr_cnt,sales_amt_diff,sales_cnt_diff,year] + WholeStageCodegen (55) + Project [d_year,d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_amt,sales_cnt,sales_cnt] + SortMergeJoin [i_brand_id,i_brand_id,i_category_id,i_category_id,i_class_id,i_class_id,i_manufact_id,i_manufact_id,sales_cnt,sales_cnt] + InputAdapter + WholeStageCodegen (27) + Sort [i_brand_id,i_category_id,i_class_id,i_manufact_id] + InputAdapter + Exchange [i_brand_id,i_category_id,i_class_id,i_manufact_id] #1 + WholeStageCodegen (26) + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sum,sum] [sales_amt,sales_cnt,sum,sum,sum(UnscaledValue(sales_amt)),sum(cast(sales_cnt as bigint))] + InputAdapter + Exchange 
[d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id] #2 + WholeStageCodegen (25) + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] [sum,sum,sum,sum] + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + InputAdapter + Exchange [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] #3 + WholeStageCodegen (24) + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + InputAdapter + Union + WholeStageCodegen (16) + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + InputAdapter + Exchange [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] #4 + WholeStageCodegen (15) + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + InputAdapter + Union + WholeStageCodegen (7) + Project [cr_return_amount,cr_return_quantity,cs_ext_sales_price,cs_quantity,d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id] + InputAdapter + SortMergeJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + WholeStageCodegen (4) + Sort [cs_item_sk,cs_order_number] + InputAdapter + Exchange [cs_item_sk,cs_order_number] #5 + WholeStageCodegen (3) + Project [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,cs_sold_date_sk,i_brand_id,i_category_id,i_class_id,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,cs_sold_date_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (1) + Project [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] + Filter 
[i_brand_id,i_category,i_category_id,i_class_id,i_item_sk,i_manufact_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand_id,i_category,i_category_id,i_class_id,i_item_sk,i_manufact_id] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (2) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen (6) + Sort [cr_item_sk,cr_order_number] + InputAdapter + Exchange [cr_item_sk,cr_order_number] #8 + WholeStageCodegen (5) + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] + WholeStageCodegen (14) + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sr_return_amt,sr_return_quantity,ss_ext_sales_price,ss_quantity] + InputAdapter + SortMergeJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + WholeStageCodegen (11) + Sort [ss_item_sk,ss_ticket_number] + InputAdapter + Exchange [ss_item_sk,ss_ticket_number] #9 + WholeStageCodegen (10) + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,ss_ext_sales_price,ss_item_sk,ss_quantity,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,i_manufact_id,ss_ext_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ticket_number] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ticket_number] + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] #6 + InputAdapter + ReusedExchange [d_date_sk,d_year] #7 + WholeStageCodegen (13) + Sort [sr_item_sk,sr_ticket_number] + InputAdapter + Exchange [sr_item_sk,sr_ticket_number] #10 + WholeStageCodegen (12) + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan 
parquet default.store_returns [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] + WholeStageCodegen (23) + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,wr_return_amt,wr_return_quantity,ws_ext_sales_price,ws_quantity] + InputAdapter + SortMergeJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + WholeStageCodegen (20) + Sort [ws_item_sk,ws_order_number] + InputAdapter + Exchange [ws_item_sk,ws_order_number] #11 + WholeStageCodegen (19) + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,i_manufact_id,ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity,ws_sold_date_sk] + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] #6 + InputAdapter + ReusedExchange [d_date_sk,d_year] #7 + WholeStageCodegen (22) + Sort [wr_item_sk,wr_order_number] + InputAdapter + Exchange [wr_item_sk,wr_order_number] #12 + WholeStageCodegen (21) + Filter [wr_item_sk,wr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] + InputAdapter + WholeStageCodegen (54) + Sort [i_brand_id,i_category_id,i_class_id,i_manufact_id] + InputAdapter + Exchange [i_brand_id,i_category_id,i_class_id,i_manufact_id] #13 + WholeStageCodegen (53) + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sum,sum] [sales_amt,sales_cnt,sum,sum,sum(UnscaledValue(sales_amt)),sum(cast(sales_cnt as bigint))] + InputAdapter + Exchange [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id] #14 + WholeStageCodegen (52) + HashAggregate 
[d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] [sum,sum,sum,sum] + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + InputAdapter + Exchange [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] #15 + WholeStageCodegen (51) + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + InputAdapter + Union + WholeStageCodegen (43) + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + InputAdapter + Exchange [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] #16 + WholeStageCodegen (42) + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + InputAdapter + Union + WholeStageCodegen (34) + Project [cr_return_amount,cr_return_quantity,cs_ext_sales_price,cs_quantity,d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id] + InputAdapter + SortMergeJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + WholeStageCodegen (31) + Sort [cs_item_sk,cs_order_number] + InputAdapter + Exchange [cs_item_sk,cs_order_number] #17 + WholeStageCodegen (30) + Project [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,cs_sold_date_sk,i_brand_id,i_category_id,i_class_id,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,cs_sold_date_sk] + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] #6 + InputAdapter + BroadcastExchange #18 + WholeStageCodegen (29) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim 
[d_date_sk,d_year] + WholeStageCodegen (33) + Sort [cr_item_sk,cr_order_number] + InputAdapter + ReusedExchange [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] #8 + WholeStageCodegen (41) + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sr_return_amt,sr_return_quantity,ss_ext_sales_price,ss_quantity] + InputAdapter + SortMergeJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + WholeStageCodegen (38) + Sort [ss_item_sk,ss_ticket_number] + InputAdapter + Exchange [ss_item_sk,ss_ticket_number] #19 + WholeStageCodegen (37) + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,ss_ext_sales_price,ss_item_sk,ss_quantity,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,i_manufact_id,ss_ext_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ticket_number] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ticket_number] + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] #6 + InputAdapter + ReusedExchange [d_date_sk,d_year] #18 + WholeStageCodegen (40) + Sort [sr_item_sk,sr_ticket_number] + InputAdapter + ReusedExchange [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] #10 + WholeStageCodegen (50) + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,wr_return_amt,wr_return_quantity,ws_ext_sales_price,ws_quantity] + InputAdapter + SortMergeJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + WholeStageCodegen (47) + Sort [ws_item_sk,ws_order_number] + InputAdapter + Exchange [ws_item_sk,ws_order_number] #20 + WholeStageCodegen (46) + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project 
[i_brand_id,i_category_id,i_class_id,i_manufact_id,ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity,ws_sold_date_sk] + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] #6 + InputAdapter + ReusedExchange [d_date_sk,d_year] #18 + WholeStageCodegen (49) + Sort [wr_item_sk,wr_order_number] + InputAdapter + ReusedExchange [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] #12 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/explain.txt new file mode 100644 index 0000000000000..2c829e45de716 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/explain.txt @@ -0,0 +1,647 @@ +== Physical Plan == +TakeOrderedAndProject (117) ++- * Project (116) + +- * BroadcastHashJoin Inner BuildRight (115) + :- * HashAggregate (63) + : +- Exchange (62) + : +- * HashAggregate (61) + : +- * HashAggregate (60) + : +- Exchange (59) + : +- * HashAggregate (58) + : +- Union (57) + : :- * HashAggregate (41) + : : +- Exchange (40) + : : +- * HashAggregate (39) + : : +- Union (38) + : : :- * Project (22) + : : : +- * BroadcastHashJoin LeftOuter BuildRight (21) + : : : :- * Project (16) + : : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : : :- * Project (10) + : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : :- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet default.catalog_sales (1) + : : : : : +- BroadcastExchange (8) + : : : : : +- * Project (7) + : : : : : +- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.item (4) + : : : : +- BroadcastExchange (14) + : : : : 
+- * Filter (13) + : : : : +- * ColumnarToRow (12) + : : : : +- Scan parquet default.date_dim (11) + : : : +- BroadcastExchange (20) + : : : +- * Filter (19) + : : : +- * ColumnarToRow (18) + : : : +- Scan parquet default.catalog_returns (17) + : : +- * Project (37) + : : +- * BroadcastHashJoin LeftOuter BuildRight (36) + : : :- * Project (31) + : : : +- * BroadcastHashJoin Inner BuildRight (30) + : : : :- * Project (28) + : : : : +- * BroadcastHashJoin Inner BuildRight (27) + : : : : :- * Filter (25) + : : : : : +- * ColumnarToRow (24) + : : : : : +- Scan parquet default.store_sales (23) + : : : : +- ReusedExchange (26) + : : : +- ReusedExchange (29) + : : +- BroadcastExchange (35) + : : +- * Filter (34) + : : +- * ColumnarToRow (33) + : : +- Scan parquet default.store_returns (32) + : +- * Project (56) + : +- * BroadcastHashJoin LeftOuter BuildRight (55) + : :- * Project (50) + : : +- * BroadcastHashJoin Inner BuildRight (49) + : : :- * Project (47) + : : : +- * BroadcastHashJoin Inner BuildRight (46) + : : : :- * Filter (44) + : : : : +- * ColumnarToRow (43) + : : : : +- Scan parquet default.web_sales (42) + : : : +- ReusedExchange (45) + : : +- ReusedExchange (48) + : +- BroadcastExchange (54) + : +- * Filter (53) + : +- * ColumnarToRow (52) + : +- Scan parquet default.web_returns (51) + +- BroadcastExchange (114) + +- * HashAggregate (113) + +- Exchange (112) + +- * HashAggregate (111) + +- * HashAggregate (110) + +- Exchange (109) + +- * HashAggregate (108) + +- Union (107) + :- * HashAggregate (94) + : +- Exchange (93) + : +- * HashAggregate (92) + : +- Union (91) + : :- * Project (78) + : : +- * BroadcastHashJoin LeftOuter BuildRight (77) + : : :- * Project (75) + : : : +- * BroadcastHashJoin Inner BuildRight (74) + : : : :- * Project (69) + : : : : +- * BroadcastHashJoin Inner BuildRight (68) + : : : : :- * Filter (66) + : : : : : +- * ColumnarToRow (65) + : : : : : +- Scan parquet default.catalog_sales (64) + : : : : +- ReusedExchange (67) + : : : +- 
BroadcastExchange (73) + : : : +- * Filter (72) + : : : +- * ColumnarToRow (71) + : : : +- Scan parquet default.date_dim (70) + : : +- ReusedExchange (76) + : +- * Project (90) + : +- * BroadcastHashJoin LeftOuter BuildRight (89) + : :- * Project (87) + : : +- * BroadcastHashJoin Inner BuildRight (86) + : : :- * Project (84) + : : : +- * BroadcastHashJoin Inner BuildRight (83) + : : : :- * Filter (81) + : : : : +- * ColumnarToRow (80) + : : : : +- Scan parquet default.store_sales (79) + : : : +- ReusedExchange (82) + : : +- ReusedExchange (85) + : +- ReusedExchange (88) + +- * Project (106) + +- * BroadcastHashJoin LeftOuter BuildRight (105) + :- * Project (103) + : +- * BroadcastHashJoin Inner BuildRight (102) + : :- * Project (100) + : : +- * BroadcastHashJoin Inner BuildRight (99) + : : :- * Filter (97) + : : : +- * ColumnarToRow (96) + : : : +- Scan parquet default.web_sales (95) + : : +- ReusedExchange (98) + : +- ReusedExchange (101) + +- ReusedExchange (104) + + +(1) Scan parquet default.catalog_sales +Output [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] + +(3) Filter [codegen id : 4] +Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] +Condition : (isnotnull(cs_item_sk#2) AND isnotnull(cs_sold_date_sk#1)) + +(4) Scan parquet default.item +Output [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Books), IsNotNull(i_item_sk), IsNotNull(i_manufact_id), IsNotNull(i_category_id), IsNotNull(i_brand_id), IsNotNull(i_class_id)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] + +(6) Filter [codegen id : 1] +Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] +Condition : ((((((isnotnull(i_category#10) AND (i_category#10 = Books)) AND isnotnull(i_item_sk#6)) AND isnotnull(i_manufact_id#11)) AND isnotnull(i_category_id#9)) AND isnotnull(i_brand_id#7)) AND isnotnull(i_class_id#8)) + +(7) Project [codegen id : 1] +Output [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] + +(8) BroadcastExchange +Input [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_item_sk#2] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(10) Project [codegen id : 4] +Output [9]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Input [10]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(11) Scan parquet default.date_dim +Output [2]: [d_date_sk#13, d_year#14] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#13, d_year#14] + +(13) Filter [codegen id : 2] +Input [2]: [d_date_sk#13, d_year#14] +Condition : ((isnotnull(d_year#14) AND (d_year#14 = 2002)) AND isnotnull(d_date_sk#13)) + +(14) BroadcastExchange +Input [2]: [d_date_sk#13, d_year#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#15] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#13] +Join condition: None + +(16) Project [codegen id : 4] +Output [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Input [11]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#13, d_year#14] + +(17) Scan parquet default.catalog_returns +Output [4]: [cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_returns] +PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [4]: [cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] + +(19) Filter [codegen id : 3] +Input [4]: [cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] +Condition : (isnotnull(cr_order_number#17) AND isnotnull(cr_item_sk#16)) + +(20) BroadcastExchange +Input [4]: [cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] +Arguments: 
HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, false] as bigint), 32) | (cast(input[0, int, false] as bigint) & 4294967295))),false), [id=#20] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [2]: [cs_order_number#3, cs_item_sk#2] +Right keys [2]: [cr_order_number#17, cr_item_sk#16] +Join condition: None + +(22) Project [codegen id : 4] +Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (cs_quantity#4 - coalesce(cr_return_quantity#18, 0)) AS sales_cnt#21, CheckOverflow((promote_precision(cast(cs_ext_sales_price#5 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#19, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#22] +Input [13]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] + +(23) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 8] +Input [5]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27] + +(25) Filter [codegen id : 8] +Input [5]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27] +Condition : (isnotnull(ss_item_sk#24) AND isnotnull(ss_sold_date_sk#23)) + +(26) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(27) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_item_sk#24] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(28) 
Project [codegen id : 8] +Output [9]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Input [10]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(29) ReusedExchange [Reuses operator id: 14] +Output [2]: [d_date_sk#13, d_year#14] + +(30) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_sold_date_sk#23] +Right keys [1]: [d_date_sk#13] +Join condition: None + +(31) Project [codegen id : 8] +Output [9]: [ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Input [11]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#13, d_year#14] + +(32) Scan parquet default.store_returns +Output [4]: [sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(33) ColumnarToRow [codegen id : 7] +Input [4]: [sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] + +(34) Filter [codegen id : 7] +Input [4]: [sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] +Condition : (isnotnull(sr_ticket_number#29) AND isnotnull(sr_item_sk#28)) + +(35) BroadcastExchange +Input [4]: [sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#32] + +(36) BroadcastHashJoin [codegen id : 8] +Left keys 
[2]: [cast(ss_ticket_number#25 as bigint), cast(ss_item_sk#24 as bigint)] +Right keys [2]: [sr_ticket_number#29, sr_item_sk#28] +Join condition: None + +(37) Project [codegen id : 8] +Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (ss_quantity#26 - coalesce(sr_return_quantity#30, 0)) AS sales_cnt#33, CheckOverflow((promote_precision(cast(ss_ext_sales_price#27 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#31, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#34] +Input [13]: [ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] + +(38) Union + +(39) HashAggregate [codegen id : 9] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] +Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] + +(40) Exchange +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] +Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22, 5), true, [id=#35] + +(41) HashAggregate [codegen id : 10] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] +Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] + +(42) Scan parquet default.web_sales +Output [5]: 
[ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(43) ColumnarToRow [codegen id : 14] +Input [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] + +(44) Filter [codegen id : 14] +Input [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] +Condition : (isnotnull(ws_item_sk#37) AND isnotnull(ws_sold_date_sk#36)) + +(45) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(46) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ws_item_sk#37] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(47) Project [codegen id : 14] +Output [9]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Input [10]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(48) ReusedExchange [Reuses operator id: 14] +Output [2]: [d_date_sk#13, d_year#14] + +(49) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ws_sold_date_sk#36] +Right keys [1]: [d_date_sk#13] +Join condition: None + +(50) Project [codegen id : 14] +Output [9]: [ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Input [11]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#13, d_year#14] + +(51) Scan 
parquet default.web_returns +Output [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_returns] +PushedFilters: [IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] +ReadSchema: struct + +(52) ColumnarToRow [codegen id : 13] +Input [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] + +(53) Filter [codegen id : 13] +Input [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] +Condition : (isnotnull(wr_order_number#42) AND isnotnull(wr_item_sk#41)) + +(54) BroadcastExchange +Input [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#45] + +(55) BroadcastHashJoin [codegen id : 14] +Left keys [2]: [cast(ws_order_number#38 as bigint), cast(ws_item_sk#37 as bigint)] +Right keys [2]: [wr_order_number#42, wr_item_sk#41] +Join condition: None + +(56) Project [codegen id : 14] +Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (ws_quantity#39 - coalesce(wr_return_quantity#43, 0)) AS sales_cnt#46, CheckOverflow((promote_precision(cast(ws_ext_sales_price#40 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#44, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#47] +Input [13]: [ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] + +(57) Union + +(58) HashAggregate [codegen id : 15] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] +Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, 
i_manufact_id#11, sales_cnt#21, sales_amt#22] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] + +(59) Exchange +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] +Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22, 5), true, [id=#48] + +(60) HashAggregate [codegen id : 16] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] +Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] + +(61) HashAggregate [codegen id : 16] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] +Keys [5]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Functions [2]: [partial_sum(cast(sales_cnt#21 as bigint)), partial_sum(UnscaledValue(sales_amt#22))] +Aggregate Attributes [2]: [sum#49, sum#50] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#51, sum#52] + +(62) Exchange +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#51, sum#52] +Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), true, [id=#53] + +(63) HashAggregate [codegen id : 34] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#51, sum#52] +Keys [5]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Functions [2]: [sum(cast(sales_cnt#21 as bigint)), sum(UnscaledValue(sales_amt#22))] +Aggregate Attributes [2]: 
[sum(cast(sales_cnt#21 as bigint))#54, sum(UnscaledValue(sales_amt#22))#55] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum(cast(sales_cnt#21 as bigint))#54 AS sales_cnt#56, MakeDecimal(sum(UnscaledValue(sales_amt#22))#55,18,2) AS sales_amt#57] + +(64) Scan parquet default.catalog_sales +Output [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(65) ColumnarToRow [codegen id : 20] +Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] + +(66) Filter [codegen id : 20] +Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] +Condition : (isnotnull(cs_item_sk#2) AND isnotnull(cs_sold_date_sk#1)) + +(67) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] + +(68) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [cs_item_sk#2] +Right keys [1]: [i_item_sk#58] +Join condition: None + +(69) Project [codegen id : 20] +Output [9]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +Input [10]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] + +(70) Scan parquet default.date_dim +Output [2]: [d_date_sk#63, d_year#64] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), 
EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(71) ColumnarToRow [codegen id : 18] +Input [2]: [d_date_sk#63, d_year#64] + +(72) Filter [codegen id : 18] +Input [2]: [d_date_sk#63, d_year#64] +Condition : ((isnotnull(d_year#64) AND (d_year#64 = 2001)) AND isnotnull(d_date_sk#63)) + +(73) BroadcastExchange +Input [2]: [d_date_sk#63, d_year#64] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#65] + +(74) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#63] +Join condition: None + +(75) Project [codegen id : 20] +Output [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64] +Input [11]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_date_sk#63, d_year#64] + +(76) ReusedExchange [Reuses operator id: 20] +Output [4]: [cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] + +(77) BroadcastHashJoin [codegen id : 20] +Left keys [2]: [cs_order_number#3, cs_item_sk#2] +Right keys [2]: [cr_order_number#17, cr_item_sk#16] +Join condition: None + +(78) Project [codegen id : 20] +Output [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, (cs_quantity#4 - coalesce(cr_return_quantity#18, 0)) AS sales_cnt#21, CheckOverflow((promote_precision(cast(cs_ext_sales_price#5 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#19, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#22] +Input [13]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64, cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] + +(79) Scan parquet default.store_sales +Output [5]: 
[ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(80) ColumnarToRow [codegen id : 24] +Input [5]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27] + +(81) Filter [codegen id : 24] +Input [5]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27] +Condition : (isnotnull(ss_item_sk#24) AND isnotnull(ss_sold_date_sk#23)) + +(82) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] + +(83) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [ss_item_sk#24] +Right keys [1]: [i_item_sk#58] +Join condition: None + +(84) Project [codegen id : 24] +Output [9]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +Input [10]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] + +(85) ReusedExchange [Reuses operator id: 73] +Output [2]: [d_date_sk#63, d_year#64] + +(86) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [ss_sold_date_sk#23] +Right keys [1]: [d_date_sk#63] +Join condition: None + +(87) Project [codegen id : 24] +Output [9]: [ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64] +Input [11]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_date_sk#63, 
d_year#64] + +(88) ReusedExchange [Reuses operator id: 35] +Output [4]: [sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] + +(89) BroadcastHashJoin [codegen id : 24] +Left keys [2]: [cast(ss_ticket_number#25 as bigint), cast(ss_item_sk#24 as bigint)] +Right keys [2]: [sr_ticket_number#29, sr_item_sk#28] +Join condition: None + +(90) Project [codegen id : 24] +Output [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, (ss_quantity#26 - coalesce(sr_return_quantity#30, 0)) AS sales_cnt#66, CheckOverflow((promote_precision(cast(ss_ext_sales_price#27 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#31, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#67] +Input [13]: [ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64, sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] + +(91) Union + +(92) HashAggregate [codegen id : 25] +Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Keys [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] + +(93) Exchange +Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Arguments: hashpartitioning(d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22, 5), true, [id=#68] + +(94) HashAggregate [codegen id : 26] +Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Keys [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, 
sales_amt#22] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] + +(95) Scan parquet default.web_sales +Output [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(96) ColumnarToRow [codegen id : 30] +Input [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] + +(97) Filter [codegen id : 30] +Input [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] +Condition : (isnotnull(ws_item_sk#37) AND isnotnull(ws_sold_date_sk#36)) + +(98) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] + +(99) BroadcastHashJoin [codegen id : 30] +Left keys [1]: [ws_item_sk#37] +Right keys [1]: [i_item_sk#58] +Join condition: None + +(100) Project [codegen id : 30] +Output [9]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +Input [10]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] + +(101) ReusedExchange [Reuses operator id: 73] +Output [2]: [d_date_sk#63, d_year#64] + +(102) BroadcastHashJoin [codegen id : 30] +Left keys [1]: [ws_sold_date_sk#36] +Right keys [1]: [d_date_sk#63] +Join condition: None + +(103) Project [codegen id : 30] +Output [9]: [ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#59, i_class_id#60, 
i_category_id#61, i_manufact_id#62, d_year#64] +Input [11]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_date_sk#63, d_year#64] + +(104) ReusedExchange [Reuses operator id: 54] +Output [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] + +(105) BroadcastHashJoin [codegen id : 30] +Left keys [2]: [cast(ws_order_number#38 as bigint), cast(ws_item_sk#37 as bigint)] +Right keys [2]: [wr_order_number#42, wr_item_sk#41] +Join condition: None + +(106) Project [codegen id : 30] +Output [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, (ws_quantity#39 - coalesce(wr_return_quantity#43, 0)) AS sales_cnt#69, CheckOverflow((promote_precision(cast(ws_ext_sales_price#40 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#44, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#70] +Input [13]: [ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64, wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] + +(107) Union + +(108) HashAggregate [codegen id : 31] +Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Keys [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] + +(109) Exchange +Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Arguments: hashpartitioning(d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22, 5), true, [id=#71] + +(110) HashAggregate [codegen id 
: 32] +Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Keys [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] + +(111) HashAggregate [codegen id : 32] +Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Keys [5]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +Functions [2]: [partial_sum(cast(sales_cnt#21 as bigint)), partial_sum(UnscaledValue(sales_amt#22))] +Aggregate Attributes [2]: [sum#72, sum#73] +Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sum#74, sum#75] + +(112) Exchange +Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sum#74, sum#75] +Arguments: hashpartitioning(d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, 5), true, [id=#76] + +(113) HashAggregate [codegen id : 33] +Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sum#74, sum#75] +Keys [5]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +Functions [2]: [sum(cast(sales_cnt#21 as bigint)), sum(UnscaledValue(sales_amt#22))] +Aggregate Attributes [2]: [sum(cast(sales_cnt#21 as bigint))#77, sum(UnscaledValue(sales_amt#22))#78] +Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sum(cast(sales_cnt#21 as bigint))#77 AS sales_cnt#79, MakeDecimal(sum(UnscaledValue(sales_amt#22))#78,18,2) AS sales_amt#80] + +(114) BroadcastExchange +Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#79, sales_amt#80] +Arguments: HashedRelationBroadcastMode(List(input[1, int, 
true], input[2, int, true], input[3, int, true], input[4, int, true]),false), [id=#81] + +(115) BroadcastHashJoin [codegen id : 34] +Left keys [4]: [i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Right keys [4]: [i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +Join condition: (CheckOverflow((promote_precision(cast(sales_cnt#56 as decimal(17,2))) / promote_precision(cast(sales_cnt#79 as decimal(17,2)))), DecimalType(37,20), true) < 0.90000000000000000000) + +(116) Project [codegen id : 34] +Output [10]: [d_year#64 AS prev_year#82, d_year#14 AS year#83, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#79 AS prev_yr_cnt#84, sales_cnt#56 AS curr_yr_cnt#85, (sales_cnt#56 - sales_cnt#79) AS sales_cnt_diff#86, CheckOverflow((promote_precision(cast(sales_amt#57 as decimal(19,2))) - promote_precision(cast(sales_amt#80 as decimal(19,2)))), DecimalType(19,2), true) AS sales_amt_diff#87] +Input [14]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#56, sales_amt#57, d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#79, sales_amt#80] + +(117) TakeOrderedAndProject +Input [10]: [prev_year#82, year#83, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#84, curr_yr_cnt#85, sales_cnt_diff#86, sales_amt_diff#87] +Arguments: 100, [sales_cnt_diff#86 ASC NULLS FIRST], [prev_year#82, year#83, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#84, curr_yr_cnt#85, sales_cnt_diff#86, sales_amt_diff#87] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/simplified.txt new file mode 100644 index 0000000000000..4974c17705d87 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/simplified.txt @@ -0,0 +1,180 @@ +TakeOrderedAndProject 
[curr_yr_cnt,i_brand_id,i_category_id,i_class_id,i_manufact_id,prev_year,prev_yr_cnt,sales_amt_diff,sales_cnt_diff,year] + WholeStageCodegen (34) + Project [d_year,d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_amt,sales_cnt,sales_cnt] + BroadcastHashJoin [i_brand_id,i_brand_id,i_category_id,i_category_id,i_class_id,i_class_id,i_manufact_id,i_manufact_id,sales_cnt,sales_cnt] + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sum,sum] [sales_amt,sales_cnt,sum,sum,sum(UnscaledValue(sales_amt)),sum(cast(sales_cnt as bigint))] + InputAdapter + Exchange [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id] #1 + WholeStageCodegen (16) + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] [sum,sum,sum,sum] + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + InputAdapter + Exchange [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] #2 + WholeStageCodegen (15) + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + InputAdapter + Union + WholeStageCodegen (10) + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + InputAdapter + Exchange [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] #3 + WholeStageCodegen (9) + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + InputAdapter + Union + WholeStageCodegen (4) + Project [cr_return_amount,cr_return_quantity,cs_ext_sales_price,cs_quantity,d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id] + BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + Project [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project 
[cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,cs_sold_date_sk,i_brand_id,i_category_id,i_class_id,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,cs_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] + Filter [i_brand_id,i_category,i_category_id,i_class_id,i_item_sk,i_manufact_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand_id,i_category,i_category_id,i_class_id,i_item_sk,i_manufact_id] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] + WholeStageCodegen (8) + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sr_return_amt,sr_return_quantity,ss_ext_sales_price,ss_quantity] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,ss_ext_sales_price,ss_item_sk,ss_quantity,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,i_manufact_id,ss_ext_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ticket_number] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ticket_number] + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] #4 + InputAdapter + 
ReusedExchange [d_date_sk,d_year] #5 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (7) + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] + WholeStageCodegen (14) + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,wr_return_amt,wr_return_quantity,ws_ext_sales_price,ws_quantity] + BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,i_manufact_id,ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity,ws_sold_date_sk] + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] #4 + InputAdapter + ReusedExchange [d_date_sk,d_year] #5 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (13) + Filter [wr_item_sk,wr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (33) + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sum,sum] [sales_amt,sales_cnt,sum,sum,sum(UnscaledValue(sales_amt)),sum(cast(sales_cnt as bigint))] + InputAdapter + Exchange [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id] #10 + WholeStageCodegen (32) + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] [sum,sum,sum,sum] + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + InputAdapter + Exchange 
[d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] #11 + WholeStageCodegen (31) + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + InputAdapter + Union + WholeStageCodegen (26) + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + InputAdapter + Exchange [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] #12 + WholeStageCodegen (25) + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + InputAdapter + Union + WholeStageCodegen (20) + Project [cr_return_amount,cr_return_quantity,cs_ext_sales_price,cs_quantity,d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id] + BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + Project [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,cs_sold_date_sk,i_brand_id,i_category_id,i_class_id,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,cs_sold_date_sk] + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] #4 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (18) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + ReusedExchange [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] #6 + WholeStageCodegen (24) + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sr_return_amt,sr_return_quantity,ss_ext_sales_price,ss_quantity] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Project 
[d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,ss_ext_sales_price,ss_item_sk,ss_quantity,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,i_manufact_id,ss_ext_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ticket_number] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ticket_number] + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] #4 + InputAdapter + ReusedExchange [d_date_sk,d_year] #13 + InputAdapter + ReusedExchange [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] #7 + WholeStageCodegen (30) + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,wr_return_amt,wr_return_quantity,ws_ext_sales_price,ws_quantity] + BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,i_manufact_id,ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity,ws_sold_date_sk] + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] #4 + InputAdapter + ReusedExchange [d_date_sk,d_year] #13 + InputAdapter + ReusedExchange [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] #8 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76.sf100/explain.txt new file 
mode 100644 index 0000000000000..c3c6951d9d083 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76.sf100/explain.txt @@ -0,0 +1,245 @@ +== Physical Plan == +TakeOrderedAndProject (44) ++- * HashAggregate (43) + +- Exchange (42) + +- * HashAggregate (41) + +- Union (40) + :- * Project (15) + : +- * BroadcastHashJoin Inner BuildRight (14) + : :- * Project (9) + : : +- * BroadcastHashJoin Inner BuildRight (8) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.store_sales (1) + : : +- BroadcastExchange (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.date_dim (4) + : +- BroadcastExchange (13) + : +- * Filter (12) + : +- * ColumnarToRow (11) + : +- Scan parquet default.item (10) + :- * Project (30) + : +- * BroadcastHashJoin Inner BuildLeft (29) + : :- BroadcastExchange (25) + : : +- * Project (24) + : : +- * BroadcastHashJoin Inner BuildLeft (23) + : : :- BroadcastExchange (19) + : : : +- * Filter (18) + : : : +- * ColumnarToRow (17) + : : : +- Scan parquet default.web_sales (16) + : : +- * Filter (22) + : : +- * ColumnarToRow (21) + : : +- Scan parquet default.date_dim (20) + : +- * Filter (28) + : +- * ColumnarToRow (27) + : +- Scan parquet default.item (26) + +- * Project (39) + +- * BroadcastHashJoin Inner BuildRight (38) + :- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Filter (33) + : : +- * ColumnarToRow (32) + : : +- Scan parquet default.catalog_sales (31) + : +- ReusedExchange (34) + +- ReusedExchange (37) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNull(ss_store_sk), IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(2) 
ColumnarToRow [codegen id : 3] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4] + +(3) Filter [codegen id : 3] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4] +Condition : ((isnull(ss_store_sk#3) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_sold_date_sk#1)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#5, d_year#6, d_qoy#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_qoy#7] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_qoy#7] +Condition : isnotnull(d_date_sk#5) + +(7) BroadcastExchange +Input [3]: [d_date_sk#5, d_year#6, d_qoy#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#8] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(9) Project [codegen id : 3] +Output [5]: [ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, d_year#6, d_qoy#7] +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, d_date_sk#5, d_year#6, d_qoy#7] + +(10) Scan parquet default.item +Output [2]: [i_item_sk#9, i_category#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [i_item_sk#9, i_category#10] + +(12) Filter [codegen id : 2] +Input [2]: [i_item_sk#9, i_category#10] +Condition : isnotnull(i_item_sk#9) + +(13) BroadcastExchange +Input [2]: [i_item_sk#9, i_category#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, 
false] as bigint)),false), [id=#11] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#9] +Join condition: None + +(15) Project [codegen id : 3] +Output [6]: [store AS channel#12, ss_store_sk#3 AS col_name#13, d_year#6, d_qoy#7, i_category#10, ss_ext_sales_price#4 AS ext_sales_price#14] +Input [7]: [ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, d_year#6, d_qoy#7, i_item_sk#9, i_category#10] + +(16) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#15, ws_item_sk#16, ws_ship_customer_sk#17, ws_ext_sales_price#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNull(ws_ship_customer_sk), IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 4] +Input [4]: [ws_sold_date_sk#15, ws_item_sk#16, ws_ship_customer_sk#17, ws_ext_sales_price#18] + +(18) Filter [codegen id : 4] +Input [4]: [ws_sold_date_sk#15, ws_item_sk#16, ws_ship_customer_sk#17, ws_ext_sales_price#18] +Condition : ((isnull(ws_ship_customer_sk#17) AND isnotnull(ws_item_sk#16)) AND isnotnull(ws_sold_date_sk#15)) + +(19) BroadcastExchange +Input [4]: [ws_sold_date_sk#15, ws_item_sk#16, ws_ship_customer_sk#17, ws_ext_sales_price#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] + +(20) Scan parquet default.date_dim +Output [3]: [d_date_sk#5, d_year#6, d_qoy#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(21) ColumnarToRow +Input [3]: [d_date_sk#5, d_year#6, d_qoy#7] + +(22) Filter +Input [3]: [d_date_sk#5, d_year#6, d_qoy#7] +Condition : isnotnull(d_date_sk#5) + +(23) BroadcastHashJoin [codegen id : 5] 
+Left keys [1]: [ws_sold_date_sk#15] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(24) Project [codegen id : 5] +Output [5]: [ws_item_sk#16, ws_ship_customer_sk#17, ws_ext_sales_price#18, d_year#6, d_qoy#7] +Input [7]: [ws_sold_date_sk#15, ws_item_sk#16, ws_ship_customer_sk#17, ws_ext_sales_price#18, d_date_sk#5, d_year#6, d_qoy#7] + +(25) BroadcastExchange +Input [5]: [ws_item_sk#16, ws_ship_customer_sk#17, ws_ext_sales_price#18, d_year#6, d_qoy#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(26) Scan parquet default.item +Output [2]: [i_item_sk#9, i_category#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(27) ColumnarToRow +Input [2]: [i_item_sk#9, i_category#10] + +(28) Filter +Input [2]: [i_item_sk#9, i_category#10] +Condition : isnotnull(i_item_sk#9) + +(29) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_item_sk#16] +Right keys [1]: [i_item_sk#9] +Join condition: None + +(30) Project [codegen id : 6] +Output [6]: [web AS channel#21, ws_ship_customer_sk#17 AS col_name#22, d_year#6, d_qoy#7, i_category#10, ws_ext_sales_price#18 AS ext_sales_price#23] +Input [7]: [ws_item_sk#16, ws_ship_customer_sk#17, ws_ext_sales_price#18, d_year#6, d_qoy#7, i_item_sk#9, i_category#10] + +(31) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#24, cs_ship_addr_sk#25, cs_item_sk#26, cs_ext_sales_price#27] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNull(cs_ship_addr_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 9] +Input [4]: [cs_sold_date_sk#24, cs_ship_addr_sk#25, cs_item_sk#26, 
cs_ext_sales_price#27] + +(33) Filter [codegen id : 9] +Input [4]: [cs_sold_date_sk#24, cs_ship_addr_sk#25, cs_item_sk#26, cs_ext_sales_price#27] +Condition : ((isnull(cs_ship_addr_sk#25) AND isnotnull(cs_item_sk#26)) AND isnotnull(cs_sold_date_sk#24)) + +(34) ReusedExchange [Reuses operator id: 7] +Output [3]: [d_date_sk#5, d_year#6, d_qoy#7] + +(35) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [cs_sold_date_sk#24] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(36) Project [codegen id : 9] +Output [5]: [cs_ship_addr_sk#25, cs_item_sk#26, cs_ext_sales_price#27, d_year#6, d_qoy#7] +Input [7]: [cs_sold_date_sk#24, cs_ship_addr_sk#25, cs_item_sk#26, cs_ext_sales_price#27, d_date_sk#5, d_year#6, d_qoy#7] + +(37) ReusedExchange [Reuses operator id: 13] +Output [2]: [i_item_sk#9, i_category#10] + +(38) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [cs_item_sk#26] +Right keys [1]: [i_item_sk#9] +Join condition: None + +(39) Project [codegen id : 9] +Output [6]: [catalog AS channel#28, cs_ship_addr_sk#25 AS col_name#29, d_year#6, d_qoy#7, i_category#10, cs_ext_sales_price#27 AS ext_sales_price#30] +Input [7]: [cs_ship_addr_sk#25, cs_item_sk#26, cs_ext_sales_price#27, d_year#6, d_qoy#7, i_item_sk#9, i_category#10] + +(40) Union + +(41) HashAggregate [codegen id : 10] +Input [6]: [channel#12, col_name#13, d_year#6, d_qoy#7, i_category#10, ext_sales_price#14] +Keys [5]: [channel#12, col_name#13, d_year#6, d_qoy#7, i_category#10] +Functions [2]: [partial_count(1), partial_sum(UnscaledValue(ext_sales_price#14))] +Aggregate Attributes [2]: [count#31, sum#32] +Results [7]: [channel#12, col_name#13, d_year#6, d_qoy#7, i_category#10, count#33, sum#34] + +(42) Exchange +Input [7]: [channel#12, col_name#13, d_year#6, d_qoy#7, i_category#10, count#33, sum#34] +Arguments: hashpartitioning(channel#12, col_name#13, d_year#6, d_qoy#7, i_category#10, 5), true, [id=#35] + +(43) HashAggregate [codegen id : 11] +Input [7]: [channel#12, col_name#13, d_year#6, d_qoy#7, 
i_category#10, count#33, sum#34] +Keys [5]: [channel#12, col_name#13, d_year#6, d_qoy#7, i_category#10] +Functions [2]: [count(1), sum(UnscaledValue(ext_sales_price#14))] +Aggregate Attributes [2]: [count(1)#36, sum(UnscaledValue(ext_sales_price#14))#37] +Results [7]: [channel#12, col_name#13, d_year#6, d_qoy#7, i_category#10, count(1)#36 AS sales_cnt#38, MakeDecimal(sum(UnscaledValue(ext_sales_price#14))#37,17,2) AS sales_amt#39] + +(44) TakeOrderedAndProject +Input [7]: [channel#12, col_name#13, d_year#6, d_qoy#7, i_category#10, sales_cnt#38, sales_amt#39] +Arguments: 100, [channel#12 ASC NULLS FIRST, col_name#13 ASC NULLS FIRST, d_year#6 ASC NULLS FIRST, d_qoy#7 ASC NULLS FIRST, i_category#10 ASC NULLS FIRST], [channel#12, col_name#13, d_year#6, d_qoy#7, i_category#10, sales_cnt#38, sales_amt#39] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76.sf100/simplified.txt new file mode 100644 index 0000000000000..74214da0678c6 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76.sf100/simplified.txt @@ -0,0 +1,68 @@ +TakeOrderedAndProject [channel,col_name,d_qoy,d_year,i_category,sales_amt,sales_cnt] + WholeStageCodegen (11) + HashAggregate [channel,col_name,count,d_qoy,d_year,i_category,sum] [count,count(1),sales_amt,sales_cnt,sum,sum(UnscaledValue(ext_sales_price))] + InputAdapter + Exchange [channel,col_name,d_qoy,d_year,i_category] #1 + WholeStageCodegen (10) + HashAggregate [channel,col_name,d_qoy,d_year,ext_sales_price,i_category] [count,count,sum,sum] + InputAdapter + Union + WholeStageCodegen (3) + Project [d_qoy,d_year,i_category,ss_ext_sales_price,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [d_qoy,d_year,ss_ext_sales_price,ss_item_sk,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + 
InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_item_sk] + WholeStageCodegen (6) + Project [d_qoy,d_year,i_category,ws_ext_sales_price,ws_ship_customer_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (5) + Project [d_qoy,d_year,ws_ext_sales_price,ws_item_sk,ws_ship_customer_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [ws_item_sk,ws_ship_customer_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_ship_customer_sk,ws_sold_date_sk] + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_item_sk] + WholeStageCodegen (9) + Project [cs_ext_sales_price,cs_ship_addr_sk,d_qoy,d_year,i_category] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_ext_sales_price,cs_item_sk,cs_ship_addr_sk,d_qoy,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_item_sk,cs_ship_addr_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ext_sales_price,cs_item_sk,cs_ship_addr_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_qoy,d_year] #2 + InputAdapter + ReusedExchange [i_category,i_item_sk] #3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76/explain.txt new file mode 
100644 index 0000000000000..b4a279af23889 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76/explain.txt @@ -0,0 +1,209 @@ +== Physical Plan == +TakeOrderedAndProject (38) ++- * HashAggregate (37) + +- Exchange (36) + +- * HashAggregate (35) + +- Union (34) + :- * Project (15) + : +- * BroadcastHashJoin Inner BuildRight (14) + : :- * Project (9) + : : +- * BroadcastHashJoin Inner BuildRight (8) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.store_sales (1) + : : +- BroadcastExchange (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.item (4) + : +- BroadcastExchange (13) + : +- * Filter (12) + : +- * ColumnarToRow (11) + : +- Scan parquet default.date_dim (10) + :- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (21) + : : +- * BroadcastHashJoin Inner BuildRight (20) + : : :- * Filter (18) + : : : +- * ColumnarToRow (17) + : : : +- Scan parquet default.web_sales (16) + : : +- ReusedExchange (19) + : +- ReusedExchange (22) + +- * Project (33) + +- * BroadcastHashJoin Inner BuildRight (32) + :- * Project (30) + : +- * BroadcastHashJoin Inner BuildRight (29) + : :- * Filter (27) + : : +- * ColumnarToRow (26) + : : +- Scan parquet default.catalog_sales (25) + : +- ReusedExchange (28) + +- ReusedExchange (31) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNull(ss_store_sk), IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4] + +(3) Filter [codegen id : 3] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, 
ss_ext_sales_price#4] +Condition : ((isnull(ss_store_sk#3) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_sold_date_sk#1)) + +(4) Scan parquet default.item +Output [2]: [i_item_sk#5, i_category#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [i_item_sk#5, i_category#6] + +(6) Filter [codegen id : 1] +Input [2]: [i_item_sk#5, i_category#6] +Condition : isnotnull(i_item_sk#5) + +(7) BroadcastExchange +Input [2]: [i_item_sk#5, i_category#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(9) Project [codegen id : 3] +Output [4]: [ss_sold_date_sk#1, ss_store_sk#3, ss_ext_sales_price#4, i_category#6] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, i_item_sk#5, i_category#6] + +(10) Scan parquet default.date_dim +Output [3]: [d_date_sk#8, d_year#9, d_qoy#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] + +(12) Filter [codegen id : 2] +Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] +Condition : isnotnull(d_date_sk#8) + +(13) BroadcastExchange +Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(15) Project [codegen id : 3] +Output [6]: [store AS 
channel#12, ss_store_sk#3 AS col_name#13, d_year#9, d_qoy#10, i_category#6, ss_ext_sales_price#4 AS ext_sales_price#14] +Input [7]: [ss_sold_date_sk#1, ss_store_sk#3, ss_ext_sales_price#4, i_category#6, d_date_sk#8, d_year#9, d_qoy#10] + +(16) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#15, ws_item_sk#16, ws_ship_customer_sk#17, ws_ext_sales_price#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNull(ws_ship_customer_sk), IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 6] +Input [4]: [ws_sold_date_sk#15, ws_item_sk#16, ws_ship_customer_sk#17, ws_ext_sales_price#18] + +(18) Filter [codegen id : 6] +Input [4]: [ws_sold_date_sk#15, ws_item_sk#16, ws_ship_customer_sk#17, ws_ext_sales_price#18] +Condition : ((isnull(ws_ship_customer_sk#17) AND isnotnull(ws_item_sk#16)) AND isnotnull(ws_sold_date_sk#15)) + +(19) ReusedExchange [Reuses operator id: 7] +Output [2]: [i_item_sk#5, i_category#6] + +(20) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_item_sk#16] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(21) Project [codegen id : 6] +Output [4]: [ws_sold_date_sk#15, ws_ship_customer_sk#17, ws_ext_sales_price#18, i_category#6] +Input [6]: [ws_sold_date_sk#15, ws_item_sk#16, ws_ship_customer_sk#17, ws_ext_sales_price#18, i_item_sk#5, i_category#6] + +(22) ReusedExchange [Reuses operator id: 13] +Output [3]: [d_date_sk#8, d_year#9, d_qoy#10] + +(23) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#15] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(24) Project [codegen id : 6] +Output [6]: [web AS channel#19, ws_ship_customer_sk#17 AS col_name#20, d_year#9, d_qoy#10, i_category#6, ws_ext_sales_price#18 AS ext_sales_price#21] +Input [7]: [ws_sold_date_sk#15, ws_ship_customer_sk#17, ws_ext_sales_price#18, i_category#6, 
d_date_sk#8, d_year#9, d_qoy#10] + +(25) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#22, cs_ship_addr_sk#23, cs_item_sk#24, cs_ext_sales_price#25] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNull(cs_ship_addr_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(26) ColumnarToRow [codegen id : 9] +Input [4]: [cs_sold_date_sk#22, cs_ship_addr_sk#23, cs_item_sk#24, cs_ext_sales_price#25] + +(27) Filter [codegen id : 9] +Input [4]: [cs_sold_date_sk#22, cs_ship_addr_sk#23, cs_item_sk#24, cs_ext_sales_price#25] +Condition : ((isnull(cs_ship_addr_sk#23) AND isnotnull(cs_item_sk#24)) AND isnotnull(cs_sold_date_sk#22)) + +(28) ReusedExchange [Reuses operator id: 7] +Output [2]: [i_item_sk#5, i_category#6] + +(29) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [cs_item_sk#24] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(30) Project [codegen id : 9] +Output [4]: [cs_sold_date_sk#22, cs_ship_addr_sk#23, cs_ext_sales_price#25, i_category#6] +Input [6]: [cs_sold_date_sk#22, cs_ship_addr_sk#23, cs_item_sk#24, cs_ext_sales_price#25, i_item_sk#5, i_category#6] + +(31) ReusedExchange [Reuses operator id: 13] +Output [3]: [d_date_sk#8, d_year#9, d_qoy#10] + +(32) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [cs_sold_date_sk#22] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(33) Project [codegen id : 9] +Output [6]: [catalog AS channel#26, cs_ship_addr_sk#23 AS col_name#27, d_year#9, d_qoy#10, i_category#6, cs_ext_sales_price#25 AS ext_sales_price#28] +Input [7]: [cs_sold_date_sk#22, cs_ship_addr_sk#23, cs_ext_sales_price#25, i_category#6, d_date_sk#8, d_year#9, d_qoy#10] + +(34) Union + +(35) HashAggregate [codegen id : 10] +Input [6]: [channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6, ext_sales_price#14] +Keys [5]: [channel#12, col_name#13, d_year#9, 
d_qoy#10, i_category#6] +Functions [2]: [partial_count(1), partial_sum(UnscaledValue(ext_sales_price#14))] +Aggregate Attributes [2]: [count#29, sum#30] +Results [7]: [channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6, count#31, sum#32] + +(36) Exchange +Input [7]: [channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6, count#31, sum#32] +Arguments: hashpartitioning(channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6, 5), true, [id=#33] + +(37) HashAggregate [codegen id : 11] +Input [7]: [channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6, count#31, sum#32] +Keys [5]: [channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6] +Functions [2]: [count(1), sum(UnscaledValue(ext_sales_price#14))] +Aggregate Attributes [2]: [count(1)#34, sum(UnscaledValue(ext_sales_price#14))#35] +Results [7]: [channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6, count(1)#34 AS sales_cnt#36, MakeDecimal(sum(UnscaledValue(ext_sales_price#14))#35,17,2) AS sales_amt#37] + +(38) TakeOrderedAndProject +Input [7]: [channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6, sales_cnt#36, sales_amt#37] +Arguments: 100, [channel#12 ASC NULLS FIRST, col_name#13 ASC NULLS FIRST, d_year#9 ASC NULLS FIRST, d_qoy#10 ASC NULLS FIRST, i_category#6 ASC NULLS FIRST], [channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6, sales_cnt#36, sales_amt#37] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76/simplified.txt new file mode 100644 index 0000000000000..697757ade4997 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76/simplified.txt @@ -0,0 +1,58 @@ +TakeOrderedAndProject [channel,col_name,d_qoy,d_year,i_category,sales_amt,sales_cnt] + WholeStageCodegen (11) + HashAggregate [channel,col_name,count,d_qoy,d_year,i_category,sum] [count,count(1),sales_amt,sales_cnt,sum,sum(UnscaledValue(ext_sales_price))] + 
InputAdapter + Exchange [channel,col_name,d_qoy,d_year,i_category] #1 + WholeStageCodegen (10) + HashAggregate [channel,col_name,d_qoy,d_year,ext_sales_price,i_category] [count,count,sum,sum] + InputAdapter + Union + WholeStageCodegen (3) + Project [d_qoy,d_year,i_category,ss_ext_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_category,ss_ext_sales_price,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_item_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] + WholeStageCodegen (6) + Project [d_qoy,d_year,i_category,ws_ext_sales_price,ws_ship_customer_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [i_category,ws_ext_sales_price,ws_ship_customer_sk,ws_sold_date_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Filter [ws_item_sk,ws_ship_customer_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_ship_customer_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [i_category,i_item_sk] #2 + InputAdapter + ReusedExchange [d_date_sk,d_qoy,d_year] #3 + WholeStageCodegen (9) + Project [cs_ext_sales_price,cs_ship_addr_sk,d_qoy,d_year,i_category] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_ext_sales_price,cs_ship_addr_sk,cs_sold_date_sk,i_category] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk,cs_ship_addr_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales 
[cs_ext_sales_price,cs_item_sk,cs_ship_addr_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [i_category,i_item_sk] #2 + InputAdapter + ReusedExchange [d_date_sk,d_qoy,d_year] #3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77.sf100/explain.txt new file mode 100644 index 0000000000000..560903bb9eeab --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77.sf100/explain.txt @@ -0,0 +1,520 @@ +== Physical Plan == +TakeOrderedAndProject (91) ++- * HashAggregate (90) + +- Exchange (89) + +- * HashAggregate (88) + +- * Expand (87) + +- Union (86) + :- * Project (34) + : +- * BroadcastHashJoin LeftOuter BuildRight (33) + : :- * HashAggregate (19) + : : +- Exchange (18) + : : +- * HashAggregate (17) + : : +- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.store (11) + : +- BroadcastExchange (32) + : +- * HashAggregate (31) + : +- Exchange (30) + : +- * HashAggregate (29) + : +- * Project (28) + : +- * BroadcastHashJoin Inner BuildRight (27) + : :- * Project (25) + : : +- * BroadcastHashJoin Inner BuildRight (24) + : : :- * Filter (22) + : : : +- * ColumnarToRow (21) + : : : +- Scan parquet default.store_returns (20) + : : +- ReusedExchange (23) + : +- ReusedExchange (26) + :- * Project (55) + : +- BroadcastNestedLoopJoin Inner BuildRight (54) + : :- * HashAggregate (43) + : : +- Exchange (42) + : : +- * HashAggregate (41) + : : +- * Project 
(40) + : : +- * BroadcastHashJoin Inner BuildRight (39) + : : :- * Filter (37) + : : : +- * ColumnarToRow (36) + : : : +- Scan parquet default.catalog_sales (35) + : : +- ReusedExchange (38) + : +- BroadcastExchange (53) + : +- * HashAggregate (52) + : +- Exchange (51) + : +- * HashAggregate (50) + : +- * Project (49) + : +- * BroadcastHashJoin Inner BuildRight (48) + : :- * Filter (46) + : : +- * ColumnarToRow (45) + : : +- Scan parquet default.catalog_returns (44) + : +- ReusedExchange (47) + +- * Project (85) + +- * BroadcastHashJoin LeftOuter BuildRight (84) + :- * HashAggregate (70) + : +- Exchange (69) + : +- * HashAggregate (68) + : +- * Project (67) + : +- * BroadcastHashJoin Inner BuildRight (66) + : :- * Project (61) + : : +- * BroadcastHashJoin Inner BuildRight (60) + : : :- * Filter (58) + : : : +- * ColumnarToRow (57) + : : : +- Scan parquet default.web_sales (56) + : : +- ReusedExchange (59) + : +- BroadcastExchange (65) + : +- * Filter (64) + : +- * ColumnarToRow (63) + : +- Scan parquet default.web_page (62) + +- BroadcastExchange (83) + +- * HashAggregate (82) + +- Exchange (81) + +- * HashAggregate (80) + +- * Project (79) + +- * BroadcastHashJoin Inner BuildRight (78) + :- * Project (76) + : +- * BroadcastHashJoin Inner BuildRight (75) + : :- * Filter (73) + : : +- * ColumnarToRow (72) + : : +- Scan parquet default.web_returns (71) + : +- ReusedExchange (74) + +- ReusedExchange (77) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] + +(3) Filter [codegen id : 3] +Input [4]: 
[ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#2)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#5, d_date#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-03), LessThanOrEqual(d_date,2000-09-02), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#5, d_date#6] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#5, d_date#6] +Condition : (((isnotnull(d_date#6) AND (d_date#6 >= 11172)) AND (d_date#6 <= 11202)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [2]: [d_date_sk#5, d_date#6] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] +Input [5]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, d_date_sk#5] + +(11) Scan parquet default.store +Output [1]: [s_store_sk#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [1]: [s_store_sk#8] + +(13) Filter [codegen id : 2] +Input [1]: [s_store_sk#8] +Condition : isnotnull(s_store_sk#8) + +(14) BroadcastExchange +Input [1]: [s_store_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] + +(15) 
BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#8] +Join condition: None + +(16) Project [codegen id : 3] +Output [3]: [ss_ext_sales_price#3, ss_net_profit#4, s_store_sk#8] +Input [4]: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, s_store_sk#8] + +(17) HashAggregate [codegen id : 3] +Input [3]: [ss_ext_sales_price#3, ss_net_profit#4, s_store_sk#8] +Keys [1]: [s_store_sk#8] +Functions [2]: [partial_sum(UnscaledValue(ss_ext_sales_price#3)), partial_sum(UnscaledValue(ss_net_profit#4))] +Aggregate Attributes [2]: [sum#10, sum#11] +Results [3]: [s_store_sk#8, sum#12, sum#13] + +(18) Exchange +Input [3]: [s_store_sk#8, sum#12, sum#13] +Arguments: hashpartitioning(s_store_sk#8, 5), true, [id=#14] + +(19) HashAggregate [codegen id : 8] +Input [3]: [s_store_sk#8, sum#12, sum#13] +Keys [1]: [s_store_sk#8] +Functions [2]: [sum(UnscaledValue(ss_ext_sales_price#3)), sum(UnscaledValue(ss_net_profit#4))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_ext_sales_price#3))#15, sum(UnscaledValue(ss_net_profit#4))#16] +Results [3]: [s_store_sk#8, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#15,17,2) AS sales#17, MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#16,17,2) AS profit#18] + +(20) Scan parquet default.store_returns +Output [4]: [sr_returned_date_sk#19, sr_store_sk#20, sr_return_amt#21, sr_net_loss#22] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_returns] +PushedFilters: [IsNotNull(sr_returned_date_sk), IsNotNull(sr_store_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [4]: [sr_returned_date_sk#19, sr_store_sk#20, sr_return_amt#21, sr_net_loss#22] + +(22) Filter [codegen id : 6] +Input [4]: [sr_returned_date_sk#19, sr_store_sk#20, sr_return_amt#21, sr_net_loss#22] +Condition : (isnotnull(sr_returned_date_sk#19) AND isnotnull(sr_store_sk#20)) + +(23) ReusedExchange 
[Reuses operator id: 14] +Output [1]: [s_store_sk#23] + +(24) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [sr_store_sk#20] +Right keys [1]: [cast(s_store_sk#23 as bigint)] +Join condition: None + +(25) Project [codegen id : 6] +Output [4]: [sr_returned_date_sk#19, sr_return_amt#21, sr_net_loss#22, s_store_sk#23] +Input [5]: [sr_returned_date_sk#19, sr_store_sk#20, sr_return_amt#21, sr_net_loss#22, s_store_sk#23] + +(26) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(27) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [sr_returned_date_sk#19] +Right keys [1]: [cast(d_date_sk#5 as bigint)] +Join condition: None + +(28) Project [codegen id : 6] +Output [3]: [sr_return_amt#21, sr_net_loss#22, s_store_sk#23] +Input [5]: [sr_returned_date_sk#19, sr_return_amt#21, sr_net_loss#22, s_store_sk#23, d_date_sk#5] + +(29) HashAggregate [codegen id : 6] +Input [3]: [sr_return_amt#21, sr_net_loss#22, s_store_sk#23] +Keys [1]: [s_store_sk#23] +Functions [2]: [partial_sum(UnscaledValue(sr_return_amt#21)), partial_sum(UnscaledValue(sr_net_loss#22))] +Aggregate Attributes [2]: [sum#24, sum#25] +Results [3]: [s_store_sk#23, sum#26, sum#27] + +(30) Exchange +Input [3]: [s_store_sk#23, sum#26, sum#27] +Arguments: hashpartitioning(s_store_sk#23, 5), true, [id=#28] + +(31) HashAggregate [codegen id : 7] +Input [3]: [s_store_sk#23, sum#26, sum#27] +Keys [1]: [s_store_sk#23] +Functions [2]: [sum(UnscaledValue(sr_return_amt#21)), sum(UnscaledValue(sr_net_loss#22))] +Aggregate Attributes [2]: [sum(UnscaledValue(sr_return_amt#21))#29, sum(UnscaledValue(sr_net_loss#22))#30] +Results [3]: [s_store_sk#23, MakeDecimal(sum(UnscaledValue(sr_return_amt#21))#29,17,2) AS returns#31, MakeDecimal(sum(UnscaledValue(sr_net_loss#22))#30,17,2) AS profit_loss#32] + +(32) BroadcastExchange +Input [3]: [s_store_sk#23, returns#31, profit_loss#32] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#33] + +(33) BroadcastHashJoin [codegen id 
: 8] +Left keys [1]: [s_store_sk#8] +Right keys [1]: [s_store_sk#23] +Join condition: None + +(34) Project [codegen id : 8] +Output [5]: [sales#17, coalesce(returns#31, 0.00) AS returns#34, CheckOverflow((promote_precision(cast(profit#18 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#32, 0.00) as decimal(18,2)))), DecimalType(18,2), true) AS profit#35, store channel AS channel#36, s_store_sk#8 AS id#37] +Input [6]: [s_store_sk#8, sales#17, profit#18, s_store_sk#23, returns#31, profit_loss#32] + +(35) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#38, cs_call_center_sk#39, cs_ext_sales_price#40, cs_net_profit#41] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(36) ColumnarToRow [codegen id : 10] +Input [4]: [cs_sold_date_sk#38, cs_call_center_sk#39, cs_ext_sales_price#40, cs_net_profit#41] + +(37) Filter [codegen id : 10] +Input [4]: [cs_sold_date_sk#38, cs_call_center_sk#39, cs_ext_sales_price#40, cs_net_profit#41] +Condition : isnotnull(cs_sold_date_sk#38) + +(38) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(39) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_sold_date_sk#38] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(40) Project [codegen id : 10] +Output [3]: [cs_call_center_sk#39, cs_ext_sales_price#40, cs_net_profit#41] +Input [5]: [cs_sold_date_sk#38, cs_call_center_sk#39, cs_ext_sales_price#40, cs_net_profit#41, d_date_sk#5] + +(41) HashAggregate [codegen id : 10] +Input [3]: [cs_call_center_sk#39, cs_ext_sales_price#40, cs_net_profit#41] +Keys [1]: [cs_call_center_sk#39] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_sales_price#40)), partial_sum(UnscaledValue(cs_net_profit#41))] +Aggregate Attributes [2]: [sum#42, sum#43] +Results [3]: [cs_call_center_sk#39, sum#44, sum#45] 
+ +(42) Exchange +Input [3]: [cs_call_center_sk#39, sum#44, sum#45] +Arguments: hashpartitioning(cs_call_center_sk#39, 5), true, [id=#46] + +(43) HashAggregate [codegen id : 11] +Input [3]: [cs_call_center_sk#39, sum#44, sum#45] +Keys [1]: [cs_call_center_sk#39] +Functions [2]: [sum(UnscaledValue(cs_ext_sales_price#40)), sum(UnscaledValue(cs_net_profit#41))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_sales_price#40))#47, sum(UnscaledValue(cs_net_profit#41))#48] +Results [3]: [cs_call_center_sk#39, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#40))#47,17,2) AS sales#49, MakeDecimal(sum(UnscaledValue(cs_net_profit#41))#48,17,2) AS profit#50] + +(44) Scan parquet default.catalog_returns +Output [3]: [cr_returned_date_sk#51, cr_return_amount#52, cr_net_loss#53] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_returns] +PushedFilters: [IsNotNull(cr_returned_date_sk)] +ReadSchema: struct + +(45) ColumnarToRow [codegen id : 13] +Input [3]: [cr_returned_date_sk#51, cr_return_amount#52, cr_net_loss#53] + +(46) Filter [codegen id : 13] +Input [3]: [cr_returned_date_sk#51, cr_return_amount#52, cr_net_loss#53] +Condition : isnotnull(cr_returned_date_sk#51) + +(47) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(48) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [cr_returned_date_sk#51] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(49) Project [codegen id : 13] +Output [2]: [cr_return_amount#52, cr_net_loss#53] +Input [4]: [cr_returned_date_sk#51, cr_return_amount#52, cr_net_loss#53, d_date_sk#5] + +(50) HashAggregate [codegen id : 13] +Input [2]: [cr_return_amount#52, cr_net_loss#53] +Keys: [] +Functions [2]: [partial_sum(UnscaledValue(cr_return_amount#52)), partial_sum(UnscaledValue(cr_net_loss#53))] +Aggregate Attributes [2]: [sum#54, sum#55] +Results [2]: [sum#56, sum#57] + +(51) Exchange +Input [2]: 
[sum#56, sum#57] +Arguments: SinglePartition, true, [id=#58] + +(52) HashAggregate [codegen id : 14] +Input [2]: [sum#56, sum#57] +Keys: [] +Functions [2]: [sum(UnscaledValue(cr_return_amount#52)), sum(UnscaledValue(cr_net_loss#53))] +Aggregate Attributes [2]: [sum(UnscaledValue(cr_return_amount#52))#59, sum(UnscaledValue(cr_net_loss#53))#60] +Results [2]: [MakeDecimal(sum(UnscaledValue(cr_return_amount#52))#59,17,2) AS returns#61, MakeDecimal(sum(UnscaledValue(cr_net_loss#53))#60,17,2) AS profit_loss#62] + +(53) BroadcastExchange +Input [2]: [returns#61, profit_loss#62] +Arguments: IdentityBroadcastMode, [id=#63] + +(54) BroadcastNestedLoopJoin +Join condition: None + +(55) Project [codegen id : 15] +Output [5]: [sales#49, returns#61, CheckOverflow((promote_precision(cast(profit#50 as decimal(18,2))) - promote_precision(cast(profit_loss#62 as decimal(18,2)))), DecimalType(18,2), true) AS profit#64, catalog channel AS channel#65, cs_call_center_sk#39 AS id#66] +Input [5]: [cs_call_center_sk#39, sales#49, profit#50, returns#61, profit_loss#62] + +(56) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#67, ws_web_page_sk#68, ws_ext_sales_price#69, ws_net_profit#70] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_web_page_sk)] +ReadSchema: struct + +(57) ColumnarToRow [codegen id : 18] +Input [4]: [ws_sold_date_sk#67, ws_web_page_sk#68, ws_ext_sales_price#69, ws_net_profit#70] + +(58) Filter [codegen id : 18] +Input [4]: [ws_sold_date_sk#67, ws_web_page_sk#68, ws_ext_sales_price#69, ws_net_profit#70] +Condition : (isnotnull(ws_sold_date_sk#67) AND isnotnull(ws_web_page_sk#68)) + +(59) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(60) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [ws_sold_date_sk#67] +Right keys [1]: [d_date_sk#5] +Join 
condition: None + +(61) Project [codegen id : 18] +Output [3]: [ws_web_page_sk#68, ws_ext_sales_price#69, ws_net_profit#70] +Input [5]: [ws_sold_date_sk#67, ws_web_page_sk#68, ws_ext_sales_price#69, ws_net_profit#70, d_date_sk#5] + +(62) Scan parquet default.web_page +Output [1]: [wp_web_page_sk#71] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_page] +PushedFilters: [IsNotNull(wp_web_page_sk)] +ReadSchema: struct + +(63) ColumnarToRow [codegen id : 17] +Input [1]: [wp_web_page_sk#71] + +(64) Filter [codegen id : 17] +Input [1]: [wp_web_page_sk#71] +Condition : isnotnull(wp_web_page_sk#71) + +(65) BroadcastExchange +Input [1]: [wp_web_page_sk#71] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#72] + +(66) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [ws_web_page_sk#68] +Right keys [1]: [wp_web_page_sk#71] +Join condition: None + +(67) Project [codegen id : 18] +Output [3]: [ws_ext_sales_price#69, ws_net_profit#70, wp_web_page_sk#71] +Input [4]: [ws_web_page_sk#68, ws_ext_sales_price#69, ws_net_profit#70, wp_web_page_sk#71] + +(68) HashAggregate [codegen id : 18] +Input [3]: [ws_ext_sales_price#69, ws_net_profit#70, wp_web_page_sk#71] +Keys [1]: [wp_web_page_sk#71] +Functions [2]: [partial_sum(UnscaledValue(ws_ext_sales_price#69)), partial_sum(UnscaledValue(ws_net_profit#70))] +Aggregate Attributes [2]: [sum#73, sum#74] +Results [3]: [wp_web_page_sk#71, sum#75, sum#76] + +(69) Exchange +Input [3]: [wp_web_page_sk#71, sum#75, sum#76] +Arguments: hashpartitioning(wp_web_page_sk#71, 5), true, [id=#77] + +(70) HashAggregate [codegen id : 23] +Input [3]: [wp_web_page_sk#71, sum#75, sum#76] +Keys [1]: [wp_web_page_sk#71] +Functions [2]: [sum(UnscaledValue(ws_ext_sales_price#69)), sum(UnscaledValue(ws_net_profit#70))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_sales_price#69))#78, 
sum(UnscaledValue(ws_net_profit#70))#79] +Results [3]: [wp_web_page_sk#71, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#69))#78,17,2) AS sales#80, MakeDecimal(sum(UnscaledValue(ws_net_profit#70))#79,17,2) AS profit#81] + +(71) Scan parquet default.web_returns +Output [4]: [wr_returned_date_sk#82, wr_web_page_sk#83, wr_return_amt#84, wr_net_loss#85] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_returns] +PushedFilters: [IsNotNull(wr_returned_date_sk), IsNotNull(wr_web_page_sk)] +ReadSchema: struct + +(72) ColumnarToRow [codegen id : 21] +Input [4]: [wr_returned_date_sk#82, wr_web_page_sk#83, wr_return_amt#84, wr_net_loss#85] + +(73) Filter [codegen id : 21] +Input [4]: [wr_returned_date_sk#82, wr_web_page_sk#83, wr_return_amt#84, wr_net_loss#85] +Condition : (isnotnull(wr_returned_date_sk#82) AND isnotnull(wr_web_page_sk#83)) + +(74) ReusedExchange [Reuses operator id: 65] +Output [1]: [wp_web_page_sk#86] + +(75) BroadcastHashJoin [codegen id : 21] +Left keys [1]: [wr_web_page_sk#83] +Right keys [1]: [cast(wp_web_page_sk#86 as bigint)] +Join condition: None + +(76) Project [codegen id : 21] +Output [4]: [wr_returned_date_sk#82, wr_return_amt#84, wr_net_loss#85, wp_web_page_sk#86] +Input [5]: [wr_returned_date_sk#82, wr_web_page_sk#83, wr_return_amt#84, wr_net_loss#85, wp_web_page_sk#86] + +(77) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(78) BroadcastHashJoin [codegen id : 21] +Left keys [1]: [wr_returned_date_sk#82] +Right keys [1]: [cast(d_date_sk#5 as bigint)] +Join condition: None + +(79) Project [codegen id : 21] +Output [3]: [wr_return_amt#84, wr_net_loss#85, wp_web_page_sk#86] +Input [5]: [wr_returned_date_sk#82, wr_return_amt#84, wr_net_loss#85, wp_web_page_sk#86, d_date_sk#5] + +(80) HashAggregate [codegen id : 21] +Input [3]: [wr_return_amt#84, wr_net_loss#85, wp_web_page_sk#86] +Keys [1]: 
[wp_web_page_sk#86] +Functions [2]: [partial_sum(UnscaledValue(wr_return_amt#84)), partial_sum(UnscaledValue(wr_net_loss#85))] +Aggregate Attributes [2]: [sum#87, sum#88] +Results [3]: [wp_web_page_sk#86, sum#89, sum#90] + +(81) Exchange +Input [3]: [wp_web_page_sk#86, sum#89, sum#90] +Arguments: hashpartitioning(wp_web_page_sk#86, 5), true, [id=#91] + +(82) HashAggregate [codegen id : 22] +Input [3]: [wp_web_page_sk#86, sum#89, sum#90] +Keys [1]: [wp_web_page_sk#86] +Functions [2]: [sum(UnscaledValue(wr_return_amt#84)), sum(UnscaledValue(wr_net_loss#85))] +Aggregate Attributes [2]: [sum(UnscaledValue(wr_return_amt#84))#92, sum(UnscaledValue(wr_net_loss#85))#93] +Results [3]: [wp_web_page_sk#86, MakeDecimal(sum(UnscaledValue(wr_return_amt#84))#92,17,2) AS returns#94, MakeDecimal(sum(UnscaledValue(wr_net_loss#85))#93,17,2) AS profit_loss#95] + +(83) BroadcastExchange +Input [3]: [wp_web_page_sk#86, returns#94, profit_loss#95] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#96] + +(84) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [wp_web_page_sk#71] +Right keys [1]: [wp_web_page_sk#86] +Join condition: None + +(85) Project [codegen id : 23] +Output [5]: [sales#80, coalesce(returns#94, 0.00) AS returns#97, CheckOverflow((promote_precision(cast(profit#81 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#95, 0.00) as decimal(18,2)))), DecimalType(18,2), true) AS profit#98, web channel AS channel#99, wp_web_page_sk#71 AS id#100] +Input [6]: [wp_web_page_sk#71, sales#80, profit#81, wp_web_page_sk#86, returns#94, profit_loss#95] + +(86) Union + +(87) Expand [codegen id : 24] +Input [5]: [sales#17, returns#34, profit#35, channel#36, id#37] +Arguments: [List(sales#17, returns#34, profit#35, channel#36, id#37, 0), List(sales#17, returns#34, profit#35, channel#36, null, 1), List(sales#17, returns#34, profit#35, null, null, 3)], [sales#17, returns#34, profit#35, channel#101, id#102, spark_grouping_id#103] + 
+(88) HashAggregate [codegen id : 24] +Input [6]: [sales#17, returns#34, profit#35, channel#101, id#102, spark_grouping_id#103] +Keys [3]: [channel#101, id#102, spark_grouping_id#103] +Functions [3]: [partial_sum(sales#17), partial_sum(returns#34), partial_sum(profit#35)] +Aggregate Attributes [6]: [sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109] +Results [9]: [channel#101, id#102, spark_grouping_id#103, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115] + +(89) Exchange +Input [9]: [channel#101, id#102, spark_grouping_id#103, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115] +Arguments: hashpartitioning(channel#101, id#102, spark_grouping_id#103, 5), true, [id=#116] + +(90) HashAggregate [codegen id : 25] +Input [9]: [channel#101, id#102, spark_grouping_id#103, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115] +Keys [3]: [channel#101, id#102, spark_grouping_id#103] +Functions [3]: [sum(sales#17), sum(returns#34), sum(profit#35)] +Aggregate Attributes [3]: [sum(sales#17)#117, sum(returns#34)#118, sum(profit#35)#119] +Results [5]: [channel#101, id#102, sum(sales#17)#117 AS sales#120, sum(returns#34)#118 AS returns#121, sum(profit#35)#119 AS profit#122] + +(91) TakeOrderedAndProject +Input [5]: [channel#101, id#102, sales#120, returns#121, profit#122] +Arguments: 100, [channel#101 ASC NULLS FIRST, id#102 ASC NULLS FIRST], [channel#101, id#102, sales#120, returns#121, profit#122] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77.sf100/simplified.txt new file mode 100644 index 0000000000000..cbddf82f08155 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77.sf100/simplified.txt @@ -0,0 +1,139 @@ +TakeOrderedAndProject [channel,id,profit,returns,sales] + WholeStageCodegen (25) + HashAggregate 
[channel,id,isEmpty,isEmpty,isEmpty,spark_grouping_id,sum,sum,sum] [isEmpty,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] + InputAdapter + Exchange [channel,id,spark_grouping_id] #1 + WholeStageCodegen (24) + HashAggregate [channel,id,profit,returns,sales,spark_grouping_id] [isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum] + Expand [channel,id,profit,returns,sales] + InputAdapter + Union + WholeStageCodegen (8) + Project [profit,profit_loss,returns,s_store_sk,sales] + BroadcastHashJoin [s_store_sk,s_store_sk] + HashAggregate [s_store_sk,sum,sum] [profit,sales,sum,sum,sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit))] + InputAdapter + Exchange [s_store_sk] #2 + WholeStageCodegen (3) + HashAggregate [s_store_sk,ss_ext_sales_price,ss_net_profit] [sum,sum,sum,sum] + Project [s_store_sk,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_ext_sales_price,ss_net_profit,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (7) + HashAggregate [s_store_sk,sum,sum] [profit_loss,returns,sum,sum,sum(UnscaledValue(sr_net_loss)),sum(UnscaledValue(sr_return_amt))] + InputAdapter + Exchange [s_store_sk] #6 + WholeStageCodegen (6) + HashAggregate [s_store_sk,sr_net_loss,sr_return_amt] [sum,sum,sum,sum] + Project [s_store_sk,sr_net_loss,sr_return_amt] + BroadcastHashJoin 
[d_date_sk,sr_returned_date_sk] + Project [s_store_sk,sr_net_loss,sr_return_amt,sr_returned_date_sk] + BroadcastHashJoin [s_store_sk,sr_store_sk] + Filter [sr_returned_date_sk,sr_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_net_loss,sr_return_amt,sr_returned_date_sk,sr_store_sk] + InputAdapter + ReusedExchange [s_store_sk] #4 + InputAdapter + ReusedExchange [d_date_sk] #3 + WholeStageCodegen (15) + Project [cs_call_center_sk,profit,profit_loss,returns,sales] + InputAdapter + BroadcastNestedLoopJoin + WholeStageCodegen (11) + HashAggregate [cs_call_center_sk,sum,sum] [profit,sales,sum,sum,sum(UnscaledValue(cs_ext_sales_price)),sum(UnscaledValue(cs_net_profit))] + InputAdapter + Exchange [cs_call_center_sk] #7 + WholeStageCodegen (10) + HashAggregate [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] [sum,sum,sum,sum] + Project [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_call_center_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + BroadcastExchange #8 + WholeStageCodegen (14) + HashAggregate [sum,sum] [profit_loss,returns,sum,sum,sum(UnscaledValue(cr_net_loss)),sum(UnscaledValue(cr_return_amount))] + InputAdapter + Exchange #9 + WholeStageCodegen (13) + HashAggregate [cr_net_loss,cr_return_amount] [sum,sum,sum,sum] + Project [cr_net_loss,cr_return_amount] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Filter [cr_returned_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_net_loss,cr_return_amount,cr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + WholeStageCodegen (23) + Project [profit,profit_loss,returns,sales,wp_web_page_sk] + BroadcastHashJoin [wp_web_page_sk,wp_web_page_sk] + HashAggregate [sum,sum,wp_web_page_sk] 
[profit,sales,sum,sum,sum(UnscaledValue(ws_ext_sales_price)),sum(UnscaledValue(ws_net_profit))] + InputAdapter + Exchange [wp_web_page_sk] #10 + WholeStageCodegen (18) + HashAggregate [wp_web_page_sk,ws_ext_sales_price,ws_net_profit] [sum,sum,sum,sum] + Project [wp_web_page_sk,ws_ext_sales_price,ws_net_profit] + BroadcastHashJoin [wp_web_page_sk,ws_web_page_sk] + Project [ws_ext_sales_price,ws_net_profit,ws_web_page_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_sold_date_sk,ws_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,ws_web_page_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (17) + Filter [wp_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_page [wp_web_page_sk] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (22) + HashAggregate [sum,sum,wp_web_page_sk] [profit_loss,returns,sum,sum,sum(UnscaledValue(wr_net_loss)),sum(UnscaledValue(wr_return_amt))] + InputAdapter + Exchange [wp_web_page_sk] #13 + WholeStageCodegen (21) + HashAggregate [wp_web_page_sk,wr_net_loss,wr_return_amt] [sum,sum,sum,sum] + Project [wp_web_page_sk,wr_net_loss,wr_return_amt] + BroadcastHashJoin [d_date_sk,wr_returned_date_sk] + Project [wp_web_page_sk,wr_net_loss,wr_return_amt,wr_returned_date_sk] + BroadcastHashJoin [wp_web_page_sk,wr_web_page_sk] + Filter [wr_returned_date_sk,wr_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_net_loss,wr_return_amt,wr_returned_date_sk,wr_web_page_sk] + InputAdapter + ReusedExchange [wp_web_page_sk] #11 + InputAdapter + ReusedExchange [d_date_sk] #3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/explain.txt new file mode 100644 index 0000000000000..75f4fb6640dee --- /dev/null +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/explain.txt @@ -0,0 +1,520 @@ +== Physical Plan == +TakeOrderedAndProject (91) ++- * HashAggregate (90) + +- Exchange (89) + +- * HashAggregate (88) + +- * Expand (87) + +- Union (86) + :- * Project (34) + : +- * BroadcastHashJoin LeftOuter BuildRight (33) + : :- * HashAggregate (19) + : : +- Exchange (18) + : : +- * HashAggregate (17) + : : +- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.store (11) + : +- BroadcastExchange (32) + : +- * HashAggregate (31) + : +- Exchange (30) + : +- * HashAggregate (29) + : +- * Project (28) + : +- * BroadcastHashJoin Inner BuildRight (27) + : :- * Project (25) + : : +- * BroadcastHashJoin Inner BuildRight (24) + : : :- * Filter (22) + : : : +- * ColumnarToRow (21) + : : : +- Scan parquet default.store_returns (20) + : : +- ReusedExchange (23) + : +- ReusedExchange (26) + :- * Project (55) + : +- BroadcastNestedLoopJoin Inner BuildLeft (54) + : :- BroadcastExchange (44) + : : +- * HashAggregate (43) + : : +- Exchange (42) + : : +- * HashAggregate (41) + : : +- * Project (40) + : : +- * BroadcastHashJoin Inner BuildRight (39) + : : :- * Filter (37) + : : : +- * ColumnarToRow (36) + : : : +- Scan parquet default.catalog_sales (35) + : : +- ReusedExchange (38) + : +- * HashAggregate (53) + : +- Exchange (52) + : +- * HashAggregate (51) + : +- * Project (50) + : +- * BroadcastHashJoin Inner BuildRight (49) + : :- * Filter (47) + : : +- * ColumnarToRow (46) + : : +- Scan parquet 
default.catalog_returns (45) + : +- ReusedExchange (48) + +- * Project (85) + +- * BroadcastHashJoin LeftOuter BuildRight (84) + :- * HashAggregate (70) + : +- Exchange (69) + : +- * HashAggregate (68) + : +- * Project (67) + : +- * BroadcastHashJoin Inner BuildRight (66) + : :- * Project (61) + : : +- * BroadcastHashJoin Inner BuildRight (60) + : : :- * Filter (58) + : : : +- * ColumnarToRow (57) + : : : +- Scan parquet default.web_sales (56) + : : +- ReusedExchange (59) + : +- BroadcastExchange (65) + : +- * Filter (64) + : +- * ColumnarToRow (63) + : +- Scan parquet default.web_page (62) + +- BroadcastExchange (83) + +- * HashAggregate (82) + +- Exchange (81) + +- * HashAggregate (80) + +- * Project (79) + +- * BroadcastHashJoin Inner BuildRight (78) + :- * Project (76) + : +- * BroadcastHashJoin Inner BuildRight (75) + : :- * Filter (73) + : : +- * ColumnarToRow (72) + : : +- Scan parquet default.web_returns (71) + : +- ReusedExchange (74) + +- ReusedExchange (77) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] + +(3) Filter [codegen id : 3] +Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#2)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#5, d_date#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_date), 
GreaterThanOrEqual(d_date,2000-08-03), LessThanOrEqual(d_date,2000-09-02), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#5, d_date#6] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#5, d_date#6] +Condition : (((isnotnull(d_date#6) AND (d_date#6 >= 11172)) AND (d_date#6 <= 11202)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [2]: [d_date_sk#5, d_date#6] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] +Input [5]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, d_date_sk#5] + +(11) Scan parquet default.store +Output [1]: [s_store_sk#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [1]: [s_store_sk#8] + +(13) Filter [codegen id : 2] +Input [1]: [s_store_sk#8] +Condition : isnotnull(s_store_sk#8) + +(14) BroadcastExchange +Input [1]: [s_store_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#8] +Join condition: None + +(16) Project [codegen id : 3] +Output [3]: [ss_ext_sales_price#3, ss_net_profit#4, s_store_sk#8] +Input [4]: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, s_store_sk#8] + +(17) HashAggregate [codegen id : 3] +Input [3]: [ss_ext_sales_price#3, ss_net_profit#4, s_store_sk#8] +Keys [1]: [s_store_sk#8] +Functions 
[2]: [partial_sum(UnscaledValue(ss_ext_sales_price#3)), partial_sum(UnscaledValue(ss_net_profit#4))] +Aggregate Attributes [2]: [sum#10, sum#11] +Results [3]: [s_store_sk#8, sum#12, sum#13] + +(18) Exchange +Input [3]: [s_store_sk#8, sum#12, sum#13] +Arguments: hashpartitioning(s_store_sk#8, 5), true, [id=#14] + +(19) HashAggregate [codegen id : 8] +Input [3]: [s_store_sk#8, sum#12, sum#13] +Keys [1]: [s_store_sk#8] +Functions [2]: [sum(UnscaledValue(ss_ext_sales_price#3)), sum(UnscaledValue(ss_net_profit#4))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_ext_sales_price#3))#15, sum(UnscaledValue(ss_net_profit#4))#16] +Results [3]: [s_store_sk#8, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#15,17,2) AS sales#17, MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#16,17,2) AS profit#18] + +(20) Scan parquet default.store_returns +Output [4]: [sr_returned_date_sk#19, sr_store_sk#20, sr_return_amt#21, sr_net_loss#22] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_returns] +PushedFilters: [IsNotNull(sr_returned_date_sk), IsNotNull(sr_store_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [4]: [sr_returned_date_sk#19, sr_store_sk#20, sr_return_amt#21, sr_net_loss#22] + +(22) Filter [codegen id : 6] +Input [4]: [sr_returned_date_sk#19, sr_store_sk#20, sr_return_amt#21, sr_net_loss#22] +Condition : (isnotnull(sr_returned_date_sk#19) AND isnotnull(sr_store_sk#20)) + +(23) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(24) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [sr_returned_date_sk#19] +Right keys [1]: [cast(d_date_sk#5 as bigint)] +Join condition: None + +(25) Project [codegen id : 6] +Output [3]: [sr_store_sk#20, sr_return_amt#21, sr_net_loss#22] +Input [5]: [sr_returned_date_sk#19, sr_store_sk#20, sr_return_amt#21, sr_net_loss#22, d_date_sk#5] + +(26) ReusedExchange [Reuses operator id: 14] 
+Output [1]: [s_store_sk#23] + +(27) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [sr_store_sk#20] +Right keys [1]: [cast(s_store_sk#23 as bigint)] +Join condition: None + +(28) Project [codegen id : 6] +Output [3]: [sr_return_amt#21, sr_net_loss#22, s_store_sk#23] +Input [4]: [sr_store_sk#20, sr_return_amt#21, sr_net_loss#22, s_store_sk#23] + +(29) HashAggregate [codegen id : 6] +Input [3]: [sr_return_amt#21, sr_net_loss#22, s_store_sk#23] +Keys [1]: [s_store_sk#23] +Functions [2]: [partial_sum(UnscaledValue(sr_return_amt#21)), partial_sum(UnscaledValue(sr_net_loss#22))] +Aggregate Attributes [2]: [sum#24, sum#25] +Results [3]: [s_store_sk#23, sum#26, sum#27] + +(30) Exchange +Input [3]: [s_store_sk#23, sum#26, sum#27] +Arguments: hashpartitioning(s_store_sk#23, 5), true, [id=#28] + +(31) HashAggregate [codegen id : 7] +Input [3]: [s_store_sk#23, sum#26, sum#27] +Keys [1]: [s_store_sk#23] +Functions [2]: [sum(UnscaledValue(sr_return_amt#21)), sum(UnscaledValue(sr_net_loss#22))] +Aggregate Attributes [2]: [sum(UnscaledValue(sr_return_amt#21))#29, sum(UnscaledValue(sr_net_loss#22))#30] +Results [3]: [s_store_sk#23, MakeDecimal(sum(UnscaledValue(sr_return_amt#21))#29,17,2) AS returns#31, MakeDecimal(sum(UnscaledValue(sr_net_loss#22))#30,17,2) AS profit_loss#32] + +(32) BroadcastExchange +Input [3]: [s_store_sk#23, returns#31, profit_loss#32] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#33] + +(33) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [s_store_sk#8] +Right keys [1]: [s_store_sk#23] +Join condition: None + +(34) Project [codegen id : 8] +Output [5]: [sales#17, coalesce(returns#31, 0.00) AS returns#34, CheckOverflow((promote_precision(cast(profit#18 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#32, 0.00) as decimal(18,2)))), DecimalType(18,2), true) AS profit#35, store channel AS channel#36, s_store_sk#8 AS id#37] +Input [6]: [s_store_sk#8, sales#17, profit#18, s_store_sk#23, 
returns#31, profit_loss#32] + +(35) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#38, cs_call_center_sk#39, cs_ext_sales_price#40, cs_net_profit#41] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(36) ColumnarToRow [codegen id : 10] +Input [4]: [cs_sold_date_sk#38, cs_call_center_sk#39, cs_ext_sales_price#40, cs_net_profit#41] + +(37) Filter [codegen id : 10] +Input [4]: [cs_sold_date_sk#38, cs_call_center_sk#39, cs_ext_sales_price#40, cs_net_profit#41] +Condition : isnotnull(cs_sold_date_sk#38) + +(38) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(39) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_sold_date_sk#38] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(40) Project [codegen id : 10] +Output [3]: [cs_call_center_sk#39, cs_ext_sales_price#40, cs_net_profit#41] +Input [5]: [cs_sold_date_sk#38, cs_call_center_sk#39, cs_ext_sales_price#40, cs_net_profit#41, d_date_sk#5] + +(41) HashAggregate [codegen id : 10] +Input [3]: [cs_call_center_sk#39, cs_ext_sales_price#40, cs_net_profit#41] +Keys [1]: [cs_call_center_sk#39] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_sales_price#40)), partial_sum(UnscaledValue(cs_net_profit#41))] +Aggregate Attributes [2]: [sum#42, sum#43] +Results [3]: [cs_call_center_sk#39, sum#44, sum#45] + +(42) Exchange +Input [3]: [cs_call_center_sk#39, sum#44, sum#45] +Arguments: hashpartitioning(cs_call_center_sk#39, 5), true, [id=#46] + +(43) HashAggregate [codegen id : 11] +Input [3]: [cs_call_center_sk#39, sum#44, sum#45] +Keys [1]: [cs_call_center_sk#39] +Functions [2]: [sum(UnscaledValue(cs_ext_sales_price#40)), sum(UnscaledValue(cs_net_profit#41))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_sales_price#40))#47, sum(UnscaledValue(cs_net_profit#41))#48] +Results [3]: 
[cs_call_center_sk#39, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#40))#47,17,2) AS sales#49, MakeDecimal(sum(UnscaledValue(cs_net_profit#41))#48,17,2) AS profit#50] + +(44) BroadcastExchange +Input [3]: [cs_call_center_sk#39, sales#49, profit#50] +Arguments: IdentityBroadcastMode, [id=#51] + +(45) Scan parquet default.catalog_returns +Output [3]: [cr_returned_date_sk#52, cr_return_amount#53, cr_net_loss#54] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_returns] +PushedFilters: [IsNotNull(cr_returned_date_sk)] +ReadSchema: struct + +(46) ColumnarToRow [codegen id : 13] +Input [3]: [cr_returned_date_sk#52, cr_return_amount#53, cr_net_loss#54] + +(47) Filter [codegen id : 13] +Input [3]: [cr_returned_date_sk#52, cr_return_amount#53, cr_net_loss#54] +Condition : isnotnull(cr_returned_date_sk#52) + +(48) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(49) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [cr_returned_date_sk#52] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(50) Project [codegen id : 13] +Output [2]: [cr_return_amount#53, cr_net_loss#54] +Input [4]: [cr_returned_date_sk#52, cr_return_amount#53, cr_net_loss#54, d_date_sk#5] + +(51) HashAggregate [codegen id : 13] +Input [2]: [cr_return_amount#53, cr_net_loss#54] +Keys: [] +Functions [2]: [partial_sum(UnscaledValue(cr_return_amount#53)), partial_sum(UnscaledValue(cr_net_loss#54))] +Aggregate Attributes [2]: [sum#55, sum#56] +Results [2]: [sum#57, sum#58] + +(52) Exchange +Input [2]: [sum#57, sum#58] +Arguments: SinglePartition, true, [id=#59] + +(53) HashAggregate [codegen id : 14] +Input [2]: [sum#57, sum#58] +Keys: [] +Functions [2]: [sum(UnscaledValue(cr_return_amount#53)), sum(UnscaledValue(cr_net_loss#54))] +Aggregate Attributes [2]: [sum(UnscaledValue(cr_return_amount#53))#60, sum(UnscaledValue(cr_net_loss#54))#61] +Results [2]: 
[MakeDecimal(sum(UnscaledValue(cr_return_amount#53))#60,17,2) AS returns#62, MakeDecimal(sum(UnscaledValue(cr_net_loss#54))#61,17,2) AS profit_loss#63] + +(54) BroadcastNestedLoopJoin +Join condition: None + +(55) Project [codegen id : 15] +Output [5]: [sales#49, returns#62, CheckOverflow((promote_precision(cast(profit#50 as decimal(18,2))) - promote_precision(cast(profit_loss#63 as decimal(18,2)))), DecimalType(18,2), true) AS profit#64, catalog channel AS channel#65, cs_call_center_sk#39 AS id#66] +Input [5]: [cs_call_center_sk#39, sales#49, profit#50, returns#62, profit_loss#63] + +(56) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#67, ws_web_page_sk#68, ws_ext_sales_price#69, ws_net_profit#70] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_web_page_sk)] +ReadSchema: struct + +(57) ColumnarToRow [codegen id : 18] +Input [4]: [ws_sold_date_sk#67, ws_web_page_sk#68, ws_ext_sales_price#69, ws_net_profit#70] + +(58) Filter [codegen id : 18] +Input [4]: [ws_sold_date_sk#67, ws_web_page_sk#68, ws_ext_sales_price#69, ws_net_profit#70] +Condition : (isnotnull(ws_sold_date_sk#67) AND isnotnull(ws_web_page_sk#68)) + +(59) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(60) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [ws_sold_date_sk#67] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(61) Project [codegen id : 18] +Output [3]: [ws_web_page_sk#68, ws_ext_sales_price#69, ws_net_profit#70] +Input [5]: [ws_sold_date_sk#67, ws_web_page_sk#68, ws_ext_sales_price#69, ws_net_profit#70, d_date_sk#5] + +(62) Scan parquet default.web_page +Output [1]: [wp_web_page_sk#71] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_page] +PushedFilters: 
[IsNotNull(wp_web_page_sk)] +ReadSchema: struct + +(63) ColumnarToRow [codegen id : 17] +Input [1]: [wp_web_page_sk#71] + +(64) Filter [codegen id : 17] +Input [1]: [wp_web_page_sk#71] +Condition : isnotnull(wp_web_page_sk#71) + +(65) BroadcastExchange +Input [1]: [wp_web_page_sk#71] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#72] + +(66) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [ws_web_page_sk#68] +Right keys [1]: [wp_web_page_sk#71] +Join condition: None + +(67) Project [codegen id : 18] +Output [3]: [ws_ext_sales_price#69, ws_net_profit#70, wp_web_page_sk#71] +Input [4]: [ws_web_page_sk#68, ws_ext_sales_price#69, ws_net_profit#70, wp_web_page_sk#71] + +(68) HashAggregate [codegen id : 18] +Input [3]: [ws_ext_sales_price#69, ws_net_profit#70, wp_web_page_sk#71] +Keys [1]: [wp_web_page_sk#71] +Functions [2]: [partial_sum(UnscaledValue(ws_ext_sales_price#69)), partial_sum(UnscaledValue(ws_net_profit#70))] +Aggregate Attributes [2]: [sum#73, sum#74] +Results [3]: [wp_web_page_sk#71, sum#75, sum#76] + +(69) Exchange +Input [3]: [wp_web_page_sk#71, sum#75, sum#76] +Arguments: hashpartitioning(wp_web_page_sk#71, 5), true, [id=#77] + +(70) HashAggregate [codegen id : 23] +Input [3]: [wp_web_page_sk#71, sum#75, sum#76] +Keys [1]: [wp_web_page_sk#71] +Functions [2]: [sum(UnscaledValue(ws_ext_sales_price#69)), sum(UnscaledValue(ws_net_profit#70))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_sales_price#69))#78, sum(UnscaledValue(ws_net_profit#70))#79] +Results [3]: [wp_web_page_sk#71, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#69))#78,17,2) AS sales#80, MakeDecimal(sum(UnscaledValue(ws_net_profit#70))#79,17,2) AS profit#81] + +(71) Scan parquet default.web_returns +Output [4]: [wr_returned_date_sk#82, wr_web_page_sk#83, wr_return_amt#84, wr_net_loss#85] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_returns] +PushedFilters: [IsNotNull(wr_returned_date_sk), IsNotNull(wr_web_page_sk)] +ReadSchema: struct + +(72) ColumnarToRow [codegen id : 21] +Input [4]: [wr_returned_date_sk#82, wr_web_page_sk#83, wr_return_amt#84, wr_net_loss#85] + +(73) Filter [codegen id : 21] +Input [4]: [wr_returned_date_sk#82, wr_web_page_sk#83, wr_return_amt#84, wr_net_loss#85] +Condition : (isnotnull(wr_returned_date_sk#82) AND isnotnull(wr_web_page_sk#83)) + +(74) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(75) BroadcastHashJoin [codegen id : 21] +Left keys [1]: [wr_returned_date_sk#82] +Right keys [1]: [cast(d_date_sk#5 as bigint)] +Join condition: None + +(76) Project [codegen id : 21] +Output [3]: [wr_web_page_sk#83, wr_return_amt#84, wr_net_loss#85] +Input [5]: [wr_returned_date_sk#82, wr_web_page_sk#83, wr_return_amt#84, wr_net_loss#85, d_date_sk#5] + +(77) ReusedExchange [Reuses operator id: 65] +Output [1]: [wp_web_page_sk#86] + +(78) BroadcastHashJoin [codegen id : 21] +Left keys [1]: [wr_web_page_sk#83] +Right keys [1]: [cast(wp_web_page_sk#86 as bigint)] +Join condition: None + +(79) Project [codegen id : 21] +Output [3]: [wr_return_amt#84, wr_net_loss#85, wp_web_page_sk#86] +Input [4]: [wr_web_page_sk#83, wr_return_amt#84, wr_net_loss#85, wp_web_page_sk#86] + +(80) HashAggregate [codegen id : 21] +Input [3]: [wr_return_amt#84, wr_net_loss#85, wp_web_page_sk#86] +Keys [1]: [wp_web_page_sk#86] +Functions [2]: [partial_sum(UnscaledValue(wr_return_amt#84)), partial_sum(UnscaledValue(wr_net_loss#85))] +Aggregate Attributes [2]: [sum#87, sum#88] +Results [3]: [wp_web_page_sk#86, sum#89, sum#90] + +(81) Exchange +Input [3]: [wp_web_page_sk#86, sum#89, sum#90] +Arguments: hashpartitioning(wp_web_page_sk#86, 5), true, [id=#91] + +(82) HashAggregate [codegen id : 22] +Input [3]: [wp_web_page_sk#86, sum#89, sum#90] +Keys [1]: [wp_web_page_sk#86] 
+Functions [2]: [sum(UnscaledValue(wr_return_amt#84)), sum(UnscaledValue(wr_net_loss#85))] +Aggregate Attributes [2]: [sum(UnscaledValue(wr_return_amt#84))#92, sum(UnscaledValue(wr_net_loss#85))#93] +Results [3]: [wp_web_page_sk#86, MakeDecimal(sum(UnscaledValue(wr_return_amt#84))#92,17,2) AS returns#94, MakeDecimal(sum(UnscaledValue(wr_net_loss#85))#93,17,2) AS profit_loss#95] + +(83) BroadcastExchange +Input [3]: [wp_web_page_sk#86, returns#94, profit_loss#95] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#96] + +(84) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [wp_web_page_sk#71] +Right keys [1]: [wp_web_page_sk#86] +Join condition: None + +(85) Project [codegen id : 23] +Output [5]: [sales#80, coalesce(returns#94, 0.00) AS returns#97, CheckOverflow((promote_precision(cast(profit#81 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#95, 0.00) as decimal(18,2)))), DecimalType(18,2), true) AS profit#98, web channel AS channel#99, wp_web_page_sk#71 AS id#100] +Input [6]: [wp_web_page_sk#71, sales#80, profit#81, wp_web_page_sk#86, returns#94, profit_loss#95] + +(86) Union + +(87) Expand [codegen id : 24] +Input [5]: [sales#17, returns#34, profit#35, channel#36, id#37] +Arguments: [List(sales#17, returns#34, profit#35, channel#36, id#37, 0), List(sales#17, returns#34, profit#35, channel#36, null, 1), List(sales#17, returns#34, profit#35, null, null, 3)], [sales#17, returns#34, profit#35, channel#101, id#102, spark_grouping_id#103] + +(88) HashAggregate [codegen id : 24] +Input [6]: [sales#17, returns#34, profit#35, channel#101, id#102, spark_grouping_id#103] +Keys [3]: [channel#101, id#102, spark_grouping_id#103] +Functions [3]: [partial_sum(sales#17), partial_sum(returns#34), partial_sum(profit#35)] +Aggregate Attributes [6]: [sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109] +Results [9]: [channel#101, id#102, spark_grouping_id#103, sum#110, isEmpty#111, sum#112, isEmpty#113, 
sum#114, isEmpty#115] + +(89) Exchange +Input [9]: [channel#101, id#102, spark_grouping_id#103, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115] +Arguments: hashpartitioning(channel#101, id#102, spark_grouping_id#103, 5), true, [id=#116] + +(90) HashAggregate [codegen id : 25] +Input [9]: [channel#101, id#102, spark_grouping_id#103, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115] +Keys [3]: [channel#101, id#102, spark_grouping_id#103] +Functions [3]: [sum(sales#17), sum(returns#34), sum(profit#35)] +Aggregate Attributes [3]: [sum(sales#17)#117, sum(returns#34)#118, sum(profit#35)#119] +Results [5]: [channel#101, id#102, sum(sales#17)#117 AS sales#120, sum(returns#34)#118 AS returns#121, sum(profit#35)#119 AS profit#122] + +(91) TakeOrderedAndProject +Input [5]: [channel#101, id#102, sales#120, returns#121, profit#122] +Arguments: 100, [channel#101 ASC NULLS FIRST, id#102 ASC NULLS FIRST], [channel#101, id#102, sales#120, returns#121, profit#122] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/simplified.txt new file mode 100644 index 0000000000000..591c1e7bc2720 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/simplified.txt @@ -0,0 +1,139 @@ +TakeOrderedAndProject [channel,id,profit,returns,sales] + WholeStageCodegen (25) + HashAggregate [channel,id,isEmpty,isEmpty,isEmpty,spark_grouping_id,sum,sum,sum] [isEmpty,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] + InputAdapter + Exchange [channel,id,spark_grouping_id] #1 + WholeStageCodegen (24) + HashAggregate [channel,id,profit,returns,sales,spark_grouping_id] [isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum] + Expand [channel,id,profit,returns,sales] + InputAdapter + Union + WholeStageCodegen (8) + Project 
[profit,profit_loss,returns,s_store_sk,sales] + BroadcastHashJoin [s_store_sk,s_store_sk] + HashAggregate [s_store_sk,sum,sum] [profit,sales,sum,sum,sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit))] + InputAdapter + Exchange [s_store_sk] #2 + WholeStageCodegen (3) + HashAggregate [s_store_sk,ss_ext_sales_price,ss_net_profit] [sum,sum,sum,sum] + Project [s_store_sk,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_ext_sales_price,ss_net_profit,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (7) + HashAggregate [s_store_sk,sum,sum] [profit_loss,returns,sum,sum,sum(UnscaledValue(sr_net_loss)),sum(UnscaledValue(sr_return_amt))] + InputAdapter + Exchange [s_store_sk] #6 + WholeStageCodegen (6) + HashAggregate [s_store_sk,sr_net_loss,sr_return_amt] [sum,sum,sum,sum] + Project [s_store_sk,sr_net_loss,sr_return_amt] + BroadcastHashJoin [s_store_sk,sr_store_sk] + Project [sr_net_loss,sr_return_amt,sr_store_sk] + BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + Filter [sr_returned_date_sk,sr_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_net_loss,sr_return_amt,sr_returned_date_sk,sr_store_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [s_store_sk] #4 + WholeStageCodegen (15) + Project [cs_call_center_sk,profit,profit_loss,returns,sales] + InputAdapter + 
BroadcastNestedLoopJoin + BroadcastExchange #7 + WholeStageCodegen (11) + HashAggregate [cs_call_center_sk,sum,sum] [profit,sales,sum,sum,sum(UnscaledValue(cs_ext_sales_price)),sum(UnscaledValue(cs_net_profit))] + InputAdapter + Exchange [cs_call_center_sk] #8 + WholeStageCodegen (10) + HashAggregate [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] [sum,sum,sum,sum] + Project [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_call_center_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + WholeStageCodegen (14) + HashAggregate [sum,sum] [profit_loss,returns,sum,sum,sum(UnscaledValue(cr_net_loss)),sum(UnscaledValue(cr_return_amount))] + InputAdapter + Exchange #9 + WholeStageCodegen (13) + HashAggregate [cr_net_loss,cr_return_amount] [sum,sum,sum,sum] + Project [cr_net_loss,cr_return_amount] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Filter [cr_returned_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_net_loss,cr_return_amount,cr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + WholeStageCodegen (23) + Project [profit,profit_loss,returns,sales,wp_web_page_sk] + BroadcastHashJoin [wp_web_page_sk,wp_web_page_sk] + HashAggregate [sum,sum,wp_web_page_sk] [profit,sales,sum,sum,sum(UnscaledValue(ws_ext_sales_price)),sum(UnscaledValue(ws_net_profit))] + InputAdapter + Exchange [wp_web_page_sk] #10 + WholeStageCodegen (18) + HashAggregate [wp_web_page_sk,ws_ext_sales_price,ws_net_profit] [sum,sum,sum,sum] + Project [wp_web_page_sk,ws_ext_sales_price,ws_net_profit] + BroadcastHashJoin [wp_web_page_sk,ws_web_page_sk] + Project [ws_ext_sales_price,ws_net_profit,ws_web_page_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_sold_date_sk,ws_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet 
default.web_sales [ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,ws_web_page_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (17) + Filter [wp_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_page [wp_web_page_sk] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (22) + HashAggregate [sum,sum,wp_web_page_sk] [profit_loss,returns,sum,sum,sum(UnscaledValue(wr_net_loss)),sum(UnscaledValue(wr_return_amt))] + InputAdapter + Exchange [wp_web_page_sk] #13 + WholeStageCodegen (21) + HashAggregate [wp_web_page_sk,wr_net_loss,wr_return_amt] [sum,sum,sum,sum] + Project [wp_web_page_sk,wr_net_loss,wr_return_amt] + BroadcastHashJoin [wp_web_page_sk,wr_web_page_sk] + Project [wr_net_loss,wr_return_amt,wr_web_page_sk] + BroadcastHashJoin [d_date_sk,wr_returned_date_sk] + Filter [wr_returned_date_sk,wr_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_net_loss,wr_return_amt,wr_returned_date_sk,wr_web_page_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [wp_web_page_sk] #11 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78.sf100/explain.txt new file mode 100644 index 0000000000000..98a04d3b64d21 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78.sf100/explain.txt @@ -0,0 +1,391 @@ +== Physical Plan == +TakeOrderedAndProject (70) ++- * Project (69) + +- * SortMergeJoin Inner (68) + :- * Project (46) + : +- * SortMergeJoin Inner (45) + : :- * Sort (23) + : : +- * HashAggregate (22) + : : +- Exchange (21) + : : +- * HashAggregate (20) + : : +- * Project (19) + : : +- * BroadcastHashJoin Inner BuildRight (18) + : : :- * Project (13) + : : : +- * Filter (12) + : : : +- SortMergeJoin LeftOuter (11) + : : : :- * Sort (5) + : : : : +- Exchange (4) + : : : : +- 
* Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- * Sort (10) + : : : +- Exchange (9) + : : : +- * Filter (8) + : : : +- * ColumnarToRow (7) + : : : +- Scan parquet default.store_returns (6) + : : +- BroadcastExchange (17) + : : +- * Filter (16) + : : +- * ColumnarToRow (15) + : : +- Scan parquet default.date_dim (14) + : +- * Sort (44) + : +- * Filter (43) + : +- * HashAggregate (42) + : +- Exchange (41) + : +- * HashAggregate (40) + : +- * Project (39) + : +- * BroadcastHashJoin Inner BuildRight (38) + : :- * Project (36) + : : +- * Filter (35) + : : +- SortMergeJoin LeftOuter (34) + : : :- * Sort (28) + : : : +- Exchange (27) + : : : +- * Filter (26) + : : : +- * ColumnarToRow (25) + : : : +- Scan parquet default.catalog_sales (24) + : : +- * Sort (33) + : : +- Exchange (32) + : : +- * Filter (31) + : : +- * ColumnarToRow (30) + : : +- Scan parquet default.catalog_returns (29) + : +- ReusedExchange (37) + +- * Sort (67) + +- * Filter (66) + +- * HashAggregate (65) + +- Exchange (64) + +- * HashAggregate (63) + +- * Project (62) + +- * BroadcastHashJoin Inner BuildRight (61) + :- * Project (59) + : +- * Filter (58) + : +- SortMergeJoin LeftOuter (57) + : :- * Sort (51) + : : +- Exchange (50) + : : +- * Filter (49) + : : +- * ColumnarToRow (48) + : : +- Scan parquet default.web_sales (47) + : +- * Sort (56) + : +- Exchange (55) + : +- * Filter (54) + : +- * ColumnarToRow (53) + : +- Scan parquet default.web_returns (52) + +- ReusedExchange (60) + + +(1) Scan parquet default.store_sales +Output [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#4, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk)] +ReadSchema: 
struct + +(2) ColumnarToRow [codegen id : 1] +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#4, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7] + +(3) Filter [codegen id : 1] +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#4, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7] +Condition : ((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_customer_sk#3)) + +(4) Exchange +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#4, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7] +Arguments: hashpartitioning(cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#2 as bigint), 5), true, [id=#8] + +(5) Sort [codegen id : 2] +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#4, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7] +Arguments: [cast(ss_ticket_number#4 as bigint) ASC NULLS FIRST, cast(ss_item_sk#2 as bigint) ASC NULLS FIRST], false, 0 + +(6) Scan parquet default.store_returns +Output [2]: [sr_item_sk#9, sr_ticket_number#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 3] +Input [2]: [sr_item_sk#9, sr_ticket_number#10] + +(8) Filter [codegen id : 3] +Input [2]: [sr_item_sk#9, sr_ticket_number#10] +Condition : (isnotnull(sr_ticket_number#10) AND isnotnull(sr_item_sk#9)) + +(9) Exchange +Input [2]: [sr_item_sk#9, sr_ticket_number#10] +Arguments: hashpartitioning(sr_ticket_number#10, sr_item_sk#9, 5), true, [id=#11] + +(10) Sort [codegen id : 4] +Input [2]: [sr_item_sk#9, sr_ticket_number#10] +Arguments: [sr_ticket_number#10 ASC NULLS FIRST, sr_item_sk#9 ASC NULLS FIRST], false, 0 + +(11) SortMergeJoin +Left keys [2]: [cast(ss_ticket_number#4 as 
bigint), cast(ss_item_sk#2 as bigint)] +Right keys [2]: [sr_ticket_number#10, sr_item_sk#9] +Join condition: None + +(12) Filter [codegen id : 6] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#4, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7, sr_item_sk#9, sr_ticket_number#10] +Condition : isnull(sr_ticket_number#10) + +(13) Project [codegen id : 6] +Output [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#4, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7, sr_item_sk#9, sr_ticket_number#10] + +(14) Scan parquet default.date_dim +Output [2]: [d_date_sk#12, d_year#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#12, d_year#13] + +(16) Filter [codegen id : 5] +Input [2]: [d_date_sk#12, d_year#13] +Condition : ((isnotnull(d_year#13) AND (d_year#13 = 2000)) AND isnotnull(d_date_sk#12)) + +(17) BroadcastExchange +Input [2]: [d_date_sk#12, d_year#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] + +(18) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#12] +Join condition: None + +(19) Project [codegen id : 6] +Output [6]: [ss_item_sk#2, ss_customer_sk#3, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7, d_year#13] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7, d_date_sk#12, d_year#13] + +(20) HashAggregate [codegen id : 6] +Input [6]: [ss_item_sk#2, ss_customer_sk#3, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7, 
d_year#13] +Keys [3]: [d_year#13, ss_item_sk#2, ss_customer_sk#3] +Functions [3]: [partial_sum(cast(ss_quantity#5 as bigint)), partial_sum(UnscaledValue(ss_wholesale_cost#6)), partial_sum(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [3]: [sum#15, sum#16, sum#17] +Results [6]: [d_year#13, ss_item_sk#2, ss_customer_sk#3, sum#18, sum#19, sum#20] + +(21) Exchange +Input [6]: [d_year#13, ss_item_sk#2, ss_customer_sk#3, sum#18, sum#19, sum#20] +Arguments: hashpartitioning(d_year#13, ss_item_sk#2, ss_customer_sk#3, 5), true, [id=#21] + +(22) HashAggregate [codegen id : 7] +Input [6]: [d_year#13, ss_item_sk#2, ss_customer_sk#3, sum#18, sum#19, sum#20] +Keys [3]: [d_year#13, ss_item_sk#2, ss_customer_sk#3] +Functions [3]: [sum(cast(ss_quantity#5 as bigint)), sum(UnscaledValue(ss_wholesale_cost#6)), sum(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [3]: [sum(cast(ss_quantity#5 as bigint))#22, sum(UnscaledValue(ss_wholesale_cost#6))#23, sum(UnscaledValue(ss_sales_price#7))#24] +Results [6]: [d_year#13 AS ss_sold_year#25, ss_item_sk#2, ss_customer_sk#3, sum(cast(ss_quantity#5 as bigint))#22 AS ss_qty#26, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#6))#23,17,2) AS ss_wc#27, MakeDecimal(sum(UnscaledValue(ss_sales_price#7))#24,17,2) AS ss_sp#28] + +(23) Sort [codegen id : 7] +Input [6]: [ss_sold_year#25, ss_item_sk#2, ss_customer_sk#3, ss_qty#26, ss_wc#27, ss_sp#28] +Arguments: [ss_sold_year#25 ASC NULLS FIRST, ss_item_sk#2 ASC NULLS FIRST, ss_customer_sk#3 ASC NULLS FIRST], false, 0 + +(24) Scan parquet default.catalog_sales +Output [7]: [cs_sold_date_sk#29, cs_bill_customer_sk#30, cs_item_sk#31, cs_order_number#32, cs_quantity#33, cs_wholesale_cost#34, cs_sales_price#35] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk), IsNotNull(cs_bill_customer_sk)] 
+ReadSchema: struct + +(25) ColumnarToRow [codegen id : 8] +Input [7]: [cs_sold_date_sk#29, cs_bill_customer_sk#30, cs_item_sk#31, cs_order_number#32, cs_quantity#33, cs_wholesale_cost#34, cs_sales_price#35] + +(26) Filter [codegen id : 8] +Input [7]: [cs_sold_date_sk#29, cs_bill_customer_sk#30, cs_item_sk#31, cs_order_number#32, cs_quantity#33, cs_wholesale_cost#34, cs_sales_price#35] +Condition : ((isnotnull(cs_sold_date_sk#29) AND isnotnull(cs_item_sk#31)) AND isnotnull(cs_bill_customer_sk#30)) + +(27) Exchange +Input [7]: [cs_sold_date_sk#29, cs_bill_customer_sk#30, cs_item_sk#31, cs_order_number#32, cs_quantity#33, cs_wholesale_cost#34, cs_sales_price#35] +Arguments: hashpartitioning(cs_order_number#32, cs_item_sk#31, 5), true, [id=#36] + +(28) Sort [codegen id : 9] +Input [7]: [cs_sold_date_sk#29, cs_bill_customer_sk#30, cs_item_sk#31, cs_order_number#32, cs_quantity#33, cs_wholesale_cost#34, cs_sales_price#35] +Arguments: [cs_order_number#32 ASC NULLS FIRST, cs_item_sk#31 ASC NULLS FIRST], false, 0 + +(29) Scan parquet default.catalog_returns +Output [2]: [cr_item_sk#37, cr_order_number#38] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_returns] +PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 10] +Input [2]: [cr_item_sk#37, cr_order_number#38] + +(31) Filter [codegen id : 10] +Input [2]: [cr_item_sk#37, cr_order_number#38] +Condition : (isnotnull(cr_order_number#38) AND isnotnull(cr_item_sk#37)) + +(32) Exchange +Input [2]: [cr_item_sk#37, cr_order_number#38] +Arguments: hashpartitioning(cr_order_number#38, cr_item_sk#37, 5), true, [id=#39] + +(33) Sort [codegen id : 11] +Input [2]: [cr_item_sk#37, cr_order_number#38] +Arguments: [cr_order_number#38 ASC NULLS FIRST, cr_item_sk#37 ASC NULLS FIRST], false, 0 + +(34) SortMergeJoin +Left keys [2]: 
[cs_order_number#32, cs_item_sk#31] +Right keys [2]: [cr_order_number#38, cr_item_sk#37] +Join condition: None + +(35) Filter [codegen id : 13] +Input [9]: [cs_sold_date_sk#29, cs_bill_customer_sk#30, cs_item_sk#31, cs_order_number#32, cs_quantity#33, cs_wholesale_cost#34, cs_sales_price#35, cr_item_sk#37, cr_order_number#38] +Condition : isnull(cr_order_number#38) + +(36) Project [codegen id : 13] +Output [6]: [cs_sold_date_sk#29, cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#33, cs_wholesale_cost#34, cs_sales_price#35] +Input [9]: [cs_sold_date_sk#29, cs_bill_customer_sk#30, cs_item_sk#31, cs_order_number#32, cs_quantity#33, cs_wholesale_cost#34, cs_sales_price#35, cr_item_sk#37, cr_order_number#38] + +(37) ReusedExchange [Reuses operator id: 17] +Output [2]: [d_date_sk#12, d_year#13] + +(38) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [cs_sold_date_sk#29] +Right keys [1]: [d_date_sk#12] +Join condition: None + +(39) Project [codegen id : 13] +Output [6]: [cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#33, cs_wholesale_cost#34, cs_sales_price#35, d_year#13] +Input [8]: [cs_sold_date_sk#29, cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#33, cs_wholesale_cost#34, cs_sales_price#35, d_date_sk#12, d_year#13] + +(40) HashAggregate [codegen id : 13] +Input [6]: [cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#33, cs_wholesale_cost#34, cs_sales_price#35, d_year#13] +Keys [3]: [d_year#13, cs_item_sk#31, cs_bill_customer_sk#30] +Functions [3]: [partial_sum(cast(cs_quantity#33 as bigint)), partial_sum(UnscaledValue(cs_wholesale_cost#34)), partial_sum(UnscaledValue(cs_sales_price#35))] +Aggregate Attributes [3]: [sum#40, sum#41, sum#42] +Results [6]: [d_year#13, cs_item_sk#31, cs_bill_customer_sk#30, sum#43, sum#44, sum#45] + +(41) Exchange +Input [6]: [d_year#13, cs_item_sk#31, cs_bill_customer_sk#30, sum#43, sum#44, sum#45] +Arguments: hashpartitioning(d_year#13, cs_item_sk#31, cs_bill_customer_sk#30, 5), true, [id=#46] + +(42) HashAggregate 
[codegen id : 14] +Input [6]: [d_year#13, cs_item_sk#31, cs_bill_customer_sk#30, sum#43, sum#44, sum#45] +Keys [3]: [d_year#13, cs_item_sk#31, cs_bill_customer_sk#30] +Functions [3]: [sum(cast(cs_quantity#33 as bigint)), sum(UnscaledValue(cs_wholesale_cost#34)), sum(UnscaledValue(cs_sales_price#35))] +Aggregate Attributes [3]: [sum(cast(cs_quantity#33 as bigint))#47, sum(UnscaledValue(cs_wholesale_cost#34))#48, sum(UnscaledValue(cs_sales_price#35))#49] +Results [6]: [d_year#13 AS cs_sold_year#50, cs_item_sk#31, cs_bill_customer_sk#30 AS cs_customer_sk#51, sum(cast(cs_quantity#33 as bigint))#47 AS cs_qty#52, MakeDecimal(sum(UnscaledValue(cs_wholesale_cost#34))#48,17,2) AS cs_wc#53, MakeDecimal(sum(UnscaledValue(cs_sales_price#35))#49,17,2) AS cs_sp#54] + +(43) Filter [codegen id : 14] +Input [6]: [cs_sold_year#50, cs_item_sk#31, cs_customer_sk#51, cs_qty#52, cs_wc#53, cs_sp#54] +Condition : (coalesce(cs_qty#52, 0) > 0) + +(44) Sort [codegen id : 14] +Input [6]: [cs_sold_year#50, cs_item_sk#31, cs_customer_sk#51, cs_qty#52, cs_wc#53, cs_sp#54] +Arguments: [cs_sold_year#50 ASC NULLS FIRST, cs_item_sk#31 ASC NULLS FIRST, cs_customer_sk#51 ASC NULLS FIRST], false, 0 + +(45) SortMergeJoin [codegen id : 15] +Left keys [3]: [ss_sold_year#25, ss_item_sk#2, ss_customer_sk#3] +Right keys [3]: [cs_sold_year#50, cs_item_sk#31, cs_customer_sk#51] +Join condition: None + +(46) Project [codegen id : 15] +Output [9]: [ss_sold_year#25, ss_item_sk#2, ss_customer_sk#3, ss_qty#26, ss_wc#27, ss_sp#28, cs_qty#52, cs_wc#53, cs_sp#54] +Input [12]: [ss_sold_year#25, ss_item_sk#2, ss_customer_sk#3, ss_qty#26, ss_wc#27, ss_sp#28, cs_sold_year#50, cs_item_sk#31, cs_customer_sk#51, cs_qty#52, cs_wc#53, cs_sp#54] + +(47) Scan parquet default.web_sales +Output [7]: [ws_sold_date_sk#55, ws_item_sk#56, ws_bill_customer_sk#57, ws_order_number#58, ws_quantity#59, ws_wholesale_cost#60, ws_sales_price#61] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 16] +Input [7]: [ws_sold_date_sk#55, ws_item_sk#56, ws_bill_customer_sk#57, ws_order_number#58, ws_quantity#59, ws_wholesale_cost#60, ws_sales_price#61] + +(49) Filter [codegen id : 16] +Input [7]: [ws_sold_date_sk#55, ws_item_sk#56, ws_bill_customer_sk#57, ws_order_number#58, ws_quantity#59, ws_wholesale_cost#60, ws_sales_price#61] +Condition : ((isnotnull(ws_sold_date_sk#55) AND isnotnull(ws_bill_customer_sk#57)) AND isnotnull(ws_item_sk#56)) + +(50) Exchange +Input [7]: [ws_sold_date_sk#55, ws_item_sk#56, ws_bill_customer_sk#57, ws_order_number#58, ws_quantity#59, ws_wholesale_cost#60, ws_sales_price#61] +Arguments: hashpartitioning(cast(ws_order_number#58 as bigint), cast(ws_item_sk#56 as bigint), 5), true, [id=#62] + +(51) Sort [codegen id : 17] +Input [7]: [ws_sold_date_sk#55, ws_item_sk#56, ws_bill_customer_sk#57, ws_order_number#58, ws_quantity#59, ws_wholesale_cost#60, ws_sales_price#61] +Arguments: [cast(ws_order_number#58 as bigint) ASC NULLS FIRST, cast(ws_item_sk#56 as bigint) ASC NULLS FIRST], false, 0 + +(52) Scan parquet default.web_returns +Output [2]: [wr_item_sk#63, wr_order_number#64] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_returns] +PushedFilters: [IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] +ReadSchema: struct + +(53) ColumnarToRow [codegen id : 18] +Input [2]: [wr_item_sk#63, wr_order_number#64] + +(54) Filter [codegen id : 18] +Input [2]: [wr_item_sk#63, wr_order_number#64] +Condition : (isnotnull(wr_order_number#64) AND isnotnull(wr_item_sk#63)) + +(55) Exchange +Input [2]: [wr_item_sk#63, wr_order_number#64] +Arguments: 
hashpartitioning(wr_order_number#64, wr_item_sk#63, 5), true, [id=#65] + +(56) Sort [codegen id : 19] +Input [2]: [wr_item_sk#63, wr_order_number#64] +Arguments: [wr_order_number#64 ASC NULLS FIRST, wr_item_sk#63 ASC NULLS FIRST], false, 0 + +(57) SortMergeJoin +Left keys [2]: [cast(ws_order_number#58 as bigint), cast(ws_item_sk#56 as bigint)] +Right keys [2]: [wr_order_number#64, wr_item_sk#63] +Join condition: None + +(58) Filter [codegen id : 21] +Input [9]: [ws_sold_date_sk#55, ws_item_sk#56, ws_bill_customer_sk#57, ws_order_number#58, ws_quantity#59, ws_wholesale_cost#60, ws_sales_price#61, wr_item_sk#63, wr_order_number#64] +Condition : isnull(wr_order_number#64) + +(59) Project [codegen id : 21] +Output [6]: [ws_sold_date_sk#55, ws_item_sk#56, ws_bill_customer_sk#57, ws_quantity#59, ws_wholesale_cost#60, ws_sales_price#61] +Input [9]: [ws_sold_date_sk#55, ws_item_sk#56, ws_bill_customer_sk#57, ws_order_number#58, ws_quantity#59, ws_wholesale_cost#60, ws_sales_price#61, wr_item_sk#63, wr_order_number#64] + +(60) ReusedExchange [Reuses operator id: 17] +Output [2]: [d_date_sk#12, d_year#13] + +(61) BroadcastHashJoin [codegen id : 21] +Left keys [1]: [ws_sold_date_sk#55] +Right keys [1]: [d_date_sk#12] +Join condition: None + +(62) Project [codegen id : 21] +Output [6]: [ws_item_sk#56, ws_bill_customer_sk#57, ws_quantity#59, ws_wholesale_cost#60, ws_sales_price#61, d_year#13] +Input [8]: [ws_sold_date_sk#55, ws_item_sk#56, ws_bill_customer_sk#57, ws_quantity#59, ws_wholesale_cost#60, ws_sales_price#61, d_date_sk#12, d_year#13] + +(63) HashAggregate [codegen id : 21] +Input [6]: [ws_item_sk#56, ws_bill_customer_sk#57, ws_quantity#59, ws_wholesale_cost#60, ws_sales_price#61, d_year#13] +Keys [3]: [d_year#13, ws_item_sk#56, ws_bill_customer_sk#57] +Functions [3]: [partial_sum(cast(ws_quantity#59 as bigint)), partial_sum(UnscaledValue(ws_wholesale_cost#60)), partial_sum(UnscaledValue(ws_sales_price#61))] +Aggregate Attributes [3]: [sum#66, sum#67, sum#68] +Results 
[6]: [d_year#13, ws_item_sk#56, ws_bill_customer_sk#57, sum#69, sum#70, sum#71] + +(64) Exchange +Input [6]: [d_year#13, ws_item_sk#56, ws_bill_customer_sk#57, sum#69, sum#70, sum#71] +Arguments: hashpartitioning(d_year#13, ws_item_sk#56, ws_bill_customer_sk#57, 5), true, [id=#72] + +(65) HashAggregate [codegen id : 22] +Input [6]: [d_year#13, ws_item_sk#56, ws_bill_customer_sk#57, sum#69, sum#70, sum#71] +Keys [3]: [d_year#13, ws_item_sk#56, ws_bill_customer_sk#57] +Functions [3]: [sum(cast(ws_quantity#59 as bigint)), sum(UnscaledValue(ws_wholesale_cost#60)), sum(UnscaledValue(ws_sales_price#61))] +Aggregate Attributes [3]: [sum(cast(ws_quantity#59 as bigint))#73, sum(UnscaledValue(ws_wholesale_cost#60))#74, sum(UnscaledValue(ws_sales_price#61))#75] +Results [6]: [d_year#13 AS ws_sold_year#76, ws_item_sk#56, ws_bill_customer_sk#57 AS ws_customer_sk#77, sum(cast(ws_quantity#59 as bigint))#73 AS ws_qty#78, MakeDecimal(sum(UnscaledValue(ws_wholesale_cost#60))#74,17,2) AS ws_wc#79, MakeDecimal(sum(UnscaledValue(ws_sales_price#61))#75,17,2) AS ws_sp#80] + +(66) Filter [codegen id : 22] +Input [6]: [ws_sold_year#76, ws_item_sk#56, ws_customer_sk#77, ws_qty#78, ws_wc#79, ws_sp#80] +Condition : (coalesce(ws_qty#78, 0) > 0) + +(67) Sort [codegen id : 22] +Input [6]: [ws_sold_year#76, ws_item_sk#56, ws_customer_sk#77, ws_qty#78, ws_wc#79, ws_sp#80] +Arguments: [ws_sold_year#76 ASC NULLS FIRST, ws_item_sk#56 ASC NULLS FIRST, ws_customer_sk#77 ASC NULLS FIRST], false, 0 + +(68) SortMergeJoin [codegen id : 23] +Left keys [3]: [ss_sold_year#25, ss_item_sk#2, ss_customer_sk#3] +Right keys [3]: [ws_sold_year#76, ws_item_sk#56, ws_customer_sk#77] +Join condition: None + +(69) Project [codegen id : 23] +Output [12]: [round((cast(ss_qty#26 as double) / cast(coalesce((ws_qty#78 + cs_qty#52), 1) as double)), 2) AS ratio#81, ss_qty#26 AS store_qty#82, ss_wc#27 AS store_wholesale_cost#83, ss_sp#28 AS store_sales_price#84, (coalesce(ws_qty#78, 0) + coalesce(cs_qty#52, 0)) AS 
other_chan_qty#85, CheckOverflow((promote_precision(cast(coalesce(ws_wc#79, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_wc#53, 0.00) as decimal(18,2)))), DecimalType(18,2), true) AS other_chan_wholesale_cost#86, CheckOverflow((promote_precision(cast(coalesce(ws_sp#80, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_sp#54, 0.00) as decimal(18,2)))), DecimalType(18,2), true) AS other_chan_sales_price#87, ss_sp#28, ss_qty#26, ws_qty#78, ss_wc#27, cs_qty#52] +Input [15]: [ss_sold_year#25, ss_item_sk#2, ss_customer_sk#3, ss_qty#26, ss_wc#27, ss_sp#28, cs_qty#52, cs_wc#53, cs_sp#54, ws_sold_year#76, ws_item_sk#56, ws_customer_sk#77, ws_qty#78, ws_wc#79, ws_sp#80] + +(70) TakeOrderedAndProject +Input [12]: [ratio#81, store_qty#82, store_wholesale_cost#83, store_sales_price#84, other_chan_qty#85, other_chan_wholesale_cost#86, other_chan_sales_price#87, ss_sp#28, ss_qty#26, ws_qty#78, ss_wc#27, cs_qty#52] +Arguments: 100, [ratio#81 ASC NULLS FIRST, ss_qty#26 DESC NULLS LAST, ss_wc#27 DESC NULLS LAST, ss_sp#28 DESC NULLS LAST, other_chan_qty#85 ASC NULLS FIRST, other_chan_wholesale_cost#86 ASC NULLS FIRST, other_chan_sales_price#87 ASC NULLS FIRST, round((cast(ss_qty#26 as double) / cast(coalesce((ws_qty#78 + cs_qty#52), 1) as double)), 2) ASC NULLS FIRST], [ratio#81, store_qty#82, store_wholesale_cost#83, store_sales_price#84, other_chan_qty#85, other_chan_wholesale_cost#86, other_chan_sales_price#87] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78.sf100/simplified.txt new file mode 100644 index 0000000000000..726d382498db7 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78.sf100/simplified.txt @@ -0,0 +1,117 @@ +TakeOrderedAndProject 
[cs_qty,other_chan_qty,other_chan_sales_price,other_chan_wholesale_cost,ratio,ss_qty,ss_sp,ss_wc,store_qty,store_sales_price,store_wholesale_cost,ws_qty] + WholeStageCodegen (23) + Project [cs_qty,cs_sp,cs_wc,ss_qty,ss_sp,ss_wc,ws_qty,ws_sp,ws_wc] + SortMergeJoin [ss_customer_sk,ss_item_sk,ss_sold_year,ws_customer_sk,ws_item_sk,ws_sold_year] + InputAdapter + WholeStageCodegen (15) + Project [cs_qty,cs_sp,cs_wc,ss_customer_sk,ss_item_sk,ss_qty,ss_sold_year,ss_sp,ss_wc] + SortMergeJoin [cs_customer_sk,cs_item_sk,cs_sold_year,ss_customer_sk,ss_item_sk,ss_sold_year] + InputAdapter + WholeStageCodegen (7) + Sort [ss_customer_sk,ss_item_sk,ss_sold_year] + HashAggregate [d_year,ss_customer_sk,ss_item_sk,sum,sum,sum] [ss_qty,ss_sold_year,ss_sp,ss_wc,sum,sum,sum,sum(UnscaledValue(ss_sales_price)),sum(UnscaledValue(ss_wholesale_cost)),sum(cast(ss_quantity as bigint))] + InputAdapter + Exchange [d_year,ss_customer_sk,ss_item_sk] #1 + WholeStageCodegen (6) + HashAggregate [d_year,ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_wholesale_cost] [sum,sum,sum,sum,sum,sum] + Project [d_year,ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_wholesale_cost] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_wholesale_cost] + Filter [sr_ticket_number] + InputAdapter + SortMergeJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + WholeStageCodegen (2) + Sort [ss_item_sk,ss_ticket_number] + InputAdapter + Exchange [ss_item_sk,ss_ticket_number] #2 + WholeStageCodegen (1) + Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_ticket_number,ss_wholesale_cost] + WholeStageCodegen (4) + Sort [sr_item_sk,sr_ticket_number] + InputAdapter + Exchange [sr_item_sk,sr_ticket_number] #3 + WholeStageCodegen (3) + Filter [sr_item_sk,sr_ticket_number] + 
ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (5) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + WholeStageCodegen (14) + Sort [cs_customer_sk,cs_item_sk,cs_sold_year] + Filter [cs_qty] + HashAggregate [cs_bill_customer_sk,cs_item_sk,d_year,sum,sum,sum] [cs_customer_sk,cs_qty,cs_sold_year,cs_sp,cs_wc,sum,sum,sum,sum(UnscaledValue(cs_sales_price)),sum(UnscaledValue(cs_wholesale_cost)),sum(cast(cs_quantity as bigint))] + InputAdapter + Exchange [cs_bill_customer_sk,cs_item_sk,d_year] #5 + WholeStageCodegen (13) + HashAggregate [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sales_price,cs_wholesale_cost,d_year] [sum,sum,sum,sum,sum,sum] + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sales_price,cs_wholesale_cost,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_wholesale_cost] + Filter [cr_order_number] + InputAdapter + SortMergeJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + WholeStageCodegen (9) + Sort [cs_item_sk,cs_order_number] + InputAdapter + Exchange [cs_item_sk,cs_order_number] #6 + WholeStageCodegen (8) + Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_wholesale_cost] + WholeStageCodegen (11) + Sort [cr_item_sk,cr_order_number] + InputAdapter + Exchange [cr_item_sk,cr_order_number] #7 + WholeStageCodegen (10) + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number] + InputAdapter + ReusedExchange [d_date_sk,d_year] #4 + InputAdapter + WholeStageCodegen (22) + Sort [ws_customer_sk,ws_item_sk,ws_sold_year] + 
Filter [ws_qty] + HashAggregate [d_year,sum,sum,sum,ws_bill_customer_sk,ws_item_sk] [sum,sum,sum,sum(UnscaledValue(ws_sales_price)),sum(UnscaledValue(ws_wholesale_cost)),sum(cast(ws_quantity as bigint)),ws_customer_sk,ws_qty,ws_sold_year,ws_sp,ws_wc] + InputAdapter + Exchange [d_year,ws_bill_customer_sk,ws_item_sk] #8 + WholeStageCodegen (21) + HashAggregate [d_year,ws_bill_customer_sk,ws_item_sk,ws_quantity,ws_sales_price,ws_wholesale_cost] [sum,sum,sum,sum,sum,sum] + Project [d_year,ws_bill_customer_sk,ws_item_sk,ws_quantity,ws_sales_price,ws_wholesale_cost] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_bill_customer_sk,ws_item_sk,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_wholesale_cost] + Filter [wr_order_number] + InputAdapter + SortMergeJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + WholeStageCodegen (17) + Sort [ws_item_sk,ws_order_number] + InputAdapter + Exchange [ws_item_sk,ws_order_number] #9 + WholeStageCodegen (16) + Filter [ws_bill_customer_sk,ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_item_sk,ws_order_number,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_wholesale_cost] + WholeStageCodegen (19) + Sort [wr_item_sk,wr_order_number] + InputAdapter + Exchange [wr_item_sk,wr_order_number] #10 + WholeStageCodegen (18) + Filter [wr_item_sk,wr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_item_sk,wr_order_number] + InputAdapter + ReusedExchange [d_date_sk,d_year] #4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78/explain.txt new file mode 100644 index 0000000000000..0d20fd0422f06 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78/explain.txt @@ -0,0 +1,341 @@ +== Physical Plan == +TakeOrderedAndProject (60) ++- * Project (59) + +- * BroadcastHashJoin 
Inner BuildRight (58) + :- * Project (39) + : +- * BroadcastHashJoin Inner BuildRight (38) + : :- * HashAggregate (19) + : : +- Exchange (18) + : : +- * HashAggregate (17) + : : +- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * Filter (9) + : : : +- * BroadcastHashJoin LeftOuter BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.store_returns (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.date_dim (11) + : +- BroadcastExchange (37) + : +- * Filter (36) + : +- * HashAggregate (35) + : +- Exchange (34) + : +- * HashAggregate (33) + : +- * Project (32) + : +- * BroadcastHashJoin Inner BuildRight (31) + : :- * Project (29) + : : +- * Filter (28) + : : +- * BroadcastHashJoin LeftOuter BuildRight (27) + : : :- * Filter (22) + : : : +- * ColumnarToRow (21) + : : : +- Scan parquet default.web_sales (20) + : : +- BroadcastExchange (26) + : : +- * Filter (25) + : : +- * ColumnarToRow (24) + : : +- Scan parquet default.web_returns (23) + : +- ReusedExchange (30) + +- BroadcastExchange (57) + +- * Filter (56) + +- * HashAggregate (55) + +- Exchange (54) + +- * HashAggregate (53) + +- * Project (52) + +- * BroadcastHashJoin Inner BuildRight (51) + :- * Project (49) + : +- * Filter (48) + : +- * BroadcastHashJoin LeftOuter BuildRight (47) + : :- * Filter (42) + : : +- * ColumnarToRow (41) + : : +- Scan parquet default.catalog_sales (40) + : +- BroadcastExchange (46) + : +- * Filter (45) + : +- * ColumnarToRow (44) + : +- Scan parquet default.catalog_returns (43) + +- ReusedExchange (50) + + +(1) Scan parquet default.store_sales +Output [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#4, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7] 
+Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#4, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7] + +(3) Filter [codegen id : 3] +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#4, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7] +Condition : ((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_customer_sk#3)) + +(4) Scan parquet default.store_returns +Output [2]: [sr_item_sk#8, sr_ticket_number#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [sr_item_sk#8, sr_ticket_number#9] + +(6) Filter [codegen id : 1] +Input [2]: [sr_item_sk#8, sr_ticket_number#9] +Condition : (isnotnull(sr_ticket_number#9) AND isnotnull(sr_item_sk#8)) + +(7) BroadcastExchange +Input [2]: [sr_item_sk#8, sr_ticket_number#9] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#10] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [2]: [cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#2 as bigint)] +Right keys [2]: [sr_ticket_number#9, sr_item_sk#8] +Join condition: None + +(9) Filter [codegen id : 3] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#4, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7, sr_item_sk#8, sr_ticket_number#9] +Condition : isnull(sr_ticket_number#9) + +(10) Project [codegen id : 3] +Output [6]: 
[ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#4, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7, sr_item_sk#8, sr_ticket_number#9] + +(11) Scan parquet default.date_dim +Output [2]: [d_date_sk#11, d_year#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#11, d_year#12] + +(13) Filter [codegen id : 2] +Input [2]: [d_date_sk#11, d_year#12] +Condition : ((isnotnull(d_year#12) AND (d_year#12 = 2000)) AND isnotnull(d_date_sk#11)) + +(14) BroadcastExchange +Input [2]: [d_date_sk#11, d_year#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#13] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#11] +Join condition: None + +(16) Project [codegen id : 3] +Output [6]: [ss_item_sk#2, ss_customer_sk#3, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7, d_year#12] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7, d_date_sk#11, d_year#12] + +(17) HashAggregate [codegen id : 3] +Input [6]: [ss_item_sk#2, ss_customer_sk#3, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7, d_year#12] +Keys [3]: [d_year#12, ss_item_sk#2, ss_customer_sk#3] +Functions [3]: [partial_sum(cast(ss_quantity#5 as bigint)), partial_sum(UnscaledValue(ss_wholesale_cost#6)), partial_sum(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [3]: [sum#14, sum#15, sum#16] +Results [6]: [d_year#12, ss_item_sk#2, ss_customer_sk#3, sum#17, sum#18, sum#19] + +(18) Exchange +Input [6]: [d_year#12, ss_item_sk#2, 
ss_customer_sk#3, sum#17, sum#18, sum#19] +Arguments: hashpartitioning(d_year#12, ss_item_sk#2, ss_customer_sk#3, 5), true, [id=#20] + +(19) HashAggregate [codegen id : 12] +Input [6]: [d_year#12, ss_item_sk#2, ss_customer_sk#3, sum#17, sum#18, sum#19] +Keys [3]: [d_year#12, ss_item_sk#2, ss_customer_sk#3] +Functions [3]: [sum(cast(ss_quantity#5 as bigint)), sum(UnscaledValue(ss_wholesale_cost#6)), sum(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [3]: [sum(cast(ss_quantity#5 as bigint))#21, sum(UnscaledValue(ss_wholesale_cost#6))#22, sum(UnscaledValue(ss_sales_price#7))#23] +Results [6]: [d_year#12 AS ss_sold_year#24, ss_item_sk#2, ss_customer_sk#3, sum(cast(ss_quantity#5 as bigint))#21 AS ss_qty#25, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#6))#22,17,2) AS ss_wc#26, MakeDecimal(sum(UnscaledValue(ss_sales_price#7))#23,17,2) AS ss_sp#27] + +(20) Scan parquet default.web_sales +Output [7]: [ws_sold_date_sk#28, ws_item_sk#29, ws_bill_customer_sk#30, ws_order_number#31, ws_quantity#32, ws_wholesale_cost#33, ws_sales_price#34] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk), IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [7]: [ws_sold_date_sk#28, ws_item_sk#29, ws_bill_customer_sk#30, ws_order_number#31, ws_quantity#32, ws_wholesale_cost#33, ws_sales_price#34] + +(22) Filter [codegen id : 6] +Input [7]: [ws_sold_date_sk#28, ws_item_sk#29, ws_bill_customer_sk#30, ws_order_number#31, ws_quantity#32, ws_wholesale_cost#33, ws_sales_price#34] +Condition : ((isnotnull(ws_sold_date_sk#28) AND isnotnull(ws_item_sk#29)) AND isnotnull(ws_bill_customer_sk#30)) + +(23) Scan parquet default.web_returns +Output [2]: [wr_item_sk#35, wr_order_number#36] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_returns] +PushedFilters: [IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [2]: [wr_item_sk#35, wr_order_number#36] + +(25) Filter [codegen id : 4] +Input [2]: [wr_item_sk#35, wr_order_number#36] +Condition : (isnotnull(wr_order_number#36) AND isnotnull(wr_item_sk#35)) + +(26) BroadcastExchange +Input [2]: [wr_item_sk#35, wr_order_number#36] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#37] + +(27) BroadcastHashJoin [codegen id : 6] +Left keys [2]: [cast(ws_order_number#31 as bigint), cast(ws_item_sk#29 as bigint)] +Right keys [2]: [wr_order_number#36, wr_item_sk#35] +Join condition: None + +(28) Filter [codegen id : 6] +Input [9]: [ws_sold_date_sk#28, ws_item_sk#29, ws_bill_customer_sk#30, ws_order_number#31, ws_quantity#32, ws_wholesale_cost#33, ws_sales_price#34, wr_item_sk#35, wr_order_number#36] +Condition : isnull(wr_order_number#36) + +(29) Project [codegen id : 6] +Output [6]: [ws_sold_date_sk#28, ws_item_sk#29, ws_bill_customer_sk#30, ws_quantity#32, ws_wholesale_cost#33, ws_sales_price#34] +Input [9]: [ws_sold_date_sk#28, ws_item_sk#29, ws_bill_customer_sk#30, ws_order_number#31, ws_quantity#32, ws_wholesale_cost#33, ws_sales_price#34, wr_item_sk#35, wr_order_number#36] + +(30) ReusedExchange [Reuses operator id: 14] +Output [2]: [d_date_sk#11, d_year#12] + +(31) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#28] +Right keys [1]: [d_date_sk#11] +Join condition: None + +(32) Project [codegen id : 6] +Output [6]: [ws_item_sk#29, ws_bill_customer_sk#30, ws_quantity#32, ws_wholesale_cost#33, ws_sales_price#34, d_year#12] +Input [8]: [ws_sold_date_sk#28, ws_item_sk#29, ws_bill_customer_sk#30, ws_quantity#32, ws_wholesale_cost#33, ws_sales_price#34, d_date_sk#11, d_year#12] + +(33) HashAggregate 
[codegen id : 6] +Input [6]: [ws_item_sk#29, ws_bill_customer_sk#30, ws_quantity#32, ws_wholesale_cost#33, ws_sales_price#34, d_year#12] +Keys [3]: [d_year#12, ws_item_sk#29, ws_bill_customer_sk#30] +Functions [3]: [partial_sum(cast(ws_quantity#32 as bigint)), partial_sum(UnscaledValue(ws_wholesale_cost#33)), partial_sum(UnscaledValue(ws_sales_price#34))] +Aggregate Attributes [3]: [sum#38, sum#39, sum#40] +Results [6]: [d_year#12, ws_item_sk#29, ws_bill_customer_sk#30, sum#41, sum#42, sum#43] + +(34) Exchange +Input [6]: [d_year#12, ws_item_sk#29, ws_bill_customer_sk#30, sum#41, sum#42, sum#43] +Arguments: hashpartitioning(d_year#12, ws_item_sk#29, ws_bill_customer_sk#30, 5), true, [id=#44] + +(35) HashAggregate [codegen id : 7] +Input [6]: [d_year#12, ws_item_sk#29, ws_bill_customer_sk#30, sum#41, sum#42, sum#43] +Keys [3]: [d_year#12, ws_item_sk#29, ws_bill_customer_sk#30] +Functions [3]: [sum(cast(ws_quantity#32 as bigint)), sum(UnscaledValue(ws_wholesale_cost#33)), sum(UnscaledValue(ws_sales_price#34))] +Aggregate Attributes [3]: [sum(cast(ws_quantity#32 as bigint))#45, sum(UnscaledValue(ws_wholesale_cost#33))#46, sum(UnscaledValue(ws_sales_price#34))#47] +Results [6]: [d_year#12 AS ws_sold_year#48, ws_item_sk#29, ws_bill_customer_sk#30 AS ws_customer_sk#49, sum(cast(ws_quantity#32 as bigint))#45 AS ws_qty#50, MakeDecimal(sum(UnscaledValue(ws_wholesale_cost#33))#46,17,2) AS ws_wc#51, MakeDecimal(sum(UnscaledValue(ws_sales_price#34))#47,17,2) AS ws_sp#52] + +(36) Filter [codegen id : 7] +Input [6]: [ws_sold_year#48, ws_item_sk#29, ws_customer_sk#49, ws_qty#50, ws_wc#51, ws_sp#52] +Condition : (coalesce(ws_qty#50, 0) > 0) + +(37) BroadcastExchange +Input [6]: [ws_sold_year#48, ws_item_sk#29, ws_customer_sk#49, ws_qty#50, ws_wc#51, ws_sp#52] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#53] + +(38) BroadcastHashJoin [codegen id : 12] +Left keys [3]: [ss_sold_year#24, ss_item_sk#2, 
ss_customer_sk#3] +Right keys [3]: [ws_sold_year#48, ws_item_sk#29, ws_customer_sk#49] +Join condition: None + +(39) Project [codegen id : 12] +Output [9]: [ss_sold_year#24, ss_item_sk#2, ss_customer_sk#3, ss_qty#25, ss_wc#26, ss_sp#27, ws_qty#50, ws_wc#51, ws_sp#52] +Input [12]: [ss_sold_year#24, ss_item_sk#2, ss_customer_sk#3, ss_qty#25, ss_wc#26, ss_sp#27, ws_sold_year#48, ws_item_sk#29, ws_customer_sk#49, ws_qty#50, ws_wc#51, ws_sp#52] + +(40) Scan parquet default.catalog_sales +Output [7]: [cs_sold_date_sk#54, cs_bill_customer_sk#55, cs_item_sk#56, cs_order_number#57, cs_quantity#58, cs_wholesale_cost#59, cs_sales_price#60] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk), IsNotNull(cs_bill_customer_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 10] +Input [7]: [cs_sold_date_sk#54, cs_bill_customer_sk#55, cs_item_sk#56, cs_order_number#57, cs_quantity#58, cs_wholesale_cost#59, cs_sales_price#60] + +(42) Filter [codegen id : 10] +Input [7]: [cs_sold_date_sk#54, cs_bill_customer_sk#55, cs_item_sk#56, cs_order_number#57, cs_quantity#58, cs_wholesale_cost#59, cs_sales_price#60] +Condition : ((isnotnull(cs_sold_date_sk#54) AND isnotnull(cs_item_sk#56)) AND isnotnull(cs_bill_customer_sk#55)) + +(43) Scan parquet default.catalog_returns +Output [2]: [cr_item_sk#61, cr_order_number#62] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_returns] +PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(44) ColumnarToRow [codegen id : 8] +Input [2]: [cr_item_sk#61, cr_order_number#62] + +(45) Filter [codegen id : 8] +Input [2]: [cr_item_sk#61, cr_order_number#62] +Condition : (isnotnull(cr_order_number#62) AND 
isnotnull(cr_item_sk#61)) + +(46) BroadcastExchange +Input [2]: [cr_item_sk#61, cr_order_number#62] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, false] as bigint), 32) | (cast(input[0, int, false] as bigint) & 4294967295))),false), [id=#63] + +(47) BroadcastHashJoin [codegen id : 10] +Left keys [2]: [cs_order_number#57, cs_item_sk#56] +Right keys [2]: [cr_order_number#62, cr_item_sk#61] +Join condition: None + +(48) Filter [codegen id : 10] +Input [9]: [cs_sold_date_sk#54, cs_bill_customer_sk#55, cs_item_sk#56, cs_order_number#57, cs_quantity#58, cs_wholesale_cost#59, cs_sales_price#60, cr_item_sk#61, cr_order_number#62] +Condition : isnull(cr_order_number#62) + +(49) Project [codegen id : 10] +Output [6]: [cs_sold_date_sk#54, cs_bill_customer_sk#55, cs_item_sk#56, cs_quantity#58, cs_wholesale_cost#59, cs_sales_price#60] +Input [9]: [cs_sold_date_sk#54, cs_bill_customer_sk#55, cs_item_sk#56, cs_order_number#57, cs_quantity#58, cs_wholesale_cost#59, cs_sales_price#60, cr_item_sk#61, cr_order_number#62] + +(50) ReusedExchange [Reuses operator id: 14] +Output [2]: [d_date_sk#11, d_year#12] + +(51) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_sold_date_sk#54] +Right keys [1]: [d_date_sk#11] +Join condition: None + +(52) Project [codegen id : 10] +Output [6]: [cs_bill_customer_sk#55, cs_item_sk#56, cs_quantity#58, cs_wholesale_cost#59, cs_sales_price#60, d_year#12] +Input [8]: [cs_sold_date_sk#54, cs_bill_customer_sk#55, cs_item_sk#56, cs_quantity#58, cs_wholesale_cost#59, cs_sales_price#60, d_date_sk#11, d_year#12] + +(53) HashAggregate [codegen id : 10] +Input [6]: [cs_bill_customer_sk#55, cs_item_sk#56, cs_quantity#58, cs_wholesale_cost#59, cs_sales_price#60, d_year#12] +Keys [3]: [d_year#12, cs_item_sk#56, cs_bill_customer_sk#55] +Functions [3]: [partial_sum(cast(cs_quantity#58 as bigint)), partial_sum(UnscaledValue(cs_wholesale_cost#59)), partial_sum(UnscaledValue(cs_sales_price#60))] +Aggregate Attributes [3]: [sum#64, 
sum#65, sum#66] +Results [6]: [d_year#12, cs_item_sk#56, cs_bill_customer_sk#55, sum#67, sum#68, sum#69] + +(54) Exchange +Input [6]: [d_year#12, cs_item_sk#56, cs_bill_customer_sk#55, sum#67, sum#68, sum#69] +Arguments: hashpartitioning(d_year#12, cs_item_sk#56, cs_bill_customer_sk#55, 5), true, [id=#70] + +(55) HashAggregate [codegen id : 11] +Input [6]: [d_year#12, cs_item_sk#56, cs_bill_customer_sk#55, sum#67, sum#68, sum#69] +Keys [3]: [d_year#12, cs_item_sk#56, cs_bill_customer_sk#55] +Functions [3]: [sum(cast(cs_quantity#58 as bigint)), sum(UnscaledValue(cs_wholesale_cost#59)), sum(UnscaledValue(cs_sales_price#60))] +Aggregate Attributes [3]: [sum(cast(cs_quantity#58 as bigint))#71, sum(UnscaledValue(cs_wholesale_cost#59))#72, sum(UnscaledValue(cs_sales_price#60))#73] +Results [6]: [d_year#12 AS cs_sold_year#74, cs_item_sk#56, cs_bill_customer_sk#55 AS cs_customer_sk#75, sum(cast(cs_quantity#58 as bigint))#71 AS cs_qty#76, MakeDecimal(sum(UnscaledValue(cs_wholesale_cost#59))#72,17,2) AS cs_wc#77, MakeDecimal(sum(UnscaledValue(cs_sales_price#60))#73,17,2) AS cs_sp#78] + +(56) Filter [codegen id : 11] +Input [6]: [cs_sold_year#74, cs_item_sk#56, cs_customer_sk#75, cs_qty#76, cs_wc#77, cs_sp#78] +Condition : (coalesce(cs_qty#76, 0) > 0) + +(57) BroadcastExchange +Input [6]: [cs_sold_year#74, cs_item_sk#56, cs_customer_sk#75, cs_qty#76, cs_wc#77, cs_sp#78] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#79] + +(58) BroadcastHashJoin [codegen id : 12] +Left keys [3]: [ss_sold_year#24, ss_item_sk#2, ss_customer_sk#3] +Right keys [3]: [cs_sold_year#74, cs_item_sk#56, cs_customer_sk#75] +Join condition: None + +(59) Project [codegen id : 12] +Output [12]: [round((cast(ss_qty#25 as double) / cast(coalesce((ws_qty#50 + cs_qty#76), 1) as double)), 2) AS ratio#80, ss_qty#25 AS store_qty#81, ss_wc#26 AS store_wholesale_cost#82, ss_sp#27 AS store_sales_price#83, (coalesce(ws_qty#50, 0) + 
coalesce(cs_qty#76, 0)) AS other_chan_qty#84, CheckOverflow((promote_precision(cast(coalesce(ws_wc#51, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_wc#77, 0.00) as decimal(18,2)))), DecimalType(18,2), true) AS other_chan_wholesale_cost#85, CheckOverflow((promote_precision(cast(coalesce(ws_sp#52, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_sp#78, 0.00) as decimal(18,2)))), DecimalType(18,2), true) AS other_chan_sales_price#86, ss_sp#27, ss_wc#26, cs_qty#76, ss_qty#25, ws_qty#50] +Input [15]: [ss_sold_year#24, ss_item_sk#2, ss_customer_sk#3, ss_qty#25, ss_wc#26, ss_sp#27, ws_qty#50, ws_wc#51, ws_sp#52, cs_sold_year#74, cs_item_sk#56, cs_customer_sk#75, cs_qty#76, cs_wc#77, cs_sp#78] + +(60) TakeOrderedAndProject +Input [12]: [ratio#80, store_qty#81, store_wholesale_cost#82, store_sales_price#83, other_chan_qty#84, other_chan_wholesale_cost#85, other_chan_sales_price#86, ss_sp#27, ss_wc#26, cs_qty#76, ss_qty#25, ws_qty#50] +Arguments: 100, [ratio#80 ASC NULLS FIRST, ss_qty#25 DESC NULLS LAST, ss_wc#26 DESC NULLS LAST, ss_sp#27 DESC NULLS LAST, other_chan_qty#84 ASC NULLS FIRST, other_chan_wholesale_cost#85 ASC NULLS FIRST, other_chan_sales_price#86 ASC NULLS FIRST, round((cast(ss_qty#25 as double) / cast(coalesce((ws_qty#50 + cs_qty#76), 1) as double)), 2) ASC NULLS FIRST], [ratio#80, store_qty#81, store_wholesale_cost#82, store_sales_price#83, other_chan_qty#84, other_chan_wholesale_cost#85, other_chan_sales_price#86] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78/simplified.txt new file mode 100644 index 0000000000000..ffff01466ef21 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78/simplified.txt @@ -0,0 +1,88 @@ +TakeOrderedAndProject 
[cs_qty,other_chan_qty,other_chan_sales_price,other_chan_wholesale_cost,ratio,ss_qty,ss_sp,ss_wc,store_qty,store_sales_price,store_wholesale_cost,ws_qty] + WholeStageCodegen (12) + Project [cs_qty,cs_sp,cs_wc,ss_qty,ss_sp,ss_wc,ws_qty,ws_sp,ws_wc] + BroadcastHashJoin [cs_customer_sk,cs_item_sk,cs_sold_year,ss_customer_sk,ss_item_sk,ss_sold_year] + Project [ss_customer_sk,ss_item_sk,ss_qty,ss_sold_year,ss_sp,ss_wc,ws_qty,ws_sp,ws_wc] + BroadcastHashJoin [ss_customer_sk,ss_item_sk,ss_sold_year,ws_customer_sk,ws_item_sk,ws_sold_year] + HashAggregate [d_year,ss_customer_sk,ss_item_sk,sum,sum,sum] [ss_qty,ss_sold_year,ss_sp,ss_wc,sum,sum,sum,sum(UnscaledValue(ss_sales_price)),sum(UnscaledValue(ss_wholesale_cost)),sum(cast(ss_quantity as bigint))] + InputAdapter + Exchange [d_year,ss_customer_sk,ss_item_sk] #1 + WholeStageCodegen (3) + HashAggregate [d_year,ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_wholesale_cost] [sum,sum,sum,sum,sum,sum] + Project [d_year,ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_wholesale_cost] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_wholesale_cost] + Filter [sr_ticket_number] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_ticket_number,ss_wholesale_cost] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (7) + Filter [ws_qty] + HashAggregate 
[d_year,sum,sum,sum,ws_bill_customer_sk,ws_item_sk] [sum,sum,sum,sum(UnscaledValue(ws_sales_price)),sum(UnscaledValue(ws_wholesale_cost)),sum(cast(ws_quantity as bigint)),ws_customer_sk,ws_qty,ws_sold_year,ws_sp,ws_wc] + InputAdapter + Exchange [d_year,ws_bill_customer_sk,ws_item_sk] #5 + WholeStageCodegen (6) + HashAggregate [d_year,ws_bill_customer_sk,ws_item_sk,ws_quantity,ws_sales_price,ws_wholesale_cost] [sum,sum,sum,sum,sum,sum] + Project [d_year,ws_bill_customer_sk,ws_item_sk,ws_quantity,ws_sales_price,ws_wholesale_cost] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_bill_customer_sk,ws_item_sk,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_wholesale_cost] + Filter [wr_order_number] + BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + Filter [ws_bill_customer_sk,ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_item_sk,ws_order_number,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_wholesale_cost] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + Filter [wr_item_sk,wr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_item_sk,wr_order_number] + InputAdapter + ReusedExchange [d_date_sk,d_year] #3 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (11) + Filter [cs_qty] + HashAggregate [cs_bill_customer_sk,cs_item_sk,d_year,sum,sum,sum] [cs_customer_sk,cs_qty,cs_sold_year,cs_sp,cs_wc,sum,sum,sum,sum(UnscaledValue(cs_sales_price)),sum(UnscaledValue(cs_wholesale_cost)),sum(cast(cs_quantity as bigint))] + InputAdapter + Exchange [cs_bill_customer_sk,cs_item_sk,d_year] #8 + WholeStageCodegen (10) + HashAggregate [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sales_price,cs_wholesale_cost,d_year] [sum,sum,sum,sum,sum,sum] + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sales_price,cs_wholesale_cost,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project 
[cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_wholesale_cost] + Filter [cr_order_number] + BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_wholesale_cost] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (8) + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number] + InputAdapter + ReusedExchange [d_date_sk,d_year] #3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79.sf100/explain.txt new file mode 100644 index 0000000000000..4f39bba8e00d8 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79.sf100/explain.txt @@ -0,0 +1,208 @@ +== Physical Plan == +TakeOrderedAndProject (37) ++- * Project (36) + +- * SortMergeJoin Inner (35) + :- * Sort (29) + : +- Exchange (28) + : +- * HashAggregate (27) + : +- Exchange (26) + : +- * HashAggregate (25) + : +- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.household_demographics (11) + : +- BroadcastExchange (22) + : +- * Project (21) + : +- * 
Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.store (18) + +- * Sort (34) + +- Exchange (33) + +- * Filter (32) + +- * ColumnarToRow (31) + +- Scan parquet default.customer (30) + + +(1) Scan parquet default.store_sales +Output [8]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [8]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] + +(3) Filter [codegen id : 4] +Input [8]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Condition : (((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#5)) AND isnotnull(ss_hdemo_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_dow#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_dow), EqualTo(d_dow,1), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] +Condition : (((isnotnull(d_dow#11) AND (d_dow#11 = 1)) AND d_year#10 IN (1999,2000,2001)) AND isnotnull(d_date_sk#9)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#9] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] + +(8) 
BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(10) Project [codegen id : 4] +Output [7]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Input [9]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, d_date_sk#9] + +(11) Scan parquet default.household_demographics +Output [3]: [hd_demo_sk#13, hd_dep_count#14, hd_vehicle_count#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/household_demographics] +PushedFilters: [Or(EqualTo(hd_dep_count,6),GreaterThan(hd_vehicle_count,2)), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [hd_demo_sk#13, hd_dep_count#14, hd_vehicle_count#15] + +(13) Filter [codegen id : 2] +Input [3]: [hd_demo_sk#13, hd_dep_count#14, hd_vehicle_count#15] +Condition : (((hd_dep_count#14 = 6) OR (hd_vehicle_count#15 > 2)) AND isnotnull(hd_demo_sk#13)) + +(14) Project [codegen id : 2] +Output [1]: [hd_demo_sk#13] +Input [3]: [hd_demo_sk#13, hd_dep_count#14, hd_vehicle_count#15] + +(15) BroadcastExchange +Input [1]: [hd_demo_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_hdemo_sk#3] +Right keys [1]: [hd_demo_sk#13] +Join condition: None + +(17) Project [codegen id : 4] +Output [6]: [ss_customer_sk#2, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Input [8]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, 
ss_net_profit#8, hd_demo_sk#13] + +(18) Scan parquet default.store +Output [3]: [s_store_sk#17, s_number_employees#18, s_city#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_number_employees), GreaterThanOrEqual(s_number_employees,200), LessThanOrEqual(s_number_employees,295), IsNotNull(s_store_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [3]: [s_store_sk#17, s_number_employees#18, s_city#19] + +(20) Filter [codegen id : 3] +Input [3]: [s_store_sk#17, s_number_employees#18, s_city#19] +Condition : (((isnotnull(s_number_employees#18) AND (s_number_employees#18 >= 200)) AND (s_number_employees#18 <= 295)) AND isnotnull(s_store_sk#17)) + +(21) Project [codegen id : 3] +Output [2]: [s_store_sk#17, s_city#19] +Input [3]: [s_store_sk#17, s_number_employees#18, s_city#19] + +(22) BroadcastExchange +Input [2]: [s_store_sk#17, s_city#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#5] +Right keys [1]: [s_store_sk#17] +Join condition: None + +(24) Project [codegen id : 4] +Output [6]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, s_city#19] +Input [8]: [ss_customer_sk#2, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, s_store_sk#17, s_city#19] + +(25) HashAggregate [codegen id : 4] +Input [6]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, s_city#19] +Keys [4]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, s_city#19] +Functions [2]: [partial_sum(UnscaledValue(ss_coupon_amt#7)), partial_sum(UnscaledValue(ss_net_profit#8))] +Aggregate Attributes [2]: [sum#21, sum#22] +Results [6]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, s_city#19, sum#23, 
sum#24] + +(26) Exchange +Input [6]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, s_city#19, sum#23, sum#24] +Arguments: hashpartitioning(ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, s_city#19, 5), true, [id=#25] + +(27) HashAggregate [codegen id : 5] +Input [6]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, s_city#19, sum#23, sum#24] +Keys [4]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, s_city#19] +Functions [2]: [sum(UnscaledValue(ss_coupon_amt#7)), sum(UnscaledValue(ss_net_profit#8))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_coupon_amt#7))#26, sum(UnscaledValue(ss_net_profit#8))#27] +Results [5]: [ss_ticket_number#6, ss_customer_sk#2, s_city#19, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#7))#26,17,2) AS amt#28, MakeDecimal(sum(UnscaledValue(ss_net_profit#8))#27,17,2) AS profit#29] + +(28) Exchange +Input [5]: [ss_ticket_number#6, ss_customer_sk#2, s_city#19, amt#28, profit#29] +Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#30] + +(29) Sort [codegen id : 6] +Input [5]: [ss_ticket_number#6, ss_customer_sk#2, s_city#19, amt#28, profit#29] +Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 + +(30) Scan parquet default.customer +Output [3]: [c_customer_sk#31, c_first_name#32, c_last_name#33] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 7] +Input [3]: [c_customer_sk#31, c_first_name#32, c_last_name#33] + +(32) Filter [codegen id : 7] +Input [3]: [c_customer_sk#31, c_first_name#32, c_last_name#33] +Condition : isnotnull(c_customer_sk#31) + +(33) Exchange +Input [3]: [c_customer_sk#31, c_first_name#32, c_last_name#33] +Arguments: hashpartitioning(c_customer_sk#31, 5), true, [id=#34] + +(34) Sort [codegen id : 8] +Input [3]: [c_customer_sk#31, c_first_name#32, c_last_name#33] 
+Arguments: [c_customer_sk#31 ASC NULLS FIRST], false, 0 + +(35) SortMergeJoin [codegen id : 9] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#31] +Join condition: None + +(36) Project [codegen id : 9] +Output [7]: [c_last_name#33, c_first_name#32, substr(s_city#19, 1, 30) AS substr(s_city, 1, 30)#35, ss_ticket_number#6, amt#28, profit#29, s_city#19] +Input [8]: [ss_ticket_number#6, ss_customer_sk#2, s_city#19, amt#28, profit#29, c_customer_sk#31, c_first_name#32, c_last_name#33] + +(37) TakeOrderedAndProject +Input [7]: [c_last_name#33, c_first_name#32, substr(s_city, 1, 30)#35, ss_ticket_number#6, amt#28, profit#29, s_city#19] +Arguments: 100, [c_last_name#33 ASC NULLS FIRST, c_first_name#32 ASC NULLS FIRST, substr(s_city#19, 1, 30) ASC NULLS FIRST, profit#29 ASC NULLS FIRST], [c_last_name#33, c_first_name#32, substr(s_city, 1, 30)#35, ss_ticket_number#6, amt#28, profit#29] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79.sf100/simplified.txt new file mode 100644 index 0000000000000..8ef698c9f896c --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79.sf100/simplified.txt @@ -0,0 +1,59 @@ +TakeOrderedAndProject [amt,c_first_name,c_last_name,profit,s_city,ss_ticket_number,substr(s_city, 1, 30)] + WholeStageCodegen (9) + Project [amt,c_first_name,c_last_name,profit,s_city,ss_ticket_number] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (6) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #1 + WholeStageCodegen (5) + HashAggregate [s_city,ss_addr_sk,ss_customer_sk,ss_ticket_number,sum,sum] [amt,profit,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit))] + InputAdapter + Exchange [s_city,ss_addr_sk,ss_customer_sk,ss_ticket_number] #2 + WholeStageCodegen (4) + HashAggregate 
[s_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number] [sum,sum,sum,sum] + Project [s_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_dow,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_dow,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [hd_demo_sk] + Filter [hd_demo_sk,hd_dep_count,hd_vehicle_count] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [s_city,s_store_sk] + Filter [s_number_employees,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_city,s_number_employees,s_store_sk] + InputAdapter + WholeStageCodegen (8) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #6 + WholeStageCodegen (7) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/explain.txt new file mode 100644 index 0000000000000..c5a159ac59b87 --- /dev/null +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/explain.txt @@ -0,0 +1,193 @@ +== Physical Plan == +TakeOrderedAndProject (34) ++- * Project (33) + +- * BroadcastHashJoin Inner BuildRight (32) + :- * HashAggregate (27) + : +- Exchange (26) + : +- * HashAggregate (25) + : +- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.store (11) + : +- BroadcastExchange (22) + : +- * Project (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.household_demographics (18) + +- BroadcastExchange (31) + +- * Filter (30) + +- * ColumnarToRow (29) + +- Scan parquet default.customer (28) + + +(1) Scan parquet default.store_sales +Output [8]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [8]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] + +(3) Filter [codegen id : 4] +Input [8]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, 
ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Condition : (((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#5)) AND isnotnull(ss_hdemo_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_dow#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_dow), EqualTo(d_dow,1), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] +Condition : (((isnotnull(d_dow#11) AND (d_dow#11 = 1)) AND d_year#10 IN (1999,2000,2001)) AND isnotnull(d_date_sk#9)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#9] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] + +(8) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(10) Project [codegen id : 4] +Output [7]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Input [9]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, d_date_sk#9] + +(11) Scan parquet default.store +Output [3]: [s_store_sk#13, s_number_employees#14, s_city#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_number_employees), GreaterThanOrEqual(s_number_employees,200), LessThanOrEqual(s_number_employees,295), IsNotNull(s_store_sk)] 
+ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [s_store_sk#13, s_number_employees#14, s_city#15] + +(13) Filter [codegen id : 2] +Input [3]: [s_store_sk#13, s_number_employees#14, s_city#15] +Condition : (((isnotnull(s_number_employees#14) AND (s_number_employees#14 >= 200)) AND (s_number_employees#14 <= 295)) AND isnotnull(s_store_sk#13)) + +(14) Project [codegen id : 2] +Output [2]: [s_store_sk#13, s_city#15] +Input [3]: [s_store_sk#13, s_number_employees#14, s_city#15] + +(15) BroadcastExchange +Input [2]: [s_store_sk#13, s_city#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#5] +Right keys [1]: [s_store_sk#13] +Join condition: None + +(17) Project [codegen id : 4] +Output [7]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, s_city#15] +Input [9]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, s_store_sk#13, s_city#15] + +(18) Scan parquet default.household_demographics +Output [3]: [hd_demo_sk#17, hd_dep_count#18, hd_vehicle_count#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/household_demographics] +PushedFilters: [Or(EqualTo(hd_dep_count,6),GreaterThan(hd_vehicle_count,2)), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [3]: [hd_demo_sk#17, hd_dep_count#18, hd_vehicle_count#19] + +(20) Filter [codegen id : 3] +Input [3]: [hd_demo_sk#17, hd_dep_count#18, hd_vehicle_count#19] +Condition : (((hd_dep_count#18 = 6) OR (hd_vehicle_count#19 > 2)) AND isnotnull(hd_demo_sk#17)) + +(21) Project [codegen id : 3] +Output [1]: [hd_demo_sk#17] +Input [3]: [hd_demo_sk#17, hd_dep_count#18, hd_vehicle_count#19] + +(22) BroadcastExchange +Input [1]: 
[hd_demo_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_hdemo_sk#3] +Right keys [1]: [hd_demo_sk#17] +Join condition: None + +(24) Project [codegen id : 4] +Output [6]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, s_city#15] +Input [8]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, s_city#15, hd_demo_sk#17] + +(25) HashAggregate [codegen id : 4] +Input [6]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, s_city#15] +Keys [4]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, s_city#15] +Functions [2]: [partial_sum(UnscaledValue(ss_coupon_amt#7)), partial_sum(UnscaledValue(ss_net_profit#8))] +Aggregate Attributes [2]: [sum#21, sum#22] +Results [6]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, s_city#15, sum#23, sum#24] + +(26) Exchange +Input [6]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, s_city#15, sum#23, sum#24] +Arguments: hashpartitioning(ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, s_city#15, 5), true, [id=#25] + +(27) HashAggregate [codegen id : 6] +Input [6]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, s_city#15, sum#23, sum#24] +Keys [4]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, s_city#15] +Functions [2]: [sum(UnscaledValue(ss_coupon_amt#7)), sum(UnscaledValue(ss_net_profit#8))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_coupon_amt#7))#26, sum(UnscaledValue(ss_net_profit#8))#27] +Results [5]: [ss_ticket_number#6, ss_customer_sk#2, s_city#15, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#7))#26,17,2) AS amt#28, MakeDecimal(sum(UnscaledValue(ss_net_profit#8))#27,17,2) AS profit#29] + +(28) Scan parquet default.customer +Output [3]: [c_customer_sk#30, c_first_name#31, c_last_name#32] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 5] +Input [3]: [c_customer_sk#30, c_first_name#31, c_last_name#32] + +(30) Filter [codegen id : 5] +Input [3]: [c_customer_sk#30, c_first_name#31, c_last_name#32] +Condition : isnotnull(c_customer_sk#30) + +(31) BroadcastExchange +Input [3]: [c_customer_sk#30, c_first_name#31, c_last_name#32] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#33] + +(32) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#30] +Join condition: None + +(33) Project [codegen id : 6] +Output [7]: [c_last_name#32, c_first_name#31, substr(s_city#15, 1, 30) AS substr(s_city, 1, 30)#34, ss_ticket_number#6, amt#28, profit#29, s_city#15] +Input [8]: [ss_ticket_number#6, ss_customer_sk#2, s_city#15, amt#28, profit#29, c_customer_sk#30, c_first_name#31, c_last_name#32] + +(34) TakeOrderedAndProject +Input [7]: [c_last_name#32, c_first_name#31, substr(s_city, 1, 30)#34, ss_ticket_number#6, amt#28, profit#29, s_city#15] +Arguments: 100, [c_last_name#32 ASC NULLS FIRST, c_first_name#31 ASC NULLS FIRST, substr(s_city#15, 1, 30) ASC NULLS FIRST, profit#29 ASC NULLS FIRST], [c_last_name#32, c_first_name#31, substr(s_city, 1, 30)#34, ss_ticket_number#6, amt#28, profit#29] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/simplified.txt new file mode 100644 index 0000000000000..53f5a7d0acc0e --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/simplified.txt @@ -0,0 +1,50 @@ +TakeOrderedAndProject [amt,c_first_name,c_last_name,profit,s_city,ss_ticket_number,substr(s_city, 1, 30)] + WholeStageCodegen (6) + Project 
[amt,c_first_name,c_last_name,profit,s_city,ss_ticket_number] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + HashAggregate [s_city,ss_addr_sk,ss_customer_sk,ss_ticket_number,sum,sum] [amt,profit,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit))] + InputAdapter + Exchange [s_city,ss_addr_sk,ss_customer_sk,ss_ticket_number] #1 + WholeStageCodegen (4) + HashAggregate [s_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number] [sum,sum,sum,sum] + Project [s_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [s_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_ticket_number] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_dow,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_dow,d_year] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [s_city,s_store_sk] + Filter [s_number_employees,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_city,s_number_employees,s_store_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [hd_demo_sk] + Filter [hd_demo_sk,hd_dep_count,hd_vehicle_count] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (5) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan 
parquet default.customer [c_customer_sk,c_first_name,c_last_name] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8.sf100/explain.txt new file mode 100644 index 0000000000000..df5b8ec4d66db --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8.sf100/explain.txt @@ -0,0 +1,302 @@ +== Physical Plan == +TakeOrderedAndProject (53) ++- * HashAggregate (52) + +- Exchange (51) + +- * HashAggregate (50) + +- * Project (49) + +- * SortMergeJoin Inner (48) + :- * Sort (18) + : +- Exchange (17) + : +- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.store_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.date_dim (4) + : +- BroadcastExchange (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet default.store (11) + +- * Sort (47) + +- Exchange (46) + +- * HashAggregate (45) + +- Exchange (44) + +- * HashAggregate (43) + +- * Project (42) + +- * BroadcastHashJoin LeftSemi BuildRight (41) + :- * Filter (21) + : +- * ColumnarToRow (20) + : +- Scan parquet default.customer_address (19) + +- BroadcastExchange (40) + +- * Project (39) + +- * Filter (38) + +- * HashAggregate (37) + +- Exchange (36) + +- * HashAggregate (35) + +- * Project (34) + +- * SortMergeJoin Inner (33) + :- * Sort (26) + : +- Exchange (25) + : +- * Filter (24) + : +- * ColumnarToRow (23) + : +- Scan parquet default.customer_address (22) + +- * Sort (32) + +- Exchange (31) + +- * Project (30) + +- * Filter (29) + +- * ColumnarToRow (28) + +- Scan parquet default.customer (27) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_store_sk#2, 
ss_net_profit#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] + +(3) Filter [codegen id : 3] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,1998), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Condition : ((((isnotnull(d_qoy#6) AND isnotnull(d_year#5)) AND (d_qoy#6 = 2)) AND (d_year#5 = 1998)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(10) Project [codegen id : 3] +Output [2]: [ss_store_sk#2, ss_net_profit#3] +Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3, d_date_sk#4] + +(11) Scan parquet default.store +Output [3]: [s_store_sk#8, s_store_name#9, s_zip#10] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_zip)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [s_store_sk#8, s_store_name#9, s_zip#10] + +(13) Filter [codegen id : 2] +Input [3]: [s_store_sk#8, s_store_name#9, s_zip#10] +Condition : (isnotnull(s_store_sk#8) AND isnotnull(s_zip#10)) + +(14) BroadcastExchange +Input [3]: [s_store_sk#8, s_store_name#9, s_zip#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#8] +Join condition: None + +(16) Project [codegen id : 3] +Output [3]: [ss_net_profit#3, s_store_name#9, s_zip#10] +Input [5]: [ss_store_sk#2, ss_net_profit#3, s_store_sk#8, s_store_name#9, s_zip#10] + +(17) Exchange +Input [3]: [ss_net_profit#3, s_store_name#9, s_zip#10] +Arguments: hashpartitioning(substr(s_zip#10, 1, 2), 5), true, [id=#12] + +(18) Sort [codegen id : 4] +Input [3]: [ss_net_profit#3, s_store_name#9, s_zip#10] +Arguments: [substr(s_zip#10, 1, 2) ASC NULLS FIRST], false, 0 + +(19) Scan parquet default.customer_address +Output [1]: [ca_zip#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_address] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 11] +Input [1]: [ca_zip#13] + +(21) Filter [codegen id : 11] +Input [1]: [ca_zip#13] +Condition : (substr(ca_zip#13, 1, 5) INSET 
(56910,69952,63792,39371,74351,11101,25003,97189,57834,73134,62377,51200,32754,22752,86379,14171,91110,40162,98569,28709,13394,66162,25733,25782,26065,18383,51949,87343,50298,83849,33786,64528,23470,67030,46136,25280,46820,77721,99076,18426,31880,17871,98235,45748,49156,18652,72013,51622,43848,78567,41248,13695,44165,67853,54917,53179,64034,10567,71791,68908,55565,59402,64147,85816,57855,61547,27700,68100,28810,58263,15723,83933,51103,58058,90578,82276,81096,81426,96451,77556,38607,76638,18906,62971,57047,48425,35576,11928,30625,83444,73520,51650,57647,60099,30122,94983,24128,10445,41368,26233,26859,21756,24676,19849,36420,38193,58470,39127,13595,87501,24317,15455,69399,98025,81019,48033,11376,39516,67875,92712,14867,38122,29741,42961,30469,51211,56458,15559,16021,33123,33282,33515,72823,54601,76698,56240,72175,60279,20004,68806,72325,28488,43933,50412,45200,22246,78668,79777,96765,67301,73273,49448,82636,23932,47305,29839,39192,18799,61265,37125,58943,64457,88424,24610,84935,89360,68893,30431,28898,10336,90257,59166,46081,26105,96888,36634,86284,35258,39972,22927,73241,53268,24206,27385,99543,31671,14663,30903,39861,24996,63089,88086,83921,21076,67897,66708,45721,60576,25103,52867,30450,36233,30010,96576,73171,56571,56575,64544,13955,78451,43285,18119,16725,83041,76107,79994,54364,35942,56691,19769,63435,34102,18845,22744,13354,75691,45549,23968,31387,83144,13375,15765,28577,88190,19736,73650,37930,25989,83926,94898,51798,39736,22437,55253,38415,71256,18376,42029,25858,44438,19515,38935,51649,71954,15882,18767,63193,25486,49130,37126,40604,34425,17043,12305,11634,26653,94167,36446,10516,67473,66864,72425,63981,18842,22461,42666,47770,69035,70372,28587,45266,15371,15798,45375,90225,16807,31016,68014,21337,19505,50016,10144,84093,21286,19430,34322,91068,94945,72305,24671,58048,65084,28545,21195,20548,22245,77191,96976,48583,76231,15734,61810,11356,68621,68786,98359,41367,26689,69913,76614,68101,88885,50308,79077,18270,28915,29178,53672,62878,10390,14922,68341,56529,4
1766,68309,56616,15126,61860,97789,11489,45692,41918,72151,72550,27156,36495,70738,17879,53535,17920,68880,78890,35850,14089,58078,65164,27068,26231,13376,57665,32213,77610,87816,21309,15146,86198,91137,55307,67467,40558,94627,82136,22351,89091,20260,23006,91393,47537,62496,98294,18840,71286,81312,31029,70466,35458,14060,22685,28286,25631,19512,40081,63837,14328,35474,22152,76232,51061,86057,17183) AND isnotnull(substr(ca_zip#13, 1, 5))) + +(22) Scan parquet default.customer_address +Output [2]: [ca_address_sk#14, ca_zip#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 5] +Input [2]: [ca_address_sk#14, ca_zip#13] + +(24) Filter [codegen id : 5] +Input [2]: [ca_address_sk#14, ca_zip#13] +Condition : isnotnull(ca_address_sk#14) + +(25) Exchange +Input [2]: [ca_address_sk#14, ca_zip#13] +Arguments: hashpartitioning(ca_address_sk#14, 5), true, [id=#15] + +(26) Sort [codegen id : 6] +Input [2]: [ca_address_sk#14, ca_zip#13] +Arguments: [ca_address_sk#14 ASC NULLS FIRST], false, 0 + +(27) Scan parquet default.customer +Output [2]: [c_current_addr_sk#16, c_preferred_cust_flag#17] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_preferred_cust_flag), EqualTo(c_preferred_cust_flag,Y), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 7] +Input [2]: [c_current_addr_sk#16, c_preferred_cust_flag#17] + +(29) Filter [codegen id : 7] +Input [2]: [c_current_addr_sk#16, c_preferred_cust_flag#17] +Condition : ((isnotnull(c_preferred_cust_flag#17) AND (c_preferred_cust_flag#17 = Y)) AND isnotnull(c_current_addr_sk#16)) + +(30) Project [codegen id : 7] +Output 
[1]: [c_current_addr_sk#16] +Input [2]: [c_current_addr_sk#16, c_preferred_cust_flag#17] + +(31) Exchange +Input [1]: [c_current_addr_sk#16] +Arguments: hashpartitioning(c_current_addr_sk#16, 5), true, [id=#18] + +(32) Sort [codegen id : 8] +Input [1]: [c_current_addr_sk#16] +Arguments: [c_current_addr_sk#16 ASC NULLS FIRST], false, 0 + +(33) SortMergeJoin [codegen id : 9] +Left keys [1]: [ca_address_sk#14] +Right keys [1]: [c_current_addr_sk#16] +Join condition: None + +(34) Project [codegen id : 9] +Output [1]: [ca_zip#13] +Input [3]: [ca_address_sk#14, ca_zip#13, c_current_addr_sk#16] + +(35) HashAggregate [codegen id : 9] +Input [1]: [ca_zip#13] +Keys [1]: [ca_zip#13] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#19] +Results [2]: [ca_zip#13, count#20] + +(36) Exchange +Input [2]: [ca_zip#13, count#20] +Arguments: hashpartitioning(ca_zip#13, 5), true, [id=#21] + +(37) HashAggregate [codegen id : 10] +Input [2]: [ca_zip#13, count#20] +Keys [1]: [ca_zip#13] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#22] +Results [2]: [substr(ca_zip#13, 1, 5) AS ca_zip#23, count(1)#22 AS count(1)#24] + +(38) Filter [codegen id : 10] +Input [2]: [ca_zip#23, count(1)#24] +Condition : (count(1)#24 > 10) + +(39) Project [codegen id : 10] +Output [1]: [ca_zip#23] +Input [2]: [ca_zip#23, count(1)#24] + +(40) BroadcastExchange +Input [1]: [ca_zip#23] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true])),false), [id=#25] + +(41) BroadcastHashJoin [codegen id : 11] +Left keys [2]: [coalesce(substr(ca_zip#13, 1, 5), ), isnull(substr(ca_zip#13, 1, 5))] +Right keys [2]: [coalesce(ca_zip#23, ), isnull(ca_zip#23)] +Join condition: None + +(42) Project [codegen id : 11] +Output [1]: [substr(ca_zip#13, 1, 5) AS ca_zip#26] +Input [1]: [ca_zip#13] + +(43) HashAggregate [codegen id : 11] +Input [1]: [ca_zip#26] +Keys [1]: [ca_zip#26] +Functions: [] +Aggregate Attributes: [] +Results [1]: 
[ca_zip#26] + +(44) Exchange +Input [1]: [ca_zip#26] +Arguments: hashpartitioning(ca_zip#26, 5), true, [id=#27] + +(45) HashAggregate [codegen id : 12] +Input [1]: [ca_zip#26] +Keys [1]: [ca_zip#26] +Functions: [] +Aggregate Attributes: [] +Results [1]: [ca_zip#26] + +(46) Exchange +Input [1]: [ca_zip#26] +Arguments: hashpartitioning(substr(ca_zip#26, 1, 2), 5), true, [id=#28] + +(47) Sort [codegen id : 13] +Input [1]: [ca_zip#26] +Arguments: [substr(ca_zip#26, 1, 2) ASC NULLS FIRST], false, 0 + +(48) SortMergeJoin [codegen id : 14] +Left keys [1]: [substr(s_zip#10, 1, 2)] +Right keys [1]: [substr(ca_zip#26, 1, 2)] +Join condition: None + +(49) Project [codegen id : 14] +Output [2]: [ss_net_profit#3, s_store_name#9] +Input [4]: [ss_net_profit#3, s_store_name#9, s_zip#10, ca_zip#26] + +(50) HashAggregate [codegen id : 14] +Input [2]: [ss_net_profit#3, s_store_name#9] +Keys [1]: [s_store_name#9] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [sum#29] +Results [2]: [s_store_name#9, sum#30] + +(51) Exchange +Input [2]: [s_store_name#9, sum#30] +Arguments: hashpartitioning(s_store_name#9, 5), true, [id=#31] + +(52) HashAggregate [codegen id : 15] +Input [2]: [s_store_name#9, sum#30] +Keys [1]: [s_store_name#9] +Functions [1]: [sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#3))#32] +Results [2]: [s_store_name#9, MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#32,17,2) AS sum(ss_net_profit)#33] + +(53) TakeOrderedAndProject +Input [2]: [s_store_name#9, sum(ss_net_profit)#33] +Arguments: 100, [s_store_name#9 ASC NULLS FIRST], [s_store_name#9, sum(ss_net_profit)#33] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8.sf100/simplified.txt new file mode 100644 index 0000000000000..415bf96afe55d --- /dev/null +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8.sf100/simplified.txt @@ -0,0 +1,88 @@ +TakeOrderedAndProject [s_store_name,sum(ss_net_profit)] + WholeStageCodegen (15) + HashAggregate [s_store_name,sum] [sum,sum(UnscaledValue(ss_net_profit)),sum(ss_net_profit)] + InputAdapter + Exchange [s_store_name] #1 + WholeStageCodegen (14) + HashAggregate [s_store_name,ss_net_profit] [sum,sum] + Project [s_store_name,ss_net_profit] + SortMergeJoin [ca_zip,s_zip] + InputAdapter + WholeStageCodegen (4) + Sort [s_zip] + InputAdapter + Exchange [s_zip] #2 + WholeStageCodegen (3) + Project [s_store_name,s_zip,ss_net_profit] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_net_profit,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_net_profit,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_qoy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [s_store_sk,s_zip] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_name,s_store_sk,s_zip] + InputAdapter + WholeStageCodegen (13) + Sort [ca_zip] + InputAdapter + Exchange [ca_zip] #5 + WholeStageCodegen (12) + HashAggregate [ca_zip] + InputAdapter + Exchange [ca_zip] #6 + WholeStageCodegen (11) + HashAggregate [ca_zip] + Project [ca_zip] + BroadcastHashJoin [ca_zip,ca_zip] + Filter [ca_zip] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_zip] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (10) + Project [ca_zip] + Filter [count(1)] + HashAggregate [ca_zip,count] [ca_zip,count,count(1),count(1)] + InputAdapter + Exchange [ca_zip] #8 + WholeStageCodegen (9) + HashAggregate [ca_zip] [count,count] + Project [ca_zip] + SortMergeJoin 
[c_current_addr_sk,ca_address_sk] + InputAdapter + WholeStageCodegen (6) + Sort [ca_address_sk] + InputAdapter + Exchange [ca_address_sk] #9 + WholeStageCodegen (5) + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_zip] + InputAdapter + WholeStageCodegen (8) + Sort [c_current_addr_sk] + InputAdapter + Exchange [c_current_addr_sk] #10 + WholeStageCodegen (7) + Project [c_current_addr_sk] + Filter [c_current_addr_sk,c_preferred_cust_flag] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_preferred_cust_flag] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8/explain.txt new file mode 100644 index 0000000000000..aed20da055b40 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8/explain.txt @@ -0,0 +1,272 @@ +== Physical Plan == +TakeOrderedAndProject (47) ++- * HashAggregate (46) + +- Exchange (45) + +- * HashAggregate (44) + +- * Project (43) + +- * BroadcastHashJoin Inner BuildRight (42) + :- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.store_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.date_dim (4) + : +- BroadcastExchange (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet default.store (11) + +- BroadcastExchange (41) + +- * HashAggregate (40) + +- Exchange (39) + +- * HashAggregate (38) + +- * Project (37) + +- * BroadcastHashJoin LeftSemi BuildRight (36) + :- * Filter (19) + : +- * ColumnarToRow (18) + : +- Scan parquet default.customer_address (17) + +- BroadcastExchange (35) + +- * Project (34) + +- * Filter (33) + 
+- * HashAggregate (32) + +- Exchange (31) + +- * HashAggregate (30) + +- * Project (29) + +- * BroadcastHashJoin Inner BuildRight (28) + :- * Filter (22) + : +- * ColumnarToRow (21) + : +- Scan parquet default.customer_address (20) + +- BroadcastExchange (27) + +- * Project (26) + +- * Filter (25) + +- * ColumnarToRow (24) + +- Scan parquet default.customer (23) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 8] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] + +(3) Filter [codegen id : 8] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,1998), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Condition : ((((isnotnull(d_qoy#6) AND isnotnull(d_year#5)) AND (d_qoy#6 = 2)) AND (d_year#5 = 1998)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 8] +Left keys [1]: 
[ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(10) Project [codegen id : 8] +Output [2]: [ss_store_sk#2, ss_net_profit#3] +Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3, d_date_sk#4] + +(11) Scan parquet default.store +Output [3]: [s_store_sk#8, s_store_name#9, s_zip#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_zip)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [s_store_sk#8, s_store_name#9, s_zip#10] + +(13) Filter [codegen id : 2] +Input [3]: [s_store_sk#8, s_store_name#9, s_zip#10] +Condition : (isnotnull(s_store_sk#8) AND isnotnull(s_zip#10)) + +(14) BroadcastExchange +Input [3]: [s_store_sk#8, s_store_name#9, s_zip#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] + +(15) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#8] +Join condition: None + +(16) Project [codegen id : 8] +Output [3]: [ss_net_profit#3, s_store_name#9, s_zip#10] +Input [5]: [ss_store_sk#2, ss_net_profit#3, s_store_sk#8, s_store_name#9, s_zip#10] + +(17) Scan parquet default.customer_address +Output [1]: [ca_zip#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_address] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 6] +Input [1]: [ca_zip#12] + +(19) Filter [codegen id : 6] +Input [1]: [ca_zip#12] +Condition : (substr(ca_zip#12, 1, 5) INSET 
(56910,69952,63792,39371,74351,11101,25003,97189,57834,73134,62377,51200,32754,22752,86379,14171,91110,40162,98569,28709,13394,66162,25733,25782,26065,18383,51949,87343,50298,83849,33786,64528,23470,67030,46136,25280,46820,77721,99076,18426,31880,17871,98235,45748,49156,18652,72013,51622,43848,78567,41248,13695,44165,67853,54917,53179,64034,10567,71791,68908,55565,59402,64147,85816,57855,61547,27700,68100,28810,58263,15723,83933,51103,58058,90578,82276,81096,81426,96451,77556,38607,76638,18906,62971,57047,48425,35576,11928,30625,83444,73520,51650,57647,60099,30122,94983,24128,10445,41368,26233,26859,21756,24676,19849,36420,38193,58470,39127,13595,87501,24317,15455,69399,98025,81019,48033,11376,39516,67875,92712,14867,38122,29741,42961,30469,51211,56458,15559,16021,33123,33282,33515,72823,54601,76698,56240,72175,60279,20004,68806,72325,28488,43933,50412,45200,22246,78668,79777,96765,67301,73273,49448,82636,23932,47305,29839,39192,18799,61265,37125,58943,64457,88424,24610,84935,89360,68893,30431,28898,10336,90257,59166,46081,26105,96888,36634,86284,35258,39972,22927,73241,53268,24206,27385,99543,31671,14663,30903,39861,24996,63089,88086,83921,21076,67897,66708,45721,60576,25103,52867,30450,36233,30010,96576,73171,56571,56575,64544,13955,78451,43285,18119,16725,83041,76107,79994,54364,35942,56691,19769,63435,34102,18845,22744,13354,75691,45549,23968,31387,83144,13375,15765,28577,88190,19736,73650,37930,25989,83926,94898,51798,39736,22437,55253,38415,71256,18376,42029,25858,44438,19515,38935,51649,71954,15882,18767,63193,25486,49130,37126,40604,34425,17043,12305,11634,26653,94167,36446,10516,67473,66864,72425,63981,18842,22461,42666,47770,69035,70372,28587,45266,15371,15798,45375,90225,16807,31016,68014,21337,19505,50016,10144,84093,21286,19430,34322,91068,94945,72305,24671,58048,65084,28545,21195,20548,22245,77191,96976,48583,76231,15734,61810,11356,68621,68786,98359,41367,26689,69913,76614,68101,88885,50308,79077,18270,28915,29178,53672,62878,10390,14922,68341,56529,4
1766,68309,56616,15126,61860,97789,11489,45692,41918,72151,72550,27156,36495,70738,17879,53535,17920,68880,78890,35850,14089,58078,65164,27068,26231,13376,57665,32213,77610,87816,21309,15146,86198,91137,55307,67467,40558,94627,82136,22351,89091,20260,23006,91393,47537,62496,98294,18840,71286,81312,31029,70466,35458,14060,22685,28286,25631,19512,40081,63837,14328,35474,22152,76232,51061,86057,17183) AND isnotnull(substr(ca_zip#12, 1, 5))) + +(20) Scan parquet default.customer_address +Output [2]: [ca_address_sk#13, ca_zip#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 4] +Input [2]: [ca_address_sk#13, ca_zip#12] + +(22) Filter [codegen id : 4] +Input [2]: [ca_address_sk#13, ca_zip#12] +Condition : isnotnull(ca_address_sk#13) + +(23) Scan parquet default.customer +Output [2]: [c_current_addr_sk#14, c_preferred_cust_flag#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_preferred_cust_flag), EqualTo(c_preferred_cust_flag,Y), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 3] +Input [2]: [c_current_addr_sk#14, c_preferred_cust_flag#15] + +(25) Filter [codegen id : 3] +Input [2]: [c_current_addr_sk#14, c_preferred_cust_flag#15] +Condition : ((isnotnull(c_preferred_cust_flag#15) AND (c_preferred_cust_flag#15 = Y)) AND isnotnull(c_current_addr_sk#14)) + +(26) Project [codegen id : 3] +Output [1]: [c_current_addr_sk#14] +Input [2]: [c_current_addr_sk#14, c_preferred_cust_flag#15] + +(27) BroadcastExchange +Input [1]: [c_current_addr_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(28) BroadcastHashJoin 
[codegen id : 4] +Left keys [1]: [ca_address_sk#13] +Right keys [1]: [c_current_addr_sk#14] +Join condition: None + +(29) Project [codegen id : 4] +Output [1]: [ca_zip#12] +Input [3]: [ca_address_sk#13, ca_zip#12, c_current_addr_sk#14] + +(30) HashAggregate [codegen id : 4] +Input [1]: [ca_zip#12] +Keys [1]: [ca_zip#12] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#17] +Results [2]: [ca_zip#12, count#18] + +(31) Exchange +Input [2]: [ca_zip#12, count#18] +Arguments: hashpartitioning(ca_zip#12, 5), true, [id=#19] + +(32) HashAggregate [codegen id : 5] +Input [2]: [ca_zip#12, count#18] +Keys [1]: [ca_zip#12] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#20] +Results [2]: [substr(ca_zip#12, 1, 5) AS ca_zip#21, count(1)#20 AS count(1)#22] + +(33) Filter [codegen id : 5] +Input [2]: [ca_zip#21, count(1)#22] +Condition : (count(1)#22 > 10) + +(34) Project [codegen id : 5] +Output [1]: [ca_zip#21] +Input [2]: [ca_zip#21, count(1)#22] + +(35) BroadcastExchange +Input [1]: [ca_zip#21] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true])),false), [id=#23] + +(36) BroadcastHashJoin [codegen id : 6] +Left keys [2]: [coalesce(substr(ca_zip#12, 1, 5), ), isnull(substr(ca_zip#12, 1, 5))] +Right keys [2]: [coalesce(ca_zip#21, ), isnull(ca_zip#21)] +Join condition: None + +(37) Project [codegen id : 6] +Output [1]: [substr(ca_zip#12, 1, 5) AS ca_zip#24] +Input [1]: [ca_zip#12] + +(38) HashAggregate [codegen id : 6] +Input [1]: [ca_zip#24] +Keys [1]: [ca_zip#24] +Functions: [] +Aggregate Attributes: [] +Results [1]: [ca_zip#24] + +(39) Exchange +Input [1]: [ca_zip#24] +Arguments: hashpartitioning(ca_zip#24, 5), true, [id=#25] + +(40) HashAggregate [codegen id : 7] +Input [1]: [ca_zip#24] +Keys [1]: [ca_zip#24] +Functions: [] +Aggregate Attributes: [] +Results [1]: [ca_zip#24] + +(41) BroadcastExchange +Input [1]: [ca_zip#24] +Arguments: HashedRelationBroadcastMode(List(substr(input[0, 
string, true], 1, 2)),false), [id=#26] + +(42) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [substr(s_zip#10, 1, 2)] +Right keys [1]: [substr(ca_zip#24, 1, 2)] +Join condition: None + +(43) Project [codegen id : 8] +Output [2]: [ss_net_profit#3, s_store_name#9] +Input [4]: [ss_net_profit#3, s_store_name#9, s_zip#10, ca_zip#24] + +(44) HashAggregate [codegen id : 8] +Input [2]: [ss_net_profit#3, s_store_name#9] +Keys [1]: [s_store_name#9] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [sum#27] +Results [2]: [s_store_name#9, sum#28] + +(45) Exchange +Input [2]: [s_store_name#9, sum#28] +Arguments: hashpartitioning(s_store_name#9, 5), true, [id=#29] + +(46) HashAggregate [codegen id : 9] +Input [2]: [s_store_name#9, sum#28] +Keys [1]: [s_store_name#9] +Functions [1]: [sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#3))#30] +Results [2]: [s_store_name#9, MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#30,17,2) AS sum(ss_net_profit)#31] + +(47) TakeOrderedAndProject +Input [2]: [s_store_name#9, sum(ss_net_profit)#31] +Arguments: 100, [s_store_name#9 ASC NULLS FIRST], [s_store_name#9, sum(ss_net_profit)#31] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8/simplified.txt new file mode 100644 index 0000000000000..b463f20da9968 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8/simplified.txt @@ -0,0 +1,70 @@ +TakeOrderedAndProject [s_store_name,sum(ss_net_profit)] + WholeStageCodegen (9) + HashAggregate [s_store_name,sum] [sum,sum(UnscaledValue(ss_net_profit)),sum(ss_net_profit)] + InputAdapter + Exchange [s_store_name] #1 + WholeStageCodegen (8) + HashAggregate [s_store_name,ss_net_profit] [sum,sum] + Project [s_store_name,ss_net_profit] + BroadcastHashJoin [ca_zip,s_zip] + Project 
[s_store_name,s_zip,ss_net_profit] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_net_profit,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_net_profit,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_qoy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [s_store_sk,s_zip] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_name,s_store_sk,s_zip] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (7) + HashAggregate [ca_zip] + InputAdapter + Exchange [ca_zip] #5 + WholeStageCodegen (6) + HashAggregate [ca_zip] + Project [ca_zip] + BroadcastHashJoin [ca_zip,ca_zip] + Filter [ca_zip] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_zip] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Project [ca_zip] + Filter [count(1)] + HashAggregate [ca_zip,count] [ca_zip,count,count(1),count(1)] + InputAdapter + Exchange [ca_zip] #7 + WholeStageCodegen (4) + HashAggregate [ca_zip] [count,count] + Project [ca_zip] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_zip] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (3) + Project [c_current_addr_sk] + Filter [c_current_addr_sk,c_preferred_cust_flag] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_preferred_cust_flag] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/explain.txt new file mode 100644 index 0000000000000..8fbaac0eb8fb7 --- 
/dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/explain.txt @@ -0,0 +1,598 @@ +== Physical Plan == +TakeOrderedAndProject (108) ++- * HashAggregate (107) + +- Exchange (106) + +- * HashAggregate (105) + +- * Expand (104) + +- Union (103) + :- * HashAggregate (42) + : +- Exchange (41) + : +- * HashAggregate (40) + : +- * Project (39) + : +- * BroadcastHashJoin Inner BuildRight (38) + : :- * Project (33) + : : +- * BroadcastHashJoin Inner BuildRight (32) + : : :- * Project (26) + : : : +- * BroadcastHashJoin Inner BuildRight (25) + : : : :- * Project (19) + : : : : +- * BroadcastHashJoin Inner BuildRight (18) + : : : : :- * Project (12) + : : : : : +- SortMergeJoin LeftOuter (11) + : : : : : :- * Sort (5) + : : : : : : +- Exchange (4) + : : : : : : +- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet default.store_sales (1) + : : : : : +- * Sort (10) + : : : : : +- Exchange (9) + : : : : : +- * Filter (8) + : : : : : +- * ColumnarToRow (7) + : : : : : +- Scan parquet default.store_returns (6) + : : : : +- BroadcastExchange (17) + : : : : +- * Project (16) + : : : : +- * Filter (15) + : : : : +- * ColumnarToRow (14) + : : : : +- Scan parquet default.item (13) + : : : +- BroadcastExchange (24) + : : : +- * Project (23) + : : : +- * Filter (22) + : : : +- * ColumnarToRow (21) + : : : +- Scan parquet default.date_dim (20) + : : +- BroadcastExchange (31) + : : +- * Project (30) + : : +- * Filter (29) + : : +- * ColumnarToRow (28) + : : +- Scan parquet default.promotion (27) + : +- BroadcastExchange (37) + : +- * Filter (36) + : +- * ColumnarToRow (35) + : +- Scan parquet default.store (34) + :- * HashAggregate (72) + : +- Exchange (71) + : +- * HashAggregate (70) + : +- * Project (69) + : +- * BroadcastHashJoin Inner BuildRight (68) + : :- * Project (63) + : : +- * BroadcastHashJoin Inner BuildRight (62) + : : :- * Project (60) + : : : +- * BroadcastHashJoin Inner BuildRight (59) + : : : :- * 
Project (57) + : : : : +- * BroadcastHashJoin Inner BuildRight (56) + : : : : :- * Project (54) + : : : : : +- SortMergeJoin LeftOuter (53) + : : : : : :- * Sort (47) + : : : : : : +- Exchange (46) + : : : : : : +- * Filter (45) + : : : : : : +- * ColumnarToRow (44) + : : : : : : +- Scan parquet default.catalog_sales (43) + : : : : : +- * Sort (52) + : : : : : +- Exchange (51) + : : : : : +- * Filter (50) + : : : : : +- * ColumnarToRow (49) + : : : : : +- Scan parquet default.catalog_returns (48) + : : : : +- ReusedExchange (55) + : : : +- ReusedExchange (58) + : : +- ReusedExchange (61) + : +- BroadcastExchange (67) + : +- * Filter (66) + : +- * ColumnarToRow (65) + : +- Scan parquet default.catalog_page (64) + +- * HashAggregate (102) + +- Exchange (101) + +- * HashAggregate (100) + +- * Project (99) + +- * BroadcastHashJoin Inner BuildRight (98) + :- * Project (93) + : +- * BroadcastHashJoin Inner BuildRight (92) + : :- * Project (90) + : : +- * BroadcastHashJoin Inner BuildRight (89) + : : :- * Project (87) + : : : +- * BroadcastHashJoin Inner BuildRight (86) + : : : :- * Project (84) + : : : : +- SortMergeJoin LeftOuter (83) + : : : : :- * Sort (77) + : : : : : +- Exchange (76) + : : : : : +- * Filter (75) + : : : : : +- * ColumnarToRow (74) + : : : : : +- Scan parquet default.web_sales (73) + : : : : +- * Sort (82) + : : : : +- Exchange (81) + : : : : +- * Filter (80) + : : : : +- * ColumnarToRow (79) + : : : : +- Scan parquet default.web_returns (78) + : : : +- ReusedExchange (85) + : : +- ReusedExchange (88) + : +- ReusedExchange (91) + +- BroadcastExchange (97) + +- * Filter (96) + +- * ColumnarToRow (95) + +- Scan parquet default.web_site (94) + + +(1) Scan parquet default.store_sales +Output [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_net_profit#7] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk), IsNotNull(ss_promo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_net_profit#7] + +(3) Filter [codegen id : 1] +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_net_profit#7] +Condition : (((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_promo_sk#4)) + +(4) Exchange +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_net_profit#7] +Arguments: hashpartitioning(cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint), 5), true, [id=#8] + +(5) Sort [codegen id : 2] +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_net_profit#7] +Arguments: [cast(ss_item_sk#2 as bigint) ASC NULLS FIRST, cast(ss_ticket_number#5 as bigint) ASC NULLS FIRST], false, 0 + +(6) Scan parquet default.store_returns +Output [4]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_returns] +PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 3] +Input [4]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12] + +(8) Filter [codegen id : 3] +Input [4]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12] +Condition : (isnotnull(sr_item_sk#9) AND isnotnull(sr_ticket_number#10)) + 
+(9) Exchange +Input [4]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12] +Arguments: hashpartitioning(sr_item_sk#9, sr_ticket_number#10, 5), true, [id=#13] + +(10) Sort [codegen id : 4] +Input [4]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12] +Arguments: [sr_item_sk#9 ASC NULLS FIRST, sr_ticket_number#10 ASC NULLS FIRST], false, 0 + +(11) SortMergeJoin +Left keys [2]: [cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint)] +Right keys [2]: [sr_item_sk#9, sr_ticket_number#10] +Join condition: None + +(12) Project [codegen id : 9] +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12] +Input [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_net_profit#7, sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12] + +(13) Scan parquet default.item +Output [2]: [i_item_sk#14, i_current_price#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_current_price), GreaterThan(i_current_price,50.00), IsNotNull(i_item_sk)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 5] +Input [2]: [i_item_sk#14, i_current_price#15] + +(15) Filter [codegen id : 5] +Input [2]: [i_item_sk#14, i_current_price#15] +Condition : ((isnotnull(i_current_price#15) AND (i_current_price#15 > 50.00)) AND isnotnull(i_item_sk#14)) + +(16) Project [codegen id : 5] +Output [1]: [i_item_sk#14] +Input [2]: [i_item_sk#14, i_current_price#15] + +(17) BroadcastExchange +Input [1]: [i_item_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(18) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#14] +Join condition: None + +(19) 
Project [codegen id : 9] +Output [7]: [ss_sold_date_sk#1, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12, i_item_sk#14] + +(20) Scan parquet default.date_dim +Output [2]: [d_date_sk#17, d_date#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-22), IsNotNull(d_date_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [2]: [d_date_sk#17, d_date#18] + +(22) Filter [codegen id : 6] +Input [2]: [d_date_sk#17, d_date#18] +Condition : (((isnotnull(d_date#18) AND (d_date#18 >= 11192)) AND (d_date#18 <= 11222)) AND isnotnull(d_date_sk#17)) + +(23) Project [codegen id : 6] +Output [1]: [d_date_sk#17] +Input [2]: [d_date_sk#17, d_date#18] + +(24) BroadcastExchange +Input [1]: [d_date_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#19] + +(25) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#17] +Join condition: None + +(26) Project [codegen id : 9] +Output [6]: [ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12] +Input [8]: [ss_sold_date_sk#1, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12, d_date_sk#17] + +(27) Scan parquet default.promotion +Output [2]: [p_promo_sk#20, p_channel_tv#21] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/promotion] +PushedFilters: [IsNotNull(p_channel_tv), 
EqualTo(p_channel_tv,N), IsNotNull(p_promo_sk)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 7] +Input [2]: [p_promo_sk#20, p_channel_tv#21] + +(29) Filter [codegen id : 7] +Input [2]: [p_promo_sk#20, p_channel_tv#21] +Condition : ((isnotnull(p_channel_tv#21) AND (p_channel_tv#21 = N)) AND isnotnull(p_promo_sk#20)) + +(30) Project [codegen id : 7] +Output [1]: [p_promo_sk#20] +Input [2]: [p_promo_sk#20, p_channel_tv#21] + +(31) BroadcastExchange +Input [1]: [p_promo_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] + +(32) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_promo_sk#4] +Right keys [1]: [p_promo_sk#20] +Join condition: None + +(33) Project [codegen id : 9] +Output [5]: [ss_store_sk#3, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12] +Input [7]: [ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12, p_promo_sk#20] + +(34) Scan parquet default.store +Output [2]: [s_store_sk#23, s_store_id#24] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 8] +Input [2]: [s_store_sk#23, s_store_id#24] + +(36) Filter [codegen id : 8] +Input [2]: [s_store_sk#23, s_store_id#24] +Condition : isnotnull(s_store_sk#23) + +(37) BroadcastExchange +Input [2]: [s_store_sk#23, s_store_id#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#25] + +(38) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#23] +Join condition: None + +(39) Project [codegen id : 9] +Output [5]: [ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12, s_store_id#24] +Input [7]: [ss_store_sk#3, ss_ext_sales_price#6, ss_net_profit#7, 
sr_return_amt#11, sr_net_loss#12, s_store_sk#23, s_store_id#24] + +(40) HashAggregate [codegen id : 9] +Input [5]: [ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12, s_store_id#24] +Keys [1]: [s_store_id#24] +Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#6)), partial_sum(coalesce(cast(sr_return_amt#11 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ss_net_profit#7 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#12 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Aggregate Attributes [5]: [sum#26, sum#27, isEmpty#28, sum#29, isEmpty#30] +Results [6]: [s_store_id#24, sum#31, sum#32, isEmpty#33, sum#34, isEmpty#35] + +(41) Exchange +Input [6]: [s_store_id#24, sum#31, sum#32, isEmpty#33, sum#34, isEmpty#35] +Arguments: hashpartitioning(s_store_id#24, 5), true, [id=#36] + +(42) HashAggregate [codegen id : 10] +Input [6]: [s_store_id#24, sum#31, sum#32, isEmpty#33, sum#34, isEmpty#35] +Keys [1]: [s_store_id#24] +Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#6)), sum(coalesce(cast(sr_return_amt#11 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ss_net_profit#7 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#12 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#6))#37, sum(coalesce(cast(sr_return_amt#11 as decimal(12,2)), 0.00))#38, sum(CheckOverflow((promote_precision(cast(ss_net_profit#7 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#12 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#39] +Results [5]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#37,17,2) AS sales#40, sum(coalesce(cast(sr_return_amt#11 as decimal(12,2)), 0.00))#38 AS returns#41, sum(CheckOverflow((promote_precision(cast(ss_net_profit#7 as decimal(13,2))) - 
promote_precision(cast(coalesce(cast(sr_net_loss#12 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#39 AS profit#42, store channel AS channel#43, concat(store, s_store_id#24) AS id#44] + +(43) Scan parquet default.catalog_sales +Output [7]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_item_sk#47, cs_promo_sk#48, cs_order_number#49, cs_ext_sales_price#50, cs_net_profit#51] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_catalog_page_sk), IsNotNull(cs_item_sk), IsNotNull(cs_promo_sk)] +ReadSchema: struct + +(44) ColumnarToRow [codegen id : 11] +Input [7]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_item_sk#47, cs_promo_sk#48, cs_order_number#49, cs_ext_sales_price#50, cs_net_profit#51] + +(45) Filter [codegen id : 11] +Input [7]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_item_sk#47, cs_promo_sk#48, cs_order_number#49, cs_ext_sales_price#50, cs_net_profit#51] +Condition : (((isnotnull(cs_sold_date_sk#45) AND isnotnull(cs_catalog_page_sk#46)) AND isnotnull(cs_item_sk#47)) AND isnotnull(cs_promo_sk#48)) + +(46) Exchange +Input [7]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_item_sk#47, cs_promo_sk#48, cs_order_number#49, cs_ext_sales_price#50, cs_net_profit#51] +Arguments: hashpartitioning(cs_item_sk#47, cs_order_number#49, 5), true, [id=#52] + +(47) Sort [codegen id : 12] +Input [7]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_item_sk#47, cs_promo_sk#48, cs_order_number#49, cs_ext_sales_price#50, cs_net_profit#51] +Arguments: [cs_item_sk#47 ASC NULLS FIRST, cs_order_number#49 ASC NULLS FIRST], false, 0 + +(48) Scan parquet default.catalog_returns +Output [4]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_returns] +PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] +ReadSchema: struct + +(49) ColumnarToRow [codegen id : 13] +Input [4]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56] + +(50) Filter [codegen id : 13] +Input [4]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56] +Condition : (isnotnull(cr_item_sk#53) AND isnotnull(cr_order_number#54)) + +(51) Exchange +Input [4]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56] +Arguments: hashpartitioning(cr_item_sk#53, cr_order_number#54, 5), true, [id=#57] + +(52) Sort [codegen id : 14] +Input [4]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56] +Arguments: [cr_item_sk#53 ASC NULLS FIRST, cr_order_number#54 ASC NULLS FIRST], false, 0 + +(53) SortMergeJoin +Left keys [2]: [cs_item_sk#47, cs_order_number#49] +Right keys [2]: [cr_item_sk#53, cr_order_number#54] +Join condition: None + +(54) Project [codegen id : 19] +Output [8]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_item_sk#47, cs_promo_sk#48, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56] +Input [11]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_item_sk#47, cs_promo_sk#48, cs_order_number#49, cs_ext_sales_price#50, cs_net_profit#51, cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56] + +(55) ReusedExchange [Reuses operator id: 17] +Output [1]: [i_item_sk#14] + +(56) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [cs_item_sk#47] +Right keys [1]: [i_item_sk#14] +Join condition: None + +(57) Project [codegen id : 19] +Output [7]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_promo_sk#48, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56] +Input [9]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_item_sk#47, cs_promo_sk#48, 
cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56, i_item_sk#14] + +(58) ReusedExchange [Reuses operator id: 24] +Output [1]: [d_date_sk#17] + +(59) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [cs_sold_date_sk#45] +Right keys [1]: [d_date_sk#17] +Join condition: None + +(60) Project [codegen id : 19] +Output [6]: [cs_catalog_page_sk#46, cs_promo_sk#48, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56] +Input [8]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_promo_sk#48, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56, d_date_sk#17] + +(61) ReusedExchange [Reuses operator id: 31] +Output [1]: [p_promo_sk#20] + +(62) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [cs_promo_sk#48] +Right keys [1]: [p_promo_sk#20] +Join condition: None + +(63) Project [codegen id : 19] +Output [5]: [cs_catalog_page_sk#46, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56] +Input [7]: [cs_catalog_page_sk#46, cs_promo_sk#48, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56, p_promo_sk#20] + +(64) Scan parquet default.catalog_page +Output [2]: [cp_catalog_page_sk#58, cp_catalog_page_id#59] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_page] +PushedFilters: [IsNotNull(cp_catalog_page_sk)] +ReadSchema: struct + +(65) ColumnarToRow [codegen id : 18] +Input [2]: [cp_catalog_page_sk#58, cp_catalog_page_id#59] + +(66) Filter [codegen id : 18] +Input [2]: [cp_catalog_page_sk#58, cp_catalog_page_id#59] +Condition : isnotnull(cp_catalog_page_sk#58) + +(67) BroadcastExchange +Input [2]: [cp_catalog_page_sk#58, cp_catalog_page_id#59] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#60] + +(68) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [cs_catalog_page_sk#46] +Right 
keys [1]: [cp_catalog_page_sk#58] +Join condition: None + +(69) Project [codegen id : 19] +Output [5]: [cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56, cp_catalog_page_id#59] +Input [7]: [cs_catalog_page_sk#46, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56, cp_catalog_page_sk#58, cp_catalog_page_id#59] + +(70) HashAggregate [codegen id : 19] +Input [5]: [cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56, cp_catalog_page_id#59] +Keys [1]: [cp_catalog_page_id#59] +Functions [3]: [partial_sum(UnscaledValue(cs_ext_sales_price#50)), partial_sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(cs_net_profit#51 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Aggregate Attributes [5]: [sum#61, sum#62, isEmpty#63, sum#64, isEmpty#65] +Results [6]: [cp_catalog_page_id#59, sum#66, sum#67, isEmpty#68, sum#69, isEmpty#70] + +(71) Exchange +Input [6]: [cp_catalog_page_id#59, sum#66, sum#67, isEmpty#68, sum#69, isEmpty#70] +Arguments: hashpartitioning(cp_catalog_page_id#59, 5), true, [id=#71] + +(72) HashAggregate [codegen id : 20] +Input [6]: [cp_catalog_page_id#59, sum#66, sum#67, isEmpty#68, sum#69, isEmpty#70] +Keys [1]: [cp_catalog_page_id#59] +Functions [3]: [sum(UnscaledValue(cs_ext_sales_price#50)), sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(cs_net_profit#51 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_sales_price#50))#72, sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00))#73, sum(CheckOverflow((promote_precision(cast(cs_net_profit#51 as decimal(13,2))) - 
promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#74] +Results [5]: [MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#50))#72,17,2) AS sales#75, sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00))#73 AS returns#76, sum(CheckOverflow((promote_precision(cast(cs_net_profit#51 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#74 AS profit#77, catalog channel AS channel#78, concat(catalog_page, cp_catalog_page_id#59) AS id#79] + +(73) Scan parquet default.web_sales +Output [7]: [ws_sold_date_sk#80, ws_item_sk#81, ws_web_site_sk#82, ws_promo_sk#83, ws_order_number#84, ws_ext_sales_price#85, ws_net_profit#86] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_web_site_sk), IsNotNull(ws_item_sk), IsNotNull(ws_promo_sk)] +ReadSchema: struct + +(74) ColumnarToRow [codegen id : 21] +Input [7]: [ws_sold_date_sk#80, ws_item_sk#81, ws_web_site_sk#82, ws_promo_sk#83, ws_order_number#84, ws_ext_sales_price#85, ws_net_profit#86] + +(75) Filter [codegen id : 21] +Input [7]: [ws_sold_date_sk#80, ws_item_sk#81, ws_web_site_sk#82, ws_promo_sk#83, ws_order_number#84, ws_ext_sales_price#85, ws_net_profit#86] +Condition : (((isnotnull(ws_sold_date_sk#80) AND isnotnull(ws_web_site_sk#82)) AND isnotnull(ws_item_sk#81)) AND isnotnull(ws_promo_sk#83)) + +(76) Exchange +Input [7]: [ws_sold_date_sk#80, ws_item_sk#81, ws_web_site_sk#82, ws_promo_sk#83, ws_order_number#84, ws_ext_sales_price#85, ws_net_profit#86] +Arguments: hashpartitioning(cast(ws_item_sk#81 as bigint), cast(ws_order_number#84 as bigint), 5), true, [id=#87] + +(77) Sort [codegen id : 22] +Input [7]: [ws_sold_date_sk#80, ws_item_sk#81, ws_web_site_sk#82, ws_promo_sk#83, 
ws_order_number#84, ws_ext_sales_price#85, ws_net_profit#86] +Arguments: [cast(ws_item_sk#81 as bigint) ASC NULLS FIRST, cast(ws_order_number#84 as bigint) ASC NULLS FIRST], false, 0 + +(78) Scan parquet default.web_returns +Output [4]: [wr_item_sk#88, wr_order_number#89, wr_return_amt#90, wr_net_loss#91] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_returns] +PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_order_number)] +ReadSchema: struct + +(79) ColumnarToRow [codegen id : 23] +Input [4]: [wr_item_sk#88, wr_order_number#89, wr_return_amt#90, wr_net_loss#91] + +(80) Filter [codegen id : 23] +Input [4]: [wr_item_sk#88, wr_order_number#89, wr_return_amt#90, wr_net_loss#91] +Condition : (isnotnull(wr_item_sk#88) AND isnotnull(wr_order_number#89)) + +(81) Exchange +Input [4]: [wr_item_sk#88, wr_order_number#89, wr_return_amt#90, wr_net_loss#91] +Arguments: hashpartitioning(wr_item_sk#88, wr_order_number#89, 5), true, [id=#92] + +(82) Sort [codegen id : 24] +Input [4]: [wr_item_sk#88, wr_order_number#89, wr_return_amt#90, wr_net_loss#91] +Arguments: [wr_item_sk#88 ASC NULLS FIRST, wr_order_number#89 ASC NULLS FIRST], false, 0 + +(83) SortMergeJoin +Left keys [2]: [cast(ws_item_sk#81 as bigint), cast(ws_order_number#84 as bigint)] +Right keys [2]: [wr_item_sk#88, wr_order_number#89] +Join condition: None + +(84) Project [codegen id : 29] +Output [8]: [ws_sold_date_sk#80, ws_item_sk#81, ws_web_site_sk#82, ws_promo_sk#83, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91] +Input [11]: [ws_sold_date_sk#80, ws_item_sk#81, ws_web_site_sk#82, ws_promo_sk#83, ws_order_number#84, ws_ext_sales_price#85, ws_net_profit#86, wr_item_sk#88, wr_order_number#89, wr_return_amt#90, wr_net_loss#91] + +(85) ReusedExchange [Reuses operator id: 17] +Output [1]: [i_item_sk#14] + +(86) BroadcastHashJoin [codegen id : 29] +Left keys [1]: 
[ws_item_sk#81] +Right keys [1]: [i_item_sk#14] +Join condition: None + +(87) Project [codegen id : 29] +Output [7]: [ws_sold_date_sk#80, ws_web_site_sk#82, ws_promo_sk#83, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91] +Input [9]: [ws_sold_date_sk#80, ws_item_sk#81, ws_web_site_sk#82, ws_promo_sk#83, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91, i_item_sk#14] + +(88) ReusedExchange [Reuses operator id: 24] +Output [1]: [d_date_sk#17] + +(89) BroadcastHashJoin [codegen id : 29] +Left keys [1]: [ws_sold_date_sk#80] +Right keys [1]: [d_date_sk#17] +Join condition: None + +(90) Project [codegen id : 29] +Output [6]: [ws_web_site_sk#82, ws_promo_sk#83, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91] +Input [8]: [ws_sold_date_sk#80, ws_web_site_sk#82, ws_promo_sk#83, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91, d_date_sk#17] + +(91) ReusedExchange [Reuses operator id: 31] +Output [1]: [p_promo_sk#20] + +(92) BroadcastHashJoin [codegen id : 29] +Left keys [1]: [ws_promo_sk#83] +Right keys [1]: [p_promo_sk#20] +Join condition: None + +(93) Project [codegen id : 29] +Output [5]: [ws_web_site_sk#82, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91] +Input [7]: [ws_web_site_sk#82, ws_promo_sk#83, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91, p_promo_sk#20] + +(94) Scan parquet default.web_site +Output [2]: [web_site_sk#93, web_site_id#94] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_site] +PushedFilters: [IsNotNull(web_site_sk)] +ReadSchema: struct + +(95) ColumnarToRow [codegen id : 28] +Input [2]: [web_site_sk#93, web_site_id#94] + +(96) Filter [codegen id : 28] +Input [2]: [web_site_sk#93, web_site_id#94] +Condition : isnotnull(web_site_sk#93) + +(97) BroadcastExchange +Input [2]: 
[web_site_sk#93, web_site_id#94] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#95] + +(98) BroadcastHashJoin [codegen id : 29] +Left keys [1]: [ws_web_site_sk#82] +Right keys [1]: [web_site_sk#93] +Join condition: None + +(99) Project [codegen id : 29] +Output [5]: [ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91, web_site_id#94] +Input [7]: [ws_web_site_sk#82, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91, web_site_sk#93, web_site_id#94] + +(100) HashAggregate [codegen id : 29] +Input [5]: [ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91, web_site_id#94] +Keys [1]: [web_site_id#94] +Functions [3]: [partial_sum(UnscaledValue(ws_ext_sales_price#85)), partial_sum(coalesce(cast(wr_return_amt#90 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ws_net_profit#86 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#91 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Aggregate Attributes [5]: [sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] +Results [6]: [web_site_id#94, sum#101, sum#102, isEmpty#103, sum#104, isEmpty#105] + +(101) Exchange +Input [6]: [web_site_id#94, sum#101, sum#102, isEmpty#103, sum#104, isEmpty#105] +Arguments: hashpartitioning(web_site_id#94, 5), true, [id=#106] + +(102) HashAggregate [codegen id : 30] +Input [6]: [web_site_id#94, sum#101, sum#102, isEmpty#103, sum#104, isEmpty#105] +Keys [1]: [web_site_id#94] +Functions [3]: [sum(UnscaledValue(ws_ext_sales_price#85)), sum(coalesce(cast(wr_return_amt#90 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ws_net_profit#86 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#91 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_sales_price#85))#107, sum(coalesce(cast(wr_return_amt#90 as 
decimal(12,2)), 0.00))#108, sum(CheckOverflow((promote_precision(cast(ws_net_profit#86 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#91 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#109] +Results [5]: [MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#85))#107,17,2) AS sales#110, sum(coalesce(cast(wr_return_amt#90 as decimal(12,2)), 0.00))#108 AS returns#111, sum(CheckOverflow((promote_precision(cast(ws_net_profit#86 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#91 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#109 AS profit#112, web channel AS channel#113, concat(web_site, web_site_id#94) AS id#114] + +(103) Union + +(104) Expand [codegen id : 31] +Input [5]: [sales#40, returns#41, profit#42, channel#43, id#44] +Arguments: [List(sales#40, returns#41, profit#42, channel#43, id#44, 0), List(sales#40, returns#41, profit#42, channel#43, null, 1), List(sales#40, returns#41, profit#42, null, null, 3)], [sales#40, returns#41, profit#42, channel#115, id#116, spark_grouping_id#117] + +(105) HashAggregate [codegen id : 31] +Input [6]: [sales#40, returns#41, profit#42, channel#115, id#116, spark_grouping_id#117] +Keys [3]: [channel#115, id#116, spark_grouping_id#117] +Functions [3]: [partial_sum(sales#40), partial_sum(returns#41), partial_sum(profit#42)] +Aggregate Attributes [6]: [sum#118, isEmpty#119, sum#120, isEmpty#121, sum#122, isEmpty#123] +Results [9]: [channel#115, id#116, spark_grouping_id#117, sum#124, isEmpty#125, sum#126, isEmpty#127, sum#128, isEmpty#129] + +(106) Exchange +Input [9]: [channel#115, id#116, spark_grouping_id#117, sum#124, isEmpty#125, sum#126, isEmpty#127, sum#128, isEmpty#129] +Arguments: hashpartitioning(channel#115, id#116, spark_grouping_id#117, 5), true, [id=#130] + +(107) HashAggregate [codegen id : 32] +Input [9]: [channel#115, id#116, spark_grouping_id#117, sum#124, isEmpty#125, sum#126, isEmpty#127, sum#128, isEmpty#129] +Keys [3]: 
[channel#115, id#116, spark_grouping_id#117] +Functions [3]: [sum(sales#40), sum(returns#41), sum(profit#42)] +Aggregate Attributes [3]: [sum(sales#40)#131, sum(returns#41)#132, sum(profit#42)#133] +Results [5]: [channel#115, id#116, sum(sales#40)#131 AS sales#134, sum(returns#41)#132 AS returns#135, sum(profit#42)#133 AS profit#136] + +(108) TakeOrderedAndProject +Input [5]: [channel#115, id#116, sales#134, returns#135, profit#136] +Arguments: 100, [channel#115 ASC NULLS FIRST, id#116 ASC NULLS FIRST], [channel#115, id#116, sales#134, returns#135, profit#136] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/simplified.txt new file mode 100644 index 0000000000000..a2189f6763eb3 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/simplified.txt @@ -0,0 +1,172 @@ +TakeOrderedAndProject [channel,id,profit,returns,sales] + WholeStageCodegen (32) + HashAggregate [channel,id,isEmpty,isEmpty,isEmpty,spark_grouping_id,sum,sum,sum] [isEmpty,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] + InputAdapter + Exchange [channel,id,spark_grouping_id] #1 + WholeStageCodegen (31) + HashAggregate [channel,id,profit,returns,sales,spark_grouping_id] [isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum] + Expand [channel,id,profit,returns,sales] + InputAdapter + Union + WholeStageCodegen (10) + HashAggregate [isEmpty,isEmpty,s_store_id,sum,sum,sum] [channel,id,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00))] + InputAdapter + Exchange [s_store_id] #2 + 
WholeStageCodegen (9) + HashAggregate [s_store_id,sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_net_profit] [isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum] + Project [s_store_id,sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_net_profit,ss_store_sk] + BroadcastHashJoin [p_promo_sk,ss_promo_sk] + Project [sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_net_profit,ss_promo_sk,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_net_profit,ss_promo_sk,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_promo_sk,ss_sold_date_sk,ss_store_sk] + InputAdapter + SortMergeJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + WholeStageCodegen (2) + Sort [ss_item_sk,ss_ticket_number] + InputAdapter + Exchange [ss_item_sk,ss_ticket_number] #3 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + WholeStageCodegen (4) + Sort [sr_item_sk,sr_ticket_number] + InputAdapter + Exchange [sr_item_sk,sr_ticket_number] #4 + WholeStageCodegen (3) + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_net_loss,sr_return_amt,sr_ticket_number] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (5) + Project [i_item_sk] + Filter [i_current_price,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_current_price,i_item_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (6) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim 
[d_date,d_date_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (7) + Project [p_promo_sk] + Filter [p_channel_tv,p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.promotion [p_channel_tv,p_promo_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (8) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_id,s_store_sk] + WholeStageCodegen (20) + HashAggregate [cp_catalog_page_id,isEmpty,isEmpty,sum,sum,sum] [channel,id,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00))] + InputAdapter + Exchange [cp_catalog_page_id] #9 + WholeStageCodegen (19) + HashAggregate [cp_catalog_page_id,cr_net_loss,cr_return_amount,cs_ext_sales_price,cs_net_profit] [isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum] + Project [cp_catalog_page_id,cr_net_loss,cr_return_amount,cs_ext_sales_price,cs_net_profit] + BroadcastHashJoin [cp_catalog_page_sk,cs_catalog_page_sk] + Project [cr_net_loss,cr_return_amount,cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit] + BroadcastHashJoin [cs_promo_sk,p_promo_sk] + Project [cr_net_loss,cr_return_amount,cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cs_promo_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cr_net_loss,cr_return_amount,cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cs_promo_sk,cs_sold_date_sk] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cr_net_loss,cr_return_amount,cs_catalog_page_sk,cs_ext_sales_price,cs_item_sk,cs_net_profit,cs_promo_sk,cs_sold_date_sk] + InputAdapter + SortMergeJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + WholeStageCodegen (12) + Sort [cs_item_sk,cs_order_number] + InputAdapter + Exchange 
[cs_item_sk,cs_order_number] #10 + WholeStageCodegen (11) + Filter [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_catalog_page_sk,cs_ext_sales_price,cs_item_sk,cs_net_profit,cs_order_number,cs_promo_sk,cs_sold_date_sk] + WholeStageCodegen (14) + Sort [cr_item_sk,cr_order_number] + InputAdapter + Exchange [cr_item_sk,cr_order_number] #11 + WholeStageCodegen (13) + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_net_loss,cr_order_number,cr_return_amount] + InputAdapter + ReusedExchange [i_item_sk] #5 + InputAdapter + ReusedExchange [d_date_sk] #6 + InputAdapter + ReusedExchange [p_promo_sk] #7 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (18) + Filter [cp_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_page [cp_catalog_page_id,cp_catalog_page_sk] + WholeStageCodegen (30) + HashAggregate [isEmpty,isEmpty,sum,sum,sum,web_site_id] [channel,id,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00))] + InputAdapter + Exchange [web_site_id] #13 + WholeStageCodegen (29) + HashAggregate [web_site_id,wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_net_profit] [isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum] + Project [web_site_id,wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_net_profit] + BroadcastHashJoin [web_site_sk,ws_web_site_sk] + Project [wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_net_profit,ws_web_site_sk] + BroadcastHashJoin [p_promo_sk,ws_promo_sk] + Project [wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_net_profit,ws_promo_sk,ws_web_site_sk] + BroadcastHashJoin 
[d_date_sk,ws_sold_date_sk] + Project [wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_net_profit,ws_promo_sk,ws_sold_date_sk,ws_web_site_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_item_sk,ws_net_profit,ws_promo_sk,ws_sold_date_sk,ws_web_site_sk] + InputAdapter + SortMergeJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + WholeStageCodegen (22) + Sort [ws_item_sk,ws_order_number] + InputAdapter + Exchange [ws_item_sk,ws_order_number] #14 + WholeStageCodegen (21) + Filter [ws_item_sk,ws_promo_sk,ws_sold_date_sk,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_net_profit,ws_order_number,ws_promo_sk,ws_sold_date_sk,ws_web_site_sk] + WholeStageCodegen (24) + Sort [wr_item_sk,wr_order_number] + InputAdapter + Exchange [wr_item_sk,wr_order_number] #15 + WholeStageCodegen (23) + Filter [wr_item_sk,wr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_item_sk,wr_net_loss,wr_order_number,wr_return_amt] + InputAdapter + ReusedExchange [i_item_sk] #5 + InputAdapter + ReusedExchange [d_date_sk] #6 + InputAdapter + ReusedExchange [p_promo_sk] #7 + InputAdapter + BroadcastExchange #16 + WholeStageCodegen (28) + Filter [web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_site [web_site_id,web_site_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/explain.txt new file mode 100644 index 0000000000000..b835dec02cbea --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/explain.txt @@ -0,0 +1,553 @@ +== Physical Plan == +TakeOrderedAndProject (99) ++- * HashAggregate (98) + +- Exchange (97) + +- * HashAggregate (96) + +- * Expand (95) + +- Union (94) + :- * HashAggregate (39) + : +- Exchange (38) + : +- * HashAggregate (37) + : +- * 
Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Project (29) + : : +- * BroadcastHashJoin Inner BuildRight (28) + : : :- * Project (22) + : : : +- * BroadcastHashJoin Inner BuildRight (21) + : : : :- * Project (16) + : : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : : :- * Project (9) + : : : : : +- * BroadcastHashJoin LeftOuter BuildRight (8) + : : : : : :- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet default.store_sales (1) + : : : : : +- BroadcastExchange (7) + : : : : : +- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.store_returns (4) + : : : : +- BroadcastExchange (14) + : : : : +- * Project (13) + : : : : +- * Filter (12) + : : : : +- * ColumnarToRow (11) + : : : : +- Scan parquet default.date_dim (10) + : : : +- BroadcastExchange (20) + : : : +- * Filter (19) + : : : +- * ColumnarToRow (18) + : : : +- Scan parquet default.store (17) + : : +- BroadcastExchange (27) + : : +- * Project (26) + : : +- * Filter (25) + : : +- * ColumnarToRow (24) + : : +- Scan parquet default.item (23) + : +- BroadcastExchange (34) + : +- * Project (33) + : +- * Filter (32) + : +- * ColumnarToRow (31) + : +- Scan parquet default.promotion (30) + :- * HashAggregate (66) + : +- Exchange (65) + : +- * HashAggregate (64) + : +- * Project (63) + : +- * BroadcastHashJoin Inner BuildRight (62) + : :- * Project (60) + : : +- * BroadcastHashJoin Inner BuildRight (59) + : : :- * Project (57) + : : : +- * BroadcastHashJoin Inner BuildRight (56) + : : : :- * Project (51) + : : : : +- * BroadcastHashJoin Inner BuildRight (50) + : : : : :- * Project (48) + : : : : : +- * BroadcastHashJoin LeftOuter BuildRight (47) + : : : : : :- * Filter (42) + : : : : : : +- * ColumnarToRow (41) + : : : : : : +- Scan parquet default.catalog_sales (40) + : : : : : +- BroadcastExchange (46) + : : : : : +- * Filter (45) + : : : : : +- * ColumnarToRow (44) + : : : : : +- Scan parquet default.catalog_returns 
(43) + : : : : +- ReusedExchange (49) + : : : +- BroadcastExchange (55) + : : : +- * Filter (54) + : : : +- * ColumnarToRow (53) + : : : +- Scan parquet default.catalog_page (52) + : : +- ReusedExchange (58) + : +- ReusedExchange (61) + +- * HashAggregate (93) + +- Exchange (92) + +- * HashAggregate (91) + +- * Project (90) + +- * BroadcastHashJoin Inner BuildRight (89) + :- * Project (87) + : +- * BroadcastHashJoin Inner BuildRight (86) + : :- * Project (84) + : : +- * BroadcastHashJoin Inner BuildRight (83) + : : :- * Project (78) + : : : +- * BroadcastHashJoin Inner BuildRight (77) + : : : :- * Project (75) + : : : : +- * BroadcastHashJoin LeftOuter BuildRight (74) + : : : : :- * Filter (69) + : : : : : +- * ColumnarToRow (68) + : : : : : +- Scan parquet default.web_sales (67) + : : : : +- BroadcastExchange (73) + : : : : +- * Filter (72) + : : : : +- * ColumnarToRow (71) + : : : : +- Scan parquet default.web_returns (70) + : : : +- ReusedExchange (76) + : : +- BroadcastExchange (82) + : : +- * Filter (81) + : : +- * ColumnarToRow (80) + : : +- Scan parquet default.web_site (79) + : +- ReusedExchange (85) + +- ReusedExchange (88) + + +(1) Scan parquet default.store_sales +Output [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_net_profit#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk), IsNotNull(ss_promo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 6] +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_net_profit#7] + +(3) Filter [codegen id : 6] +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_net_profit#7] +Condition : 
(((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_promo_sk#4)) + +(4) Scan parquet default.store_returns +Output [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_returns] +PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] + +(6) Filter [codegen id : 1] +Input [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Condition : (isnotnull(sr_item_sk#8) AND isnotnull(sr_ticket_number#9)) + +(7) BroadcastExchange +Input [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false], input[1, bigint, false]),false), [id=#12] + +(8) BroadcastHashJoin [codegen id : 6] +Left keys [2]: [cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint)] +Right keys [2]: [sr_item_sk#8, sr_ticket_number#9] +Join condition: None + +(9) Project [codegen id : 6] +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11] +Input [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_net_profit#7, sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] + +(10) Scan parquet default.date_dim +Output [2]: [d_date_sk#13, d_date#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-22), IsNotNull(d_date_sk)] 
+ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#13, d_date#14] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#13, d_date#14] +Condition : (((isnotnull(d_date#14) AND (d_date#14 >= 11192)) AND (d_date#14 <= 11222)) AND isnotnull(d_date_sk#13)) + +(13) Project [codegen id : 2] +Output [1]: [d_date_sk#13] +Input [2]: [d_date_sk#13, d_date#14] + +(14) BroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] + +(15) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#13] +Join condition: None + +(16) Project [codegen id : 6] +Output [7]: [ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11, d_date_sk#13] + +(17) Scan parquet default.store +Output [2]: [s_store_sk#16, s_store_id#17] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [2]: [s_store_sk#16, s_store_id#17] + +(19) Filter [codegen id : 3] +Input [2]: [s_store_sk#16, s_store_id#17] +Condition : isnotnull(s_store_sk#16) + +(20) BroadcastExchange +Input [2]: [s_store_sk#16, s_store_id#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#18] + +(21) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#16] +Join condition: None + +(22) Project [codegen id : 6] +Output [7]: [ss_item_sk#2, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11, s_store_id#17] +Input [9]: [ss_item_sk#2, ss_store_sk#3, 
ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11, s_store_sk#16, s_store_id#17] + +(23) Scan parquet default.item +Output [2]: [i_item_sk#19, i_current_price#20] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_current_price), GreaterThan(i_current_price,50.00), IsNotNull(i_item_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#19, i_current_price#20] + +(25) Filter [codegen id : 4] +Input [2]: [i_item_sk#19, i_current_price#20] +Condition : ((isnotnull(i_current_price#20) AND (i_current_price#20 > 50.00)) AND isnotnull(i_item_sk#19)) + +(26) Project [codegen id : 4] +Output [1]: [i_item_sk#19] +Input [2]: [i_item_sk#19, i_current_price#20] + +(27) BroadcastExchange +Input [1]: [i_item_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] + +(28) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#19] +Join condition: None + +(29) Project [codegen id : 6] +Output [6]: [ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11, s_store_id#17] +Input [8]: [ss_item_sk#2, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11, s_store_id#17, i_item_sk#19] + +(30) Scan parquet default.promotion +Output [2]: [p_promo_sk#22, p_channel_tv#23] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/promotion] +PushedFilters: [IsNotNull(p_channel_tv), EqualTo(p_channel_tv,N), IsNotNull(p_promo_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 5] +Input [2]: [p_promo_sk#22, p_channel_tv#23] + +(32) Filter [codegen id : 5] +Input [2]: [p_promo_sk#22, p_channel_tv#23] +Condition : 
((isnotnull(p_channel_tv#23) AND (p_channel_tv#23 = N)) AND isnotnull(p_promo_sk#22)) + +(33) Project [codegen id : 5] +Output [1]: [p_promo_sk#22] +Input [2]: [p_promo_sk#22, p_channel_tv#23] + +(34) BroadcastExchange +Input [1]: [p_promo_sk#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#24] + +(35) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_promo_sk#4] +Right keys [1]: [p_promo_sk#22] +Join condition: None + +(36) Project [codegen id : 6] +Output [5]: [ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11, s_store_id#17] +Input [7]: [ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11, s_store_id#17, p_promo_sk#22] + +(37) HashAggregate [codegen id : 6] +Input [5]: [ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11, s_store_id#17] +Keys [1]: [s_store_id#17] +Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#6)), partial_sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ss_net_profit#7 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Aggregate Attributes [5]: [sum#25, sum#26, isEmpty#27, sum#28, isEmpty#29] +Results [6]: [s_store_id#17, sum#30, sum#31, isEmpty#32, sum#33, isEmpty#34] + +(38) Exchange +Input [6]: [s_store_id#17, sum#30, sum#31, isEmpty#32, sum#33, isEmpty#34] +Arguments: hashpartitioning(s_store_id#17, 5), true, [id=#35] + +(39) HashAggregate [codegen id : 7] +Input [6]: [s_store_id#17, sum#30, sum#31, isEmpty#32, sum#33, isEmpty#34] +Keys [1]: [s_store_id#17] +Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#6)), sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ss_net_profit#7 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00) as 
decimal(13,2)))), DecimalType(13,2), true))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#6))#36, sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00))#37, sum(CheckOverflow((promote_precision(cast(ss_net_profit#7 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#38] +Results [5]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#36,17,2) AS sales#39, sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00))#37 AS returns#40, sum(CheckOverflow((promote_precision(cast(ss_net_profit#7 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#38 AS profit#41, store channel AS channel#42, concat(store, s_store_id#17) AS id#43] + +(40) Scan parquet default.catalog_sales +Output [7]: [cs_sold_date_sk#44, cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_catalog_page_sk), IsNotNull(cs_item_sk), IsNotNull(cs_promo_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 13] +Input [7]: [cs_sold_date_sk#44, cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50] + +(42) Filter [codegen id : 13] +Input [7]: [cs_sold_date_sk#44, cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50] +Condition : (((isnotnull(cs_sold_date_sk#44) AND isnotnull(cs_catalog_page_sk#45)) AND isnotnull(cs_item_sk#46)) AND isnotnull(cs_promo_sk#47)) + +(43) Scan parquet default.catalog_returns +Output [4]: [cr_item_sk#51, cr_order_number#52, cr_return_amount#53, cr_net_loss#54] 
+Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_returns] +PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] +ReadSchema: struct + +(44) ColumnarToRow [codegen id : 8] +Input [4]: [cr_item_sk#51, cr_order_number#52, cr_return_amount#53, cr_net_loss#54] + +(45) Filter [codegen id : 8] +Input [4]: [cr_item_sk#51, cr_order_number#52, cr_return_amount#53, cr_net_loss#54] +Condition : (isnotnull(cr_item_sk#51) AND isnotnull(cr_order_number#52)) + +(46) BroadcastExchange +Input [4]: [cr_item_sk#51, cr_order_number#52, cr_return_amount#53, cr_net_loss#54] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[1, int, false] as bigint) & 4294967295))),false), [id=#55] + +(47) BroadcastHashJoin [codegen id : 13] +Left keys [2]: [cs_item_sk#46, cs_order_number#48] +Right keys [2]: [cr_item_sk#51, cr_order_number#52] +Join condition: None + +(48) Project [codegen id : 13] +Output [8]: [cs_sold_date_sk#44, cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54] +Input [11]: [cs_sold_date_sk#44, cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50, cr_item_sk#51, cr_order_number#52, cr_return_amount#53, cr_net_loss#54] + +(49) ReusedExchange [Reuses operator id: 14] +Output [1]: [d_date_sk#13] + +(50) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [cs_sold_date_sk#44] +Right keys [1]: [d_date_sk#13] +Join condition: None + +(51) Project [codegen id : 13] +Output [7]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54] +Input [9]: [cs_sold_date_sk#44, cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, 
cr_net_loss#54, d_date_sk#13] + +(52) Scan parquet default.catalog_page +Output [2]: [cp_catalog_page_sk#56, cp_catalog_page_id#57] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_page] +PushedFilters: [IsNotNull(cp_catalog_page_sk)] +ReadSchema: struct + +(53) ColumnarToRow [codegen id : 10] +Input [2]: [cp_catalog_page_sk#56, cp_catalog_page_id#57] + +(54) Filter [codegen id : 10] +Input [2]: [cp_catalog_page_sk#56, cp_catalog_page_id#57] +Condition : isnotnull(cp_catalog_page_sk#56) + +(55) BroadcastExchange +Input [2]: [cp_catalog_page_sk#56, cp_catalog_page_id#57] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#58] + +(56) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [cs_catalog_page_sk#45] +Right keys [1]: [cp_catalog_page_sk#56] +Join condition: None + +(57) Project [codegen id : 13] +Output [7]: [cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54, cp_catalog_page_id#57] +Input [9]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54, cp_catalog_page_sk#56, cp_catalog_page_id#57] + +(58) ReusedExchange [Reuses operator id: 27] +Output [1]: [i_item_sk#19] + +(59) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [cs_item_sk#46] +Right keys [1]: [i_item_sk#19] +Join condition: None + +(60) Project [codegen id : 13] +Output [6]: [cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54, cp_catalog_page_id#57] +Input [8]: [cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54, cp_catalog_page_id#57, i_item_sk#19] + +(61) ReusedExchange [Reuses operator id: 34] +Output [1]: [p_promo_sk#22] + +(62) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [cs_promo_sk#47] 
+Right keys [1]: [p_promo_sk#22] +Join condition: None + +(63) Project [codegen id : 13] +Output [5]: [cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54, cp_catalog_page_id#57] +Input [7]: [cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54, cp_catalog_page_id#57, p_promo_sk#22] + +(64) HashAggregate [codegen id : 13] +Input [5]: [cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54, cp_catalog_page_id#57] +Keys [1]: [cp_catalog_page_id#57] +Functions [3]: [partial_sum(UnscaledValue(cs_ext_sales_price#49)), partial_sum(coalesce(cast(cr_return_amount#53 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#54 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Aggregate Attributes [5]: [sum#59, sum#60, isEmpty#61, sum#62, isEmpty#63] +Results [6]: [cp_catalog_page_id#57, sum#64, sum#65, isEmpty#66, sum#67, isEmpty#68] + +(65) Exchange +Input [6]: [cp_catalog_page_id#57, sum#64, sum#65, isEmpty#66, sum#67, isEmpty#68] +Arguments: hashpartitioning(cp_catalog_page_id#57, 5), true, [id=#69] + +(66) HashAggregate [codegen id : 14] +Input [6]: [cp_catalog_page_id#57, sum#64, sum#65, isEmpty#66, sum#67, isEmpty#68] +Keys [1]: [cp_catalog_page_id#57] +Functions [3]: [sum(UnscaledValue(cs_ext_sales_price#49)), sum(coalesce(cast(cr_return_amount#53 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#54 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_sales_price#49))#70, sum(coalesce(cast(cr_return_amount#53 as decimal(12,2)), 0.00))#71, sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#54 as 
decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#72] +Results [5]: [MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#49))#70,17,2) AS sales#73, sum(coalesce(cast(cr_return_amount#53 as decimal(12,2)), 0.00))#71 AS returns#74, sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#54 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#72 AS profit#75, catalog channel AS channel#76, concat(catalog_page, cp_catalog_page_id#57) AS id#77] + +(67) Scan parquet default.web_sales +Output [7]: [ws_sold_date_sk#78, ws_item_sk#79, ws_web_site_sk#80, ws_promo_sk#81, ws_order_number#82, ws_ext_sales_price#83, ws_net_profit#84] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_web_site_sk), IsNotNull(ws_item_sk), IsNotNull(ws_promo_sk)] +ReadSchema: struct + +(68) ColumnarToRow [codegen id : 20] +Input [7]: [ws_sold_date_sk#78, ws_item_sk#79, ws_web_site_sk#80, ws_promo_sk#81, ws_order_number#82, ws_ext_sales_price#83, ws_net_profit#84] + +(69) Filter [codegen id : 20] +Input [7]: [ws_sold_date_sk#78, ws_item_sk#79, ws_web_site_sk#80, ws_promo_sk#81, ws_order_number#82, ws_ext_sales_price#83, ws_net_profit#84] +Condition : (((isnotnull(ws_sold_date_sk#78) AND isnotnull(ws_web_site_sk#80)) AND isnotnull(ws_item_sk#79)) AND isnotnull(ws_promo_sk#81)) + +(70) Scan parquet default.web_returns +Output [4]: [wr_item_sk#85, wr_order_number#86, wr_return_amt#87, wr_net_loss#88] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_returns] +PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_order_number)] +ReadSchema: struct + +(71) ColumnarToRow [codegen id : 15] +Input [4]: [wr_item_sk#85, 
wr_order_number#86, wr_return_amt#87, wr_net_loss#88] + +(72) Filter [codegen id : 15] +Input [4]: [wr_item_sk#85, wr_order_number#86, wr_return_amt#87, wr_net_loss#88] +Condition : (isnotnull(wr_item_sk#85) AND isnotnull(wr_order_number#86)) + +(73) BroadcastExchange +Input [4]: [wr_item_sk#85, wr_order_number#86, wr_return_amt#87, wr_net_loss#88] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false], input[1, bigint, false]),false), [id=#89] + +(74) BroadcastHashJoin [codegen id : 20] +Left keys [2]: [cast(ws_item_sk#79 as bigint), cast(ws_order_number#82 as bigint)] +Right keys [2]: [wr_item_sk#85, wr_order_number#86] +Join condition: None + +(75) Project [codegen id : 20] +Output [8]: [ws_sold_date_sk#78, ws_item_sk#79, ws_web_site_sk#80, ws_promo_sk#81, ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88] +Input [11]: [ws_sold_date_sk#78, ws_item_sk#79, ws_web_site_sk#80, ws_promo_sk#81, ws_order_number#82, ws_ext_sales_price#83, ws_net_profit#84, wr_item_sk#85, wr_order_number#86, wr_return_amt#87, wr_net_loss#88] + +(76) ReusedExchange [Reuses operator id: 14] +Output [1]: [d_date_sk#13] + +(77) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ws_sold_date_sk#78] +Right keys [1]: [d_date_sk#13] +Join condition: None + +(78) Project [codegen id : 20] +Output [7]: [ws_item_sk#79, ws_web_site_sk#80, ws_promo_sk#81, ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88] +Input [9]: [ws_sold_date_sk#78, ws_item_sk#79, ws_web_site_sk#80, ws_promo_sk#81, ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88, d_date_sk#13] + +(79) Scan parquet default.web_site +Output [2]: [web_site_sk#90, web_site_id#91] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_site] +PushedFilters: [IsNotNull(web_site_sk)] +ReadSchema: struct + +(80) ColumnarToRow [codegen id : 17] +Input [2]: 
[web_site_sk#90, web_site_id#91] + +(81) Filter [codegen id : 17] +Input [2]: [web_site_sk#90, web_site_id#91] +Condition : isnotnull(web_site_sk#90) + +(82) BroadcastExchange +Input [2]: [web_site_sk#90, web_site_id#91] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#92] + +(83) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ws_web_site_sk#80] +Right keys [1]: [web_site_sk#90] +Join condition: None + +(84) Project [codegen id : 20] +Output [7]: [ws_item_sk#79, ws_promo_sk#81, ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88, web_site_id#91] +Input [9]: [ws_item_sk#79, ws_web_site_sk#80, ws_promo_sk#81, ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88, web_site_sk#90, web_site_id#91] + +(85) ReusedExchange [Reuses operator id: 27] +Output [1]: [i_item_sk#19] + +(86) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ws_item_sk#79] +Right keys [1]: [i_item_sk#19] +Join condition: None + +(87) Project [codegen id : 20] +Output [6]: [ws_promo_sk#81, ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88, web_site_id#91] +Input [8]: [ws_item_sk#79, ws_promo_sk#81, ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88, web_site_id#91, i_item_sk#19] + +(88) ReusedExchange [Reuses operator id: 34] +Output [1]: [p_promo_sk#22] + +(89) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ws_promo_sk#81] +Right keys [1]: [p_promo_sk#22] +Join condition: None + +(90) Project [codegen id : 20] +Output [5]: [ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88, web_site_id#91] +Input [7]: [ws_promo_sk#81, ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88, web_site_id#91, p_promo_sk#22] + +(91) HashAggregate [codegen id : 20] +Input [5]: [ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88, web_site_id#91] +Keys [1]: [web_site_id#91] +Functions [3]: 
[partial_sum(UnscaledValue(ws_ext_sales_price#83)), partial_sum(coalesce(cast(wr_return_amt#87 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ws_net_profit#84 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#88 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Aggregate Attributes [5]: [sum#93, sum#94, isEmpty#95, sum#96, isEmpty#97] +Results [6]: [web_site_id#91, sum#98, sum#99, isEmpty#100, sum#101, isEmpty#102] + +(92) Exchange +Input [6]: [web_site_id#91, sum#98, sum#99, isEmpty#100, sum#101, isEmpty#102] +Arguments: hashpartitioning(web_site_id#91, 5), true, [id=#103] + +(93) HashAggregate [codegen id : 21] +Input [6]: [web_site_id#91, sum#98, sum#99, isEmpty#100, sum#101, isEmpty#102] +Keys [1]: [web_site_id#91] +Functions [3]: [sum(UnscaledValue(ws_ext_sales_price#83)), sum(coalesce(cast(wr_return_amt#87 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ws_net_profit#84 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#88 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_sales_price#83))#104, sum(coalesce(cast(wr_return_amt#87 as decimal(12,2)), 0.00))#105, sum(CheckOverflow((promote_precision(cast(ws_net_profit#84 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#88 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#106] +Results [5]: [MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#83))#104,17,2) AS sales#107, sum(coalesce(cast(wr_return_amt#87 as decimal(12,2)), 0.00))#105 AS returns#108, sum(CheckOverflow((promote_precision(cast(ws_net_profit#84 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#88 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#106 AS profit#109, web channel AS channel#110, concat(web_site, web_site_id#91) AS id#111] + +(94) Union + +(95) Expand [codegen id : 22] 
+Input [5]: [sales#39, returns#40, profit#41, channel#42, id#43] +Arguments: [List(sales#39, returns#40, profit#41, channel#42, id#43, 0), List(sales#39, returns#40, profit#41, channel#42, null, 1), List(sales#39, returns#40, profit#41, null, null, 3)], [sales#39, returns#40, profit#41, channel#112, id#113, spark_grouping_id#114] + +(96) HashAggregate [codegen id : 22] +Input [6]: [sales#39, returns#40, profit#41, channel#112, id#113, spark_grouping_id#114] +Keys [3]: [channel#112, id#113, spark_grouping_id#114] +Functions [3]: [partial_sum(sales#39), partial_sum(returns#40), partial_sum(profit#41)] +Aggregate Attributes [6]: [sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] +Results [9]: [channel#112, id#113, spark_grouping_id#114, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] + +(97) Exchange +Input [9]: [channel#112, id#113, spark_grouping_id#114, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] +Arguments: hashpartitioning(channel#112, id#113, spark_grouping_id#114, 5), true, [id=#127] + +(98) HashAggregate [codegen id : 23] +Input [9]: [channel#112, id#113, spark_grouping_id#114, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] +Keys [3]: [channel#112, id#113, spark_grouping_id#114] +Functions [3]: [sum(sales#39), sum(returns#40), sum(profit#41)] +Aggregate Attributes [3]: [sum(sales#39)#128, sum(returns#40)#129, sum(profit#41)#130] +Results [5]: [channel#112, id#113, sum(sales#39)#128 AS sales#131, sum(returns#40)#129 AS returns#132, sum(profit#41)#130 AS profit#133] + +(99) TakeOrderedAndProject +Input [5]: [channel#112, id#113, sales#131, returns#132, profit#133] +Arguments: 100, [channel#112 ASC NULLS FIRST, id#113 ASC NULLS FIRST], [channel#112, id#113, sales#131, returns#132, profit#133] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/simplified.txt new 
file mode 100644 index 0000000000000..2f2f3b8cd3d25 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/simplified.txt @@ -0,0 +1,148 @@ +TakeOrderedAndProject [channel,id,profit,returns,sales] + WholeStageCodegen (23) + HashAggregate [channel,id,isEmpty,isEmpty,isEmpty,spark_grouping_id,sum,sum,sum] [isEmpty,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] + InputAdapter + Exchange [channel,id,spark_grouping_id] #1 + WholeStageCodegen (22) + HashAggregate [channel,id,profit,returns,sales,spark_grouping_id] [isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum] + Expand [channel,id,profit,returns,sales] + InputAdapter + Union + WholeStageCodegen (7) + HashAggregate [isEmpty,isEmpty,s_store_id,sum,sum,sum] [channel,id,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00))] + InputAdapter + Exchange [s_store_id] #2 + WholeStageCodegen (6) + HashAggregate [s_store_id,sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_net_profit] [isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum] + Project [s_store_id,sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [p_promo_sk,ss_promo_sk] + Project [s_store_id,sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_net_profit,ss_promo_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [s_store_id,sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_promo_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_promo_sk,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project 
[sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_promo_sk,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Filter [ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_net_loss,sr_return_amt,sr_ticket_number] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_id,s_store_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + Project [i_item_sk] + Filter [i_current_price,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_current_price,i_item_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (5) + Project [p_promo_sk] + Filter [p_channel_tv,p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.promotion [p_channel_tv,p_promo_sk] + WholeStageCodegen (14) + HashAggregate [cp_catalog_page_id,isEmpty,isEmpty,sum,sum,sum] [channel,id,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00))] + InputAdapter + Exchange [cp_catalog_page_id] #8 + WholeStageCodegen (13) + HashAggregate 
[cp_catalog_page_id,cr_net_loss,cr_return_amount,cs_ext_sales_price,cs_net_profit] [isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum] + Project [cp_catalog_page_id,cr_net_loss,cr_return_amount,cs_ext_sales_price,cs_net_profit] + BroadcastHashJoin [cs_promo_sk,p_promo_sk] + Project [cp_catalog_page_id,cr_net_loss,cr_return_amount,cs_ext_sales_price,cs_net_profit,cs_promo_sk] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cp_catalog_page_id,cr_net_loss,cr_return_amount,cs_ext_sales_price,cs_item_sk,cs_net_profit,cs_promo_sk] + BroadcastHashJoin [cp_catalog_page_sk,cs_catalog_page_sk] + Project [cr_net_loss,cr_return_amount,cs_catalog_page_sk,cs_ext_sales_price,cs_item_sk,cs_net_profit,cs_promo_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cr_net_loss,cr_return_amount,cs_catalog_page_sk,cs_ext_sales_price,cs_item_sk,cs_net_profit,cs_promo_sk,cs_sold_date_sk] + BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + Filter [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_catalog_page_sk,cs_ext_sales_price,cs_item_sk,cs_net_profit,cs_order_number,cs_promo_sk,cs_sold_date_sk] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (8) + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_net_loss,cr_order_number,cr_return_amount] + InputAdapter + ReusedExchange [d_date_sk] #4 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (10) + Filter [cp_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_page [cp_catalog_page_id,cp_catalog_page_sk] + InputAdapter + ReusedExchange [i_item_sk] #6 + InputAdapter + ReusedExchange [p_promo_sk] #7 + WholeStageCodegen (21) + HashAggregate [isEmpty,isEmpty,sum,sum,sum,web_site_id] [channel,id,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(CheckOverflow((promote_precision(cast(ws_net_profit as 
decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00))] + InputAdapter + Exchange [web_site_id] #11 + WholeStageCodegen (20) + HashAggregate [web_site_id,wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_net_profit] [isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum] + Project [web_site_id,wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_net_profit] + BroadcastHashJoin [p_promo_sk,ws_promo_sk] + Project [web_site_id,wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_net_profit,ws_promo_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [web_site_id,wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_item_sk,ws_net_profit,ws_promo_sk] + BroadcastHashJoin [web_site_sk,ws_web_site_sk] + Project [wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_item_sk,ws_net_profit,ws_promo_sk,ws_web_site_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_item_sk,ws_net_profit,ws_promo_sk,ws_sold_date_sk,ws_web_site_sk] + BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + Filter [ws_item_sk,ws_promo_sk,ws_sold_date_sk,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_net_profit,ws_order_number,ws_promo_sk,ws_sold_date_sk,ws_web_site_sk] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (15) + Filter [wr_item_sk,wr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_item_sk,wr_net_loss,wr_order_number,wr_return_amt] + InputAdapter + ReusedExchange [d_date_sk] #4 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (17) + Filter [web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_site [web_site_id,web_site_sk] + InputAdapter + ReusedExchange [i_item_sk] #6 + InputAdapter + ReusedExchange 
[p_promo_sk] #7 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.sf100/explain.txt new file mode 100644 index 0000000000000..d85e361512b17 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.sf100/explain.txt @@ -0,0 +1,343 @@ +== Physical Plan == +TakeOrderedAndProject (61) ++- * Project (60) + +- * BroadcastHashJoin Inner BuildRight (59) + :- * Project (38) + : +- * SortMergeJoin Inner (37) + : :- * Sort (11) + : : +- Exchange (10) + : : +- * Project (9) + : : +- * BroadcastHashJoin Inner BuildRight (8) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.customer (1) + : : +- BroadcastExchange (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.customer_address (4) + : +- * Sort (36) + : +- Exchange (35) + : +- * Filter (34) + : +- * HashAggregate (33) + : +- Exchange (32) + : +- * HashAggregate (31) + : +- * Project (30) + : +- * SortMergeJoin Inner (29) + : :- * Sort (23) + : : +- Exchange (22) + : : +- * Project (21) + : : +- * BroadcastHashJoin Inner BuildRight (20) + : : :- * Filter (14) + : : : +- * ColumnarToRow (13) + : : : +- Scan parquet default.catalog_returns (12) + : : +- BroadcastExchange (19) + : : +- * Project (18) + : : +- * Filter (17) + : : +- * ColumnarToRow (16) + : : +- Scan parquet default.date_dim (15) + : +- * Sort (28) + : +- Exchange (27) + : +- * Filter (26) + : +- * ColumnarToRow (25) + : +- Scan parquet default.customer_address (24) + +- BroadcastExchange (58) + +- * Filter (57) + +- * HashAggregate (56) + +- Exchange (55) + +- * HashAggregate (54) + +- * HashAggregate (53) + +- Exchange (52) + +- * HashAggregate (51) + +- * Project (50) + +- * SortMergeJoin Inner (49) + :- * Sort (46) + : +- Exchange (45) + : +- * Project (44) + : +- * BroadcastHashJoin Inner BuildRight (43) + : :- * Filter (41) + : 
: +- * ColumnarToRow (40) + : : +- Scan parquet default.catalog_returns (39) + : +- ReusedExchange (42) + +- * Sort (48) + +- ReusedExchange (47) + + +(1) Scan parquet default.customer +Output [6]: [c_customer_sk#1, c_customer_id#2, c_current_addr_sk#3, c_salutation#4, c_first_name#5, c_last_name#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [6]: [c_customer_sk#1, c_customer_id#2, c_current_addr_sk#3, c_salutation#4, c_first_name#5, c_last_name#6] + +(3) Filter [codegen id : 2] +Input [6]: [c_customer_sk#1, c_customer_id#2, c_current_addr_sk#3, c_salutation#4, c_first_name#5, c_last_name#6] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_current_addr_sk#3)) + +(4) Scan parquet default.customer_address +Output [12]: [ca_address_sk#7, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,GA), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [12]: [ca_address_sk#7, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18] + +(6) Filter [codegen id : 1] +Input [12]: [ca_address_sk#7, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18] +Condition : 
((isnotnull(ca_state#14) AND (ca_state#14 = GA)) AND isnotnull(ca_address_sk#7)) + +(7) BroadcastExchange +Input [12]: [ca_address_sk#7, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] + +(8) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [c_current_addr_sk#3] +Right keys [1]: [ca_address_sk#7] +Join condition: None + +(9) Project [codegen id : 2] +Output [16]: [c_customer_sk#1, c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18] +Input [18]: [c_customer_sk#1, c_customer_id#2, c_current_addr_sk#3, c_salutation#4, c_first_name#5, c_last_name#6, ca_address_sk#7, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18] + +(10) Exchange +Input [16]: [c_customer_sk#1, c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18] +Arguments: hashpartitioning(c_customer_sk#1, 5), true, [id=#20] + +(11) Sort [codegen id : 3] +Input [16]: [c_customer_sk#1, c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18] +Arguments: [c_customer_sk#1 ASC NULLS FIRST], false, 0 + +(12) Scan parquet default.catalog_returns +Output [4]: [cr_returned_date_sk#21, 
cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_returns] +PushedFilters: [IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_addr_sk), IsNotNull(cr_returning_customer_sk)] +ReadSchema: struct + +(13) ColumnarToRow [codegen id : 5] +Input [4]: [cr_returned_date_sk#21, cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24] + +(14) Filter [codegen id : 5] +Input [4]: [cr_returned_date_sk#21, cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24] +Condition : ((isnotnull(cr_returned_date_sk#21) AND isnotnull(cr_returning_addr_sk#23)) AND isnotnull(cr_returning_customer_sk#22)) + +(15) Scan parquet default.date_dim +Output [2]: [d_date_sk#25, d_year#26] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 4] +Input [2]: [d_date_sk#25, d_year#26] + +(17) Filter [codegen id : 4] +Input [2]: [d_date_sk#25, d_year#26] +Condition : ((isnotnull(d_year#26) AND (d_year#26 = 2000)) AND isnotnull(d_date_sk#25)) + +(18) Project [codegen id : 4] +Output [1]: [d_date_sk#25] +Input [2]: [d_date_sk#25, d_year#26] + +(19) BroadcastExchange +Input [1]: [d_date_sk#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#27] + +(20) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cr_returned_date_sk#21] +Right keys [1]: [d_date_sk#25] +Join condition: None + +(21) Project [codegen id : 5] +Output [3]: [cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24] +Input [5]: [cr_returned_date_sk#21, 
cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24, d_date_sk#25] + +(22) Exchange +Input [3]: [cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24] +Arguments: hashpartitioning(cr_returning_addr_sk#23, 5), true, [id=#28] + +(23) Sort [codegen id : 6] +Input [3]: [cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24] +Arguments: [cr_returning_addr_sk#23 ASC NULLS FIRST], false, 0 + +(24) Scan parquet default.customer_address +Output [2]: [ca_address_sk#7, ca_state#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 7] +Input [2]: [ca_address_sk#7, ca_state#14] + +(26) Filter [codegen id : 7] +Input [2]: [ca_address_sk#7, ca_state#14] +Condition : (isnotnull(ca_address_sk#7) AND isnotnull(ca_state#14)) + +(27) Exchange +Input [2]: [ca_address_sk#7, ca_state#14] +Arguments: hashpartitioning(ca_address_sk#7, 5), true, [id=#29] + +(28) Sort [codegen id : 8] +Input [2]: [ca_address_sk#7, ca_state#14] +Arguments: [ca_address_sk#7 ASC NULLS FIRST], false, 0 + +(29) SortMergeJoin [codegen id : 9] +Left keys [1]: [cr_returning_addr_sk#23] +Right keys [1]: [ca_address_sk#7] +Join condition: None + +(30) Project [codegen id : 9] +Output [3]: [cr_returning_customer_sk#22, cr_return_amt_inc_tax#24, ca_state#14] +Input [5]: [cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24, ca_address_sk#7, ca_state#14] + +(31) HashAggregate [codegen id : 9] +Input [3]: [cr_returning_customer_sk#22, cr_return_amt_inc_tax#24, ca_state#14] +Keys [2]: [cr_returning_customer_sk#22, ca_state#14] +Functions [1]: [partial_sum(UnscaledValue(cr_return_amt_inc_tax#24))] +Aggregate Attributes [1]: [sum#30] +Results [3]: 
[cr_returning_customer_sk#22, ca_state#14, sum#31] + +(32) Exchange +Input [3]: [cr_returning_customer_sk#22, ca_state#14, sum#31] +Arguments: hashpartitioning(cr_returning_customer_sk#22, ca_state#14, 5), true, [id=#32] + +(33) HashAggregate [codegen id : 10] +Input [3]: [cr_returning_customer_sk#22, ca_state#14, sum#31] +Keys [2]: [cr_returning_customer_sk#22, ca_state#14] +Functions [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#24))] +Aggregate Attributes [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#24))#33] +Results [3]: [cr_returning_customer_sk#22 AS ctr_customer_sk#34, ca_state#14 AS ctr_state#35, MakeDecimal(sum(UnscaledValue(cr_return_amt_inc_tax#24))#33,17,2) AS ctr_total_return#36] + +(34) Filter [codegen id : 10] +Input [3]: [ctr_customer_sk#34, ctr_state#35, ctr_total_return#36] +Condition : isnotnull(ctr_total_return#36) + +(35) Exchange +Input [3]: [ctr_customer_sk#34, ctr_state#35, ctr_total_return#36] +Arguments: hashpartitioning(ctr_customer_sk#34, 5), true, [id=#37] + +(36) Sort [codegen id : 11] +Input [3]: [ctr_customer_sk#34, ctr_state#35, ctr_total_return#36] +Arguments: [ctr_customer_sk#34 ASC NULLS FIRST], false, 0 + +(37) SortMergeJoin [codegen id : 20] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ctr_customer_sk#34] +Join condition: None + +(38) Project [codegen id : 20] +Output [17]: [c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18, ctr_state#35, ctr_total_return#36] +Input [19]: [c_customer_sk#1, c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18, ctr_customer_sk#34, ctr_state#35, ctr_total_return#36] + +(39) Scan parquet default.catalog_returns 
+Output [4]: [cr_returned_date_sk#21, cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_returns] +PushedFilters: [IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_addr_sk)] +ReadSchema: struct + +(40) ColumnarToRow [codegen id : 13] +Input [4]: [cr_returned_date_sk#21, cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24] + +(41) Filter [codegen id : 13] +Input [4]: [cr_returned_date_sk#21, cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24] +Condition : (isnotnull(cr_returned_date_sk#21) AND isnotnull(cr_returning_addr_sk#23)) + +(42) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#25] + +(43) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [cr_returned_date_sk#21] +Right keys [1]: [d_date_sk#25] +Join condition: None + +(44) Project [codegen id : 13] +Output [3]: [cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24] +Input [5]: [cr_returned_date_sk#21, cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24, d_date_sk#25] + +(45) Exchange +Input [3]: [cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24] +Arguments: hashpartitioning(cr_returning_addr_sk#23, 5), true, [id=#38] + +(46) Sort [codegen id : 14] +Input [3]: [cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24] +Arguments: [cr_returning_addr_sk#23 ASC NULLS FIRST], false, 0 + +(47) ReusedExchange [Reuses operator id: 27] +Output [2]: [ca_address_sk#7, ca_state#14] + +(48) Sort [codegen id : 16] +Input [2]: [ca_address_sk#7, ca_state#14] +Arguments: [ca_address_sk#7 ASC NULLS FIRST], false, 0 + +(49) SortMergeJoin [codegen id : 17] +Left keys [1]: [cr_returning_addr_sk#23] +Right keys [1]: [ca_address_sk#7] +Join condition: 
None + +(50) Project [codegen id : 17] +Output [3]: [cr_returning_customer_sk#22, cr_return_amt_inc_tax#24, ca_state#14] +Input [5]: [cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24, ca_address_sk#7, ca_state#14] + +(51) HashAggregate [codegen id : 17] +Input [3]: [cr_returning_customer_sk#22, cr_return_amt_inc_tax#24, ca_state#14] +Keys [2]: [cr_returning_customer_sk#22, ca_state#14] +Functions [1]: [partial_sum(UnscaledValue(cr_return_amt_inc_tax#24))] +Aggregate Attributes [1]: [sum#39] +Results [3]: [cr_returning_customer_sk#22, ca_state#14, sum#40] + +(52) Exchange +Input [3]: [cr_returning_customer_sk#22, ca_state#14, sum#40] +Arguments: hashpartitioning(cr_returning_customer_sk#22, ca_state#14, 5), true, [id=#41] + +(53) HashAggregate [codegen id : 18] +Input [3]: [cr_returning_customer_sk#22, ca_state#14, sum#40] +Keys [2]: [cr_returning_customer_sk#22, ca_state#14] +Functions [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#24))] +Aggregate Attributes [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#24))#42] +Results [2]: [ca_state#14 AS ctr_state#35, MakeDecimal(sum(UnscaledValue(cr_return_amt_inc_tax#24))#42,17,2) AS ctr_total_return#36] + +(54) HashAggregate [codegen id : 18] +Input [2]: [ctr_state#35, ctr_total_return#36] +Keys [1]: [ctr_state#35] +Functions [1]: [partial_avg(ctr_total_return#36)] +Aggregate Attributes [2]: [sum#43, count#44] +Results [3]: [ctr_state#35, sum#45, count#46] + +(55) Exchange +Input [3]: [ctr_state#35, sum#45, count#46] +Arguments: hashpartitioning(ctr_state#35, 5), true, [id=#47] + +(56) HashAggregate [codegen id : 19] +Input [3]: [ctr_state#35, sum#45, count#46] +Keys [1]: [ctr_state#35] +Functions [1]: [avg(ctr_total_return#36)] +Aggregate Attributes [1]: [avg(ctr_total_return#36)#48] +Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#36)#48) * 1.200000), DecimalType(24,7), true) AS (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#49, ctr_state#35 AS 
ctr_state#35#50] + +(57) Filter [codegen id : 19] +Input [2]: [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#49, ctr_state#35#50] +Condition : isnotnull((CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#49) + +(58) BroadcastExchange +Input [2]: [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#49, ctr_state#35#50] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [id=#51] + +(59) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ctr_state#35] +Right keys [1]: [ctr_state#35#50] +Join condition: (cast(ctr_total_return#36 as decimal(24,7)) > (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#49) + +(60) Project [codegen id : 20] +Output [16]: [c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18, ctr_total_return#36] +Input [19]: [c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18, ctr_state#35, ctr_total_return#36, (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#49, ctr_state#35#50] + +(61) TakeOrderedAndProject +Input [16]: [c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18, ctr_total_return#36] +Arguments: 100, [c_customer_id#2 ASC NULLS FIRST, c_salutation#4 ASC NULLS FIRST, c_first_name#5 ASC NULLS FIRST, c_last_name#6 ASC NULLS FIRST, ca_street_number#8 ASC NULLS FIRST, ca_street_name#9 ASC NULLS FIRST, ca_street_type#10 ASC NULLS 
FIRST, ca_suite_number#11 ASC NULLS FIRST, ca_city#12 ASC NULLS FIRST, ca_county#13 ASC NULLS FIRST, ca_state#14 ASC NULLS FIRST, ca_zip#15 ASC NULLS FIRST, ca_country#16 ASC NULLS FIRST, ca_gmt_offset#17 ASC NULLS FIRST, ca_location_type#18 ASC NULLS FIRST, ctr_total_return#36 ASC NULLS FIRST], [c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18, ctr_total_return#36] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.sf100/simplified.txt new file mode 100644 index 0000000000000..d76e1217ae004 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.sf100/simplified.txt @@ -0,0 +1,104 @@ +TakeOrderedAndProject [c_customer_id,c_first_name,c_last_name,c_salutation,ca_city,ca_country,ca_county,ca_gmt_offset,ca_location_type,ca_state,ca_street_name,ca_street_number,ca_street_type,ca_suite_number,ca_zip,ctr_total_return] + WholeStageCodegen (20) + Project [c_customer_id,c_first_name,c_last_name,c_salutation,ca_city,ca_country,ca_county,ca_gmt_offset,ca_location_type,ca_state,ca_street_name,ca_street_number,ca_street_type,ca_suite_number,ca_zip,ctr_total_return] + BroadcastHashJoin [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),ctr_state,ctr_state,ctr_total_return] + Project [c_customer_id,c_first_name,c_last_name,c_salutation,ca_city,ca_country,ca_county,ca_gmt_offset,ca_location_type,ca_state,ca_street_name,ca_street_number,ca_street_type,ca_suite_number,ca_zip,ctr_state,ctr_total_return] + SortMergeJoin [c_customer_sk,ctr_customer_sk] + InputAdapter + WholeStageCodegen (3) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #1 + WholeStageCodegen (2) + Project 
[c_customer_id,c_customer_sk,c_first_name,c_last_name,c_salutation,ca_city,ca_country,ca_county,ca_gmt_offset,ca_location_type,ca_state,ca_street_name,ca_street_number,ca_street_type,ca_suite_number,ca_zip] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Filter [c_current_addr_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_customer_id,c_customer_sk,c_first_name,c_last_name,c_salutation] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_city,ca_country,ca_county,ca_gmt_offset,ca_location_type,ca_state,ca_street_name,ca_street_number,ca_street_type,ca_suite_number,ca_zip] + InputAdapter + WholeStageCodegen (11) + Sort [ctr_customer_sk] + InputAdapter + Exchange [ctr_customer_sk] #3 + WholeStageCodegen (10) + Filter [ctr_total_return] + HashAggregate [ca_state,cr_returning_customer_sk,sum] [ctr_customer_sk,ctr_state,ctr_total_return,sum,sum(UnscaledValue(cr_return_amt_inc_tax))] + InputAdapter + Exchange [ca_state,cr_returning_customer_sk] #4 + WholeStageCodegen (9) + HashAggregate [ca_state,cr_return_amt_inc_tax,cr_returning_customer_sk] [sum,sum] + Project [ca_state,cr_return_amt_inc_tax,cr_returning_customer_sk] + SortMergeJoin [ca_address_sk,cr_returning_addr_sk] + InputAdapter + WholeStageCodegen (6) + Sort [cr_returning_addr_sk] + InputAdapter + Exchange [cr_returning_addr_sk] #5 + WholeStageCodegen (5) + Project [cr_return_amt_inc_tax,cr_returning_addr_sk,cr_returning_customer_sk] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Filter [cr_returned_date_sk,cr_returning_addr_sk,cr_returning_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_return_amt_inc_tax,cr_returned_date_sk,cr_returning_addr_sk,cr_returning_customer_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + Project [d_date_sk] + Filter 
[d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + WholeStageCodegen (8) + Sort [ca_address_sk] + InputAdapter + Exchange [ca_address_sk] #7 + WholeStageCodegen (7) + Filter [ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (19) + Filter [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] + HashAggregate [count,ctr_state,sum] [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),avg(ctr_total_return),count,ctr_state,sum] + InputAdapter + Exchange [ctr_state] #9 + WholeStageCodegen (18) + HashAggregate [ctr_state,ctr_total_return] [count,count,sum,sum] + HashAggregate [ca_state,cr_returning_customer_sk,sum] [ctr_state,ctr_total_return,sum,sum(UnscaledValue(cr_return_amt_inc_tax))] + InputAdapter + Exchange [ca_state,cr_returning_customer_sk] #10 + WholeStageCodegen (17) + HashAggregate [ca_state,cr_return_amt_inc_tax,cr_returning_customer_sk] [sum,sum] + Project [ca_state,cr_return_amt_inc_tax,cr_returning_customer_sk] + SortMergeJoin [ca_address_sk,cr_returning_addr_sk] + InputAdapter + WholeStageCodegen (14) + Sort [cr_returning_addr_sk] + InputAdapter + Exchange [cr_returning_addr_sk] #11 + WholeStageCodegen (13) + Project [cr_return_amt_inc_tax,cr_returning_addr_sk,cr_returning_customer_sk] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Filter [cr_returned_date_sk,cr_returning_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_return_amt_inc_tax,cr_returned_date_sk,cr_returning_addr_sk,cr_returning_customer_sk] + InputAdapter + ReusedExchange [d_date_sk] #6 + InputAdapter + WholeStageCodegen (16) + Sort [ca_address_sk] + InputAdapter + ReusedExchange [ca_address_sk,ca_state] #7 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/explain.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/explain.txt new file mode 100644 index 0000000000000..956a51096d2d3 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/explain.txt @@ -0,0 +1,298 @@ +== Physical Plan == +TakeOrderedAndProject (52) ++- * Project (51) + +- * BroadcastHashJoin Inner BuildRight (50) + :- * Project (45) + : +- * BroadcastHashJoin Inner BuildRight (44) + : :- * Project (39) + : : +- * BroadcastHashJoin Inner BuildRight (38) + : : :- * Filter (20) + : : : +- * HashAggregate (19) + : : : +- Exchange (18) + : : : +- * HashAggregate (17) + : : : +- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.catalog_returns (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.date_dim (4) + : : : +- BroadcastExchange (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet default.customer_address (11) + : : +- BroadcastExchange (37) + : : +- * Filter (36) + : : +- * HashAggregate (35) + : : +- Exchange (34) + : : +- * HashAggregate (33) + : : +- * HashAggregate (32) + : : +- Exchange (31) + : : +- * HashAggregate (30) + : : +- * Project (29) + : : +- * BroadcastHashJoin Inner BuildRight (28) + : : :- * Project (26) + : : : +- * BroadcastHashJoin Inner BuildRight (25) + : : : :- * Filter (23) + : : : : +- * ColumnarToRow (22) + : : : : +- Scan parquet default.catalog_returns (21) + : : : +- ReusedExchange (24) + : : +- ReusedExchange (27) + : +- BroadcastExchange (43) + : +- * Filter (42) + : +- * ColumnarToRow (41) + : +- Scan parquet default.customer (40) + +- BroadcastExchange (49) + +- * Filter (48) + +- * ColumnarToRow (47) + +- Scan parquet 
default.customer_address (46) + + +(1) Scan parquet default.catalog_returns +Output [4]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_returns] +PushedFilters: [IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_addr_sk), IsNotNull(cr_returning_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [4]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] + +(3) Filter [codegen id : 3] +Input [4]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] +Condition : ((isnotnull(cr_returned_date_sk#1) AND isnotnull(cr_returning_addr_sk#3)) AND isnotnull(cr_returning_customer_sk#2)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#5, d_year#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#5, d_year#6] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#5, d_year#6] +Condition : ((isnotnull(d_year#6) AND (d_year#6 = 2000)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [2]: [d_date_sk#5, d_year#6] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cr_returned_date_sk#1] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] 
+Input [5]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4, d_date_sk#5] + +(11) Scan parquet default.customer_address +Output [2]: [ca_address_sk#8, ca_state#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [ca_address_sk#8, ca_state#9] + +(13) Filter [codegen id : 2] +Input [2]: [ca_address_sk#8, ca_state#9] +Condition : (isnotnull(ca_address_sk#8) AND isnotnull(ca_state#9)) + +(14) BroadcastExchange +Input [2]: [ca_address_sk#8, ca_state#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cr_returning_addr_sk#3] +Right keys [1]: [ca_address_sk#8] +Join condition: None + +(16) Project [codegen id : 3] +Output [3]: [cr_returning_customer_sk#2, cr_return_amt_inc_tax#4, ca_state#9] +Input [5]: [cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4, ca_address_sk#8, ca_state#9] + +(17) HashAggregate [codegen id : 3] +Input [3]: [cr_returning_customer_sk#2, cr_return_amt_inc_tax#4, ca_state#9] +Keys [2]: [cr_returning_customer_sk#2, ca_state#9] +Functions [1]: [partial_sum(UnscaledValue(cr_return_amt_inc_tax#4))] +Aggregate Attributes [1]: [sum#11] +Results [3]: [cr_returning_customer_sk#2, ca_state#9, sum#12] + +(18) Exchange +Input [3]: [cr_returning_customer_sk#2, ca_state#9, sum#12] +Arguments: hashpartitioning(cr_returning_customer_sk#2, ca_state#9, 5), true, [id=#13] + +(19) HashAggregate [codegen id : 11] +Input [3]: [cr_returning_customer_sk#2, ca_state#9, sum#12] +Keys [2]: [cr_returning_customer_sk#2, ca_state#9] +Functions [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#4))] +Aggregate Attributes [1]: 
[sum(UnscaledValue(cr_return_amt_inc_tax#4))#14] +Results [3]: [cr_returning_customer_sk#2 AS ctr_customer_sk#15, ca_state#9 AS ctr_state#16, MakeDecimal(sum(UnscaledValue(cr_return_amt_inc_tax#4))#14,17,2) AS ctr_total_return#17] + +(20) Filter [codegen id : 11] +Input [3]: [ctr_customer_sk#15, ctr_state#16, ctr_total_return#17] +Condition : isnotnull(ctr_total_return#17) + +(21) Scan parquet default.catalog_returns +Output [4]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_returns] +PushedFilters: [IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_addr_sk)] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 6] +Input [4]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] + +(23) Filter [codegen id : 6] +Input [4]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] +Condition : (isnotnull(cr_returned_date_sk#1) AND isnotnull(cr_returning_addr_sk#3)) + +(24) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(25) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cr_returned_date_sk#1] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(26) Project [codegen id : 6] +Output [3]: [cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] +Input [5]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4, d_date_sk#5] + +(27) ReusedExchange [Reuses operator id: 14] +Output [2]: [ca_address_sk#8, ca_state#9] + +(28) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cr_returning_addr_sk#3] +Right keys [1]: [ca_address_sk#8] +Join condition: None + +(29) Project [codegen id : 6] +Output [3]: [cr_returning_customer_sk#2, cr_return_amt_inc_tax#4, ca_state#9] 
+Input [5]: [cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4, ca_address_sk#8, ca_state#9] + +(30) HashAggregate [codegen id : 6] +Input [3]: [cr_returning_customer_sk#2, cr_return_amt_inc_tax#4, ca_state#9] +Keys [2]: [cr_returning_customer_sk#2, ca_state#9] +Functions [1]: [partial_sum(UnscaledValue(cr_return_amt_inc_tax#4))] +Aggregate Attributes [1]: [sum#18] +Results [3]: [cr_returning_customer_sk#2, ca_state#9, sum#19] + +(31) Exchange +Input [3]: [cr_returning_customer_sk#2, ca_state#9, sum#19] +Arguments: hashpartitioning(cr_returning_customer_sk#2, ca_state#9, 5), true, [id=#20] + +(32) HashAggregate [codegen id : 7] +Input [3]: [cr_returning_customer_sk#2, ca_state#9, sum#19] +Keys [2]: [cr_returning_customer_sk#2, ca_state#9] +Functions [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#4))#21] +Results [2]: [ca_state#9 AS ctr_state#16, MakeDecimal(sum(UnscaledValue(cr_return_amt_inc_tax#4))#21,17,2) AS ctr_total_return#17] + +(33) HashAggregate [codegen id : 7] +Input [2]: [ctr_state#16, ctr_total_return#17] +Keys [1]: [ctr_state#16] +Functions [1]: [partial_avg(ctr_total_return#17)] +Aggregate Attributes [2]: [sum#22, count#23] +Results [3]: [ctr_state#16, sum#24, count#25] + +(34) Exchange +Input [3]: [ctr_state#16, sum#24, count#25] +Arguments: hashpartitioning(ctr_state#16, 5), true, [id=#26] + +(35) HashAggregate [codegen id : 8] +Input [3]: [ctr_state#16, sum#24, count#25] +Keys [1]: [ctr_state#16] +Functions [1]: [avg(ctr_total_return#17)] +Aggregate Attributes [1]: [avg(ctr_total_return#17)#27] +Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#17)#27) * 1.200000), DecimalType(24,7), true) AS (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#28, ctr_state#16 AS ctr_state#16#29] + +(36) Filter [codegen id : 8] +Input [2]: [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#28, 
ctr_state#16#29] +Condition : isnotnull((CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#28) + +(37) BroadcastExchange +Input [2]: [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#28, ctr_state#16#29] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [id=#30] + +(38) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ctr_state#16] +Right keys [1]: [ctr_state#16#29] +Join condition: (cast(ctr_total_return#17 as decimal(24,7)) > (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#28) + +(39) Project [codegen id : 11] +Output [2]: [ctr_customer_sk#15, ctr_total_return#17] +Input [5]: [ctr_customer_sk#15, ctr_state#16, ctr_total_return#17, (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#28, ctr_state#16#29] + +(40) Scan parquet default.customer +Output [6]: [c_customer_sk#31, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 9] +Input [6]: [c_customer_sk#31, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36] + +(42) Filter [codegen id : 9] +Input [6]: [c_customer_sk#31, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36] +Condition : (isnotnull(c_customer_sk#31) AND isnotnull(c_current_addr_sk#33)) + +(43) BroadcastExchange +Input [6]: [c_customer_sk#31, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#37] + +(44) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ctr_customer_sk#15] +Right 
keys [1]: [c_customer_sk#31] +Join condition: None + +(45) Project [codegen id : 11] +Output [6]: [ctr_total_return#17, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36] +Input [8]: [ctr_customer_sk#15, ctr_total_return#17, c_customer_sk#31, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36] + +(46) Scan parquet default.customer_address +Output [12]: [ca_address_sk#8, ca_street_number#38, ca_street_name#39, ca_street_type#40, ca_suite_number#41, ca_city#42, ca_county#43, ca_state#9, ca_zip#44, ca_country#45, ca_gmt_offset#46, ca_location_type#47] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,GA), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(47) ColumnarToRow [codegen id : 10] +Input [12]: [ca_address_sk#8, ca_street_number#38, ca_street_name#39, ca_street_type#40, ca_suite_number#41, ca_city#42, ca_county#43, ca_state#9, ca_zip#44, ca_country#45, ca_gmt_offset#46, ca_location_type#47] + +(48) Filter [codegen id : 10] +Input [12]: [ca_address_sk#8, ca_street_number#38, ca_street_name#39, ca_street_type#40, ca_suite_number#41, ca_city#42, ca_county#43, ca_state#9, ca_zip#44, ca_country#45, ca_gmt_offset#46, ca_location_type#47] +Condition : ((isnotnull(ca_state#9) AND (ca_state#9 = GA)) AND isnotnull(ca_address_sk#8)) + +(49) BroadcastExchange +Input [12]: [ca_address_sk#8, ca_street_number#38, ca_street_name#39, ca_street_type#40, ca_suite_number#41, ca_city#42, ca_county#43, ca_state#9, ca_zip#44, ca_country#45, ca_gmt_offset#46, ca_location_type#47] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#48] + +(50) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [c_current_addr_sk#33] +Right keys [1]: [ca_address_sk#8] +Join condition: None + +(51) Project 
[codegen id : 11] +Output [16]: [c_customer_id#32, c_salutation#34, c_first_name#35, c_last_name#36, ca_street_number#38, ca_street_name#39, ca_street_type#40, ca_suite_number#41, ca_city#42, ca_county#43, ca_state#9, ca_zip#44, ca_country#45, ca_gmt_offset#46, ca_location_type#47, ctr_total_return#17] +Input [18]: [ctr_total_return#17, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36, ca_address_sk#8, ca_street_number#38, ca_street_name#39, ca_street_type#40, ca_suite_number#41, ca_city#42, ca_county#43, ca_state#9, ca_zip#44, ca_country#45, ca_gmt_offset#46, ca_location_type#47] + +(52) TakeOrderedAndProject +Input [16]: [c_customer_id#32, c_salutation#34, c_first_name#35, c_last_name#36, ca_street_number#38, ca_street_name#39, ca_street_type#40, ca_suite_number#41, ca_city#42, ca_county#43, ca_state#9, ca_zip#44, ca_country#45, ca_gmt_offset#46, ca_location_type#47, ctr_total_return#17] +Arguments: 100, [c_customer_id#32 ASC NULLS FIRST, c_salutation#34 ASC NULLS FIRST, c_first_name#35 ASC NULLS FIRST, c_last_name#36 ASC NULLS FIRST, ca_street_number#38 ASC NULLS FIRST, ca_street_name#39 ASC NULLS FIRST, ca_street_type#40 ASC NULLS FIRST, ca_suite_number#41 ASC NULLS FIRST, ca_city#42 ASC NULLS FIRST, ca_county#43 ASC NULLS FIRST, ca_state#9 ASC NULLS FIRST, ca_zip#44 ASC NULLS FIRST, ca_country#45 ASC NULLS FIRST, ca_gmt_offset#46 ASC NULLS FIRST, ca_location_type#47 ASC NULLS FIRST, ctr_total_return#17 ASC NULLS FIRST], [c_customer_id#32, c_salutation#34, c_first_name#35, c_last_name#36, ca_street_number#38, ca_street_name#39, ca_street_type#40, ca_suite_number#41, ca_city#42, ca_county#43, ca_state#9, ca_zip#44, ca_country#45, ca_gmt_offset#46, ca_location_type#47, ctr_total_return#17] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/simplified.txt new file mode 100644 index 
0000000000000..c0587b06a0da3 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/simplified.txt @@ -0,0 +1,77 @@ +TakeOrderedAndProject [c_customer_id,c_first_name,c_last_name,c_salutation,ca_city,ca_country,ca_county,ca_gmt_offset,ca_location_type,ca_state,ca_street_name,ca_street_number,ca_street_type,ca_suite_number,ca_zip,ctr_total_return] + WholeStageCodegen (11) + Project [c_customer_id,c_first_name,c_last_name,c_salutation,ca_city,ca_country,ca_county,ca_gmt_offset,ca_location_type,ca_state,ca_street_name,ca_street_number,ca_street_type,ca_suite_number,ca_zip,ctr_total_return] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_addr_sk,c_customer_id,c_first_name,c_last_name,c_salutation,ctr_total_return] + BroadcastHashJoin [c_customer_sk,ctr_customer_sk] + Project [ctr_customer_sk,ctr_total_return] + BroadcastHashJoin [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),ctr_state,ctr_state,ctr_total_return] + Filter [ctr_total_return] + HashAggregate [ca_state,cr_returning_customer_sk,sum] [ctr_customer_sk,ctr_state,ctr_total_return,sum,sum(UnscaledValue(cr_return_amt_inc_tax))] + InputAdapter + Exchange [ca_state,cr_returning_customer_sk] #1 + WholeStageCodegen (3) + HashAggregate [ca_state,cr_return_amt_inc_tax,cr_returning_customer_sk] [sum,sum] + Project [ca_state,cr_return_amt_inc_tax,cr_returning_customer_sk] + BroadcastHashJoin [ca_address_sk,cr_returning_addr_sk] + Project [cr_return_amt_inc_tax,cr_returning_addr_sk,cr_returning_customer_sk] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Filter [cr_returned_date_sk,cr_returning_addr_sk,cr_returning_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_return_amt_inc_tax,cr_returned_date_sk,cr_returning_addr_sk,cr_returning_customer_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + 
InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (8) + Filter [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] + HashAggregate [count,ctr_state,sum] [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),avg(ctr_total_return),count,ctr_state,sum] + InputAdapter + Exchange [ctr_state] #5 + WholeStageCodegen (7) + HashAggregate [ctr_state,ctr_total_return] [count,count,sum,sum] + HashAggregate [ca_state,cr_returning_customer_sk,sum] [ctr_state,ctr_total_return,sum,sum(UnscaledValue(cr_return_amt_inc_tax))] + InputAdapter + Exchange [ca_state,cr_returning_customer_sk] #6 + WholeStageCodegen (6) + HashAggregate [ca_state,cr_return_amt_inc_tax,cr_returning_customer_sk] [sum,sum] + Project [ca_state,cr_return_amt_inc_tax,cr_returning_customer_sk] + BroadcastHashJoin [ca_address_sk,cr_returning_addr_sk] + Project [cr_return_amt_inc_tax,cr_returning_addr_sk,cr_returning_customer_sk] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Filter [cr_returned_date_sk,cr_returning_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_return_amt_inc_tax,cr_returned_date_sk,cr_returning_addr_sk,cr_returning_customer_sk] + InputAdapter + ReusedExchange [d_date_sk] #2 + InputAdapter + ReusedExchange [ca_address_sk,ca_state] #3 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (9) + Filter [c_current_addr_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_customer_id,c_customer_sk,c_first_name,c_last_name,c_salutation] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (10) + Filter [ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address 
[ca_address_sk,ca_city,ca_country,ca_county,ca_gmt_offset,ca_location_type,ca_state,ca_street_name,ca_street_number,ca_street_type,ca_suite_number,ca_zip] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82.sf100/explain.txt new file mode 100644 index 0000000000000..c814e20e826e3 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82.sf100/explain.txt @@ -0,0 +1,175 @@ +== Physical Plan == +TakeOrderedAndProject (31) ++- * HashAggregate (30) + +- Exchange (29) + +- * HashAggregate (28) + +- * Project (27) + +- * SortMergeJoin Inner (26) + :- * Sort (20) + : +- Exchange (19) + : +- * Project (18) + : +- * BroadcastHashJoin Inner BuildRight (17) + : :- * Project (11) + : : +- * BroadcastHashJoin Inner BuildLeft (10) + : : :- BroadcastExchange (5) + : : : +- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.item (1) + : : +- * Project (9) + : : +- * Filter (8) + : : +- * ColumnarToRow (7) + : : +- Scan parquet default.inventory (6) + : +- BroadcastExchange (16) + : +- * Project (15) + : +- * Filter (14) + : +- * ColumnarToRow (13) + : +- Scan parquet default.date_dim (12) + +- * Sort (25) + +- Exchange (24) + +- * Filter (23) + +- * ColumnarToRow (22) + +- Scan parquet default.store_sales (21) + + +(1) Scan parquet default.item +Output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,62.00), In(i_manufact_id, [129,270,821,423]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, 
i_current_price#4, i_manufact_id#5] + +(3) Filter [codegen id : 1] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Condition : ((((isnotnull(i_current_price#4) AND (i_current_price#4 >= 62.00)) AND (cast(i_current_price#4 as decimal(12,2)) <= 92.00)) AND i_manufact_id#5 IN (129,270,821,423)) AND isnotnull(i_item_sk#1)) + +(4) Project [codegen id : 1] +Output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] + +(5) BroadcastExchange +Input [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(6) Scan parquet default.inventory +Output [3]: [inv_date_sk#7, inv_item_sk#8, inv_quantity_on_hand#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/inventory] +PushedFilters: [IsNotNull(inv_quantity_on_hand), GreaterThanOrEqual(inv_quantity_on_hand,100), LessThanOrEqual(inv_quantity_on_hand,500), IsNotNull(inv_item_sk), IsNotNull(inv_date_sk)] +ReadSchema: struct + +(7) ColumnarToRow +Input [3]: [inv_date_sk#7, inv_item_sk#8, inv_quantity_on_hand#9] + +(8) Filter +Input [3]: [inv_date_sk#7, inv_item_sk#8, inv_quantity_on_hand#9] +Condition : ((((isnotnull(inv_quantity_on_hand#9) AND (inv_quantity_on_hand#9 >= 100)) AND (inv_quantity_on_hand#9 <= 500)) AND isnotnull(inv_item_sk#8)) AND isnotnull(inv_date_sk#7)) + +(9) Project +Output [2]: [inv_date_sk#7, inv_item_sk#8] +Input [3]: [inv_date_sk#7, inv_item_sk#8, inv_quantity_on_hand#9] + +(10) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [inv_item_sk#8] +Join condition: None + +(11) Project [codegen id : 3] +Output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#7] +Input [6]: 
[i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#7, inv_item_sk#8] + +(12) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_date#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-05-25), LessThanOrEqual(d_date,2000-07-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(13) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#10, d_date#11] + +(14) Filter [codegen id : 2] +Input [2]: [d_date_sk#10, d_date#11] +Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 11102)) AND (d_date#11 <= 11162)) AND isnotnull(d_date_sk#10)) + +(15) Project [codegen id : 2] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_date#11] + +(16) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(17) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [inv_date_sk#7] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(18) Project [codegen id : 3] +Output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#7, d_date_sk#10] + +(19) Exchange +Input [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: hashpartitioning(i_item_sk#1, 5), true, [id=#13] + +(20) Sort [codegen id : 4] +Input [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: [i_item_sk#1 ASC NULLS FIRST], false, 0 + +(21) Scan parquet default.store_sales +Output [1]: [ss_item_sk#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(22) ColumnarToRow [codegen id 
: 5] +Input [1]: [ss_item_sk#14] + +(23) Filter [codegen id : 5] +Input [1]: [ss_item_sk#14] +Condition : isnotnull(ss_item_sk#14) + +(24) Exchange +Input [1]: [ss_item_sk#14] +Arguments: hashpartitioning(ss_item_sk#14, 5), true, [id=#15] + +(25) Sort [codegen id : 6] +Input [1]: [ss_item_sk#14] +Arguments: [ss_item_sk#14 ASC NULLS FIRST], false, 0 + +(26) SortMergeJoin [codegen id : 7] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [ss_item_sk#14] +Join condition: None + +(27) Project [codegen id : 7] +Output [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, ss_item_sk#14] + +(28) HashAggregate [codegen id : 7] +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Keys [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] + +(29) Exchange +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: hashpartitioning(i_item_id#2, i_item_desc#3, i_current_price#4, 5), true, [id=#16] + +(30) HashAggregate [codegen id : 8] +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Keys [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] + +(31) TakeOrderedAndProject +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: 100, [i_item_id#2 ASC NULLS FIRST], [i_item_id#2, i_item_desc#3, i_current_price#4] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82.sf100/simplified.txt new file mode 100644 index 0000000000000..d9f1a7056caaa --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82.sf100/simplified.txt @@ -0,0 +1,50 @@ +TakeOrderedAndProject [i_current_price,i_item_desc,i_item_id] + 
WholeStageCodegen (8) + HashAggregate [i_current_price,i_item_desc,i_item_id] + InputAdapter + Exchange [i_current_price,i_item_desc,i_item_id] #1 + WholeStageCodegen (7) + HashAggregate [i_current_price,i_item_desc,i_item_id] + Project [i_current_price,i_item_desc,i_item_id] + SortMergeJoin [i_item_sk,ss_item_sk] + InputAdapter + WholeStageCodegen (4) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #2 + WholeStageCodegen (3) + Project [i_current_price,i_item_desc,i_item_id,i_item_sk] + BroadcastHashJoin [d_date_sk,inv_date_sk] + Project [i_current_price,i_item_desc,i_item_id,i_item_sk,inv_date_sk] + BroadcastHashJoin [i_item_sk,inv_item_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [i_current_price,i_item_desc,i_item_id,i_item_sk] + Filter [i_current_price,i_item_sk,i_manufact_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_current_price,i_item_desc,i_item_id,i_item_sk,i_manufact_id] + Project [inv_date_sk,inv_item_sk] + Filter [inv_date_sk,inv_item_sk,inv_quantity_on_hand] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + WholeStageCodegen (6) + Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #5 + WholeStageCodegen (5) + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82/explain.txt new file mode 100644 index 0000000000000..d717c8f635828 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82/explain.txt @@ -0,0 +1,160 @@ +== Physical Plan == +TakeOrderedAndProject (28) ++- * 
HashAggregate (27) + +- Exchange (26) + +- * HashAggregate (25) + +- * Project (24) + +- * BroadcastHashJoin Inner BuildRight (23) + :- * Project (18) + : +- * BroadcastHashJoin Inner BuildRight (17) + : :- * Project (11) + : : +- * BroadcastHashJoin Inner BuildRight (10) + : : :- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.item (1) + : : +- BroadcastExchange (9) + : : +- * Project (8) + : : +- * Filter (7) + : : +- * ColumnarToRow (6) + : : +- Scan parquet default.inventory (5) + : +- BroadcastExchange (16) + : +- * Project (15) + : +- * Filter (14) + : +- * ColumnarToRow (13) + : +- Scan parquet default.date_dim (12) + +- BroadcastExchange (22) + +- * Filter (21) + +- * ColumnarToRow (20) + +- Scan parquet default.store_sales (19) + + +(1) Scan parquet default.item +Output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,62.00), In(i_manufact_id, [129,270,821,423]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] + +(3) Filter [codegen id : 4] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Condition : ((((isnotnull(i_current_price#4) AND (i_current_price#4 >= 62.00)) AND (cast(i_current_price#4 as decimal(12,2)) <= 92.00)) AND i_manufact_id#5 IN (129,270,821,423)) AND isnotnull(i_item_sk#1)) + +(4) Project [codegen id : 4] +Output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] + +(5) Scan parquet default.inventory +Output [3]: [inv_date_sk#6, inv_item_sk#7, 
inv_quantity_on_hand#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/inventory] +PushedFilters: [IsNotNull(inv_quantity_on_hand), GreaterThanOrEqual(inv_quantity_on_hand,100), LessThanOrEqual(inv_quantity_on_hand,500), IsNotNull(inv_item_sk), IsNotNull(inv_date_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [3]: [inv_date_sk#6, inv_item_sk#7, inv_quantity_on_hand#8] + +(7) Filter [codegen id : 1] +Input [3]: [inv_date_sk#6, inv_item_sk#7, inv_quantity_on_hand#8] +Condition : ((((isnotnull(inv_quantity_on_hand#8) AND (inv_quantity_on_hand#8 >= 100)) AND (inv_quantity_on_hand#8 <= 500)) AND isnotnull(inv_item_sk#7)) AND isnotnull(inv_date_sk#6)) + +(8) Project [codegen id : 1] +Output [2]: [inv_date_sk#6, inv_item_sk#7] +Input [3]: [inv_date_sk#6, inv_item_sk#7, inv_quantity_on_hand#8] + +(9) BroadcastExchange +Input [2]: [inv_date_sk#6, inv_item_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#9] + +(10) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [inv_item_sk#7] +Join condition: None + +(11) Project [codegen id : 4] +Output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#6] +Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#6, inv_item_sk#7] + +(12) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_date#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-05-25), LessThanOrEqual(d_date,2000-07-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(13) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#10, d_date#11] + +(14) Filter [codegen id : 2] +Input [2]: [d_date_sk#10, d_date#11] 
+Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 11102)) AND (d_date#11 <= 11162)) AND isnotnull(d_date_sk#10)) + +(15) Project [codegen id : 2] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_date#11] + +(16) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(17) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_date_sk#6] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(18) Project [codegen id : 4] +Output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#6, d_date_sk#10] + +(19) Scan parquet default.store_sales +Output [1]: [ss_item_sk#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 3] +Input [1]: [ss_item_sk#13] + +(21) Filter [codegen id : 3] +Input [1]: [ss_item_sk#13] +Condition : isnotnull(ss_item_sk#13) + +(22) BroadcastExchange +Input [1]: [ss_item_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [ss_item_sk#13] +Join condition: None + +(24) Project [codegen id : 4] +Output [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, ss_item_sk#13] + +(25) HashAggregate [codegen id : 4] +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Keys [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] + +(26) Exchange +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: 
hashpartitioning(i_item_id#2, i_item_desc#3, i_current_price#4, 5), true, [id=#15] + +(27) HashAggregate [codegen id : 5] +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Keys [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] + +(28) TakeOrderedAndProject +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: 100, [i_item_id#2 ASC NULLS FIRST], [i_item_id#2, i_item_desc#3, i_current_price#4] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82/simplified.txt new file mode 100644 index 0000000000000..8006c538a2a9b --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82/simplified.txt @@ -0,0 +1,41 @@ +TakeOrderedAndProject [i_current_price,i_item_desc,i_item_id] + WholeStageCodegen (5) + HashAggregate [i_current_price,i_item_desc,i_item_id] + InputAdapter + Exchange [i_current_price,i_item_desc,i_item_id] #1 + WholeStageCodegen (4) + HashAggregate [i_current_price,i_item_desc,i_item_id] + Project [i_current_price,i_item_desc,i_item_id] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [i_current_price,i_item_desc,i_item_id,i_item_sk] + BroadcastHashJoin [d_date_sk,inv_date_sk] + Project [i_current_price,i_item_desc,i_item_id,i_item_sk,inv_date_sk] + BroadcastHashJoin [i_item_sk,inv_item_sk] + Project [i_current_price,i_item_desc,i_item_id,i_item_sk] + Filter [i_current_price,i_item_sk,i_manufact_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_current_price,i_item_desc,i_item_id,i_item_sk,i_manufact_id] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [inv_date_sk,inv_item_sk] + Filter [inv_date_sk,inv_item_sk,inv_quantity_on_hand] + ColumnarToRow + InputAdapter + Scan parquet default.inventory 
[inv_date_sk,inv_item_sk,inv_quantity_on_hand] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.sf100/explain.txt new file mode 100644 index 0000000000000..86a1fade375f2 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.sf100/explain.txt @@ -0,0 +1,344 @@ +== Physical Plan == +TakeOrderedAndProject (61) ++- * Project (60) + +- * BroadcastHashJoin Inner BuildRight (59) + :- * Project (45) + : +- * BroadcastHashJoin Inner BuildRight (44) + : :- * HashAggregate (30) + : : +- Exchange (29) + : : +- * HashAggregate (28) + : : +- * Project (27) + : : +- * BroadcastHashJoin Inner BuildRight (26) + : : :- * Project (21) + : : : +- * BroadcastHashJoin Inner BuildRight (20) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_returns (1) + : : : +- BroadcastExchange (19) + : : : +- * Project (18) + : : : +- * BroadcastHashJoin LeftSemi BuildRight (17) + : : : :- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.date_dim (4) + : : : +- BroadcastExchange (16) + : : : +- * Project (15) + : : : +- * BroadcastHashJoin LeftSemi BuildRight (14) + : : : :- * ColumnarToRow (8) + : : : : +- Scan parquet default.date_dim (7) + : : : +- BroadcastExchange (13) + : : : +- * Project (12) + : : : +- * Filter (11) + : : : +- * ColumnarToRow (10) + : : : +- Scan parquet default.date_dim (9) + : : +- BroadcastExchange (25) + : : +- * Filter (24) + : : +- * ColumnarToRow (23) + : : +- Scan parquet 
default.item (22) + : +- BroadcastExchange (43) + : +- * HashAggregate (42) + : +- Exchange (41) + : +- * HashAggregate (40) + : +- * Project (39) + : +- * BroadcastHashJoin Inner BuildRight (38) + : :- * Project (36) + : : +- * BroadcastHashJoin Inner BuildRight (35) + : : :- * Filter (33) + : : : +- * ColumnarToRow (32) + : : : +- Scan parquet default.catalog_returns (31) + : : +- ReusedExchange (34) + : +- ReusedExchange (37) + +- BroadcastExchange (58) + +- * HashAggregate (57) + +- Exchange (56) + +- * HashAggregate (55) + +- * Project (54) + +- * BroadcastHashJoin Inner BuildRight (53) + :- * Project (51) + : +- * BroadcastHashJoin Inner BuildRight (50) + : :- * Filter (48) + : : +- * ColumnarToRow (47) + : : +- Scan parquet default.web_returns (46) + : +- ReusedExchange (49) + +- ReusedExchange (52) + + +(1) Scan parquet default.store_returns +Output [3]: [sr_returned_date_sk#1, sr_item_sk#2, sr_return_quantity#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_returns] +PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_returned_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [3]: [sr_returned_date_sk#1, sr_item_sk#2, sr_return_quantity#3] + +(3) Filter [codegen id : 5] +Input [3]: [sr_returned_date_sk#1, sr_item_sk#2, sr_return_quantity#3] +Condition : (isnotnull(sr_item_sk#2) AND isnotnull(sr_returned_date_sk#1)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#4, d_date#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#4, d_date#5] + +(6) Filter [codegen id : 3] +Input [2]: [d_date_sk#4, d_date#5] +Condition : isnotnull(d_date_sk#4) + +(7) Scan 
parquet default.date_dim +Output [2]: [d_date#5, d_week_seq#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 2] +Input [2]: [d_date#5, d_week_seq#6] + +(9) Scan parquet default.date_dim +Output [2]: [d_date#5, d_week_seq#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +ReadSchema: struct + +(10) ColumnarToRow [codegen id : 1] +Input [2]: [d_date#5, d_week_seq#6] + +(11) Filter [codegen id : 1] +Input [2]: [d_date#5, d_week_seq#6] +Condition : cast(d_date#5 as string) IN (2000-06-30,2000-09-27,2000-11-17) + +(12) Project [codegen id : 1] +Output [1]: [d_week_seq#6 AS d_week_seq#6#7] +Input [2]: [d_date#5, d_week_seq#6] + +(13) BroadcastExchange +Input [1]: [d_week_seq#6#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] + +(14) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [d_week_seq#6] +Right keys [1]: [d_week_seq#6#7] +Join condition: None + +(15) Project [codegen id : 2] +Output [1]: [d_date#5 AS d_date#5#9] +Input [2]: [d_date#5, d_week_seq#6] + +(16) BroadcastExchange +Input [1]: [d_date#5#9] +Arguments: HashedRelationBroadcastMode(List(input[0, date, true]),false), [id=#10] + +(17) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date#5] +Right keys [1]: [d_date#5#9] +Join condition: None + +(18) Project [codegen id : 3] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_date#5] + +(19) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] + +(20) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [sr_returned_date_sk#1] +Right keys [1]: [cast(d_date_sk#4 as bigint)] +Join condition: None + +(21) 
Project [codegen id : 5] +Output [2]: [sr_item_sk#2, sr_return_quantity#3] +Input [4]: [sr_returned_date_sk#1, sr_item_sk#2, sr_return_quantity#3, d_date_sk#4] + +(22) Scan parquet default.item +Output [2]: [i_item_sk#12, i_item_id#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#12, i_item_id#13] + +(24) Filter [codegen id : 4] +Input [2]: [i_item_sk#12, i_item_id#13] +Condition : (isnotnull(i_item_sk#12) AND isnotnull(i_item_id#13)) + +(25) BroadcastExchange +Input [2]: [i_item_sk#12, i_item_id#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] + +(26) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [sr_item_sk#2] +Right keys [1]: [cast(i_item_sk#12 as bigint)] +Join condition: None + +(27) Project [codegen id : 5] +Output [2]: [sr_return_quantity#3, i_item_id#13] +Input [4]: [sr_item_sk#2, sr_return_quantity#3, i_item_sk#12, i_item_id#13] + +(28) HashAggregate [codegen id : 5] +Input [2]: [sr_return_quantity#3, i_item_id#13] +Keys [1]: [i_item_id#13] +Functions [1]: [partial_sum(cast(sr_return_quantity#3 as bigint))] +Aggregate Attributes [1]: [sum#15] +Results [2]: [i_item_id#13, sum#16] + +(29) Exchange +Input [2]: [i_item_id#13, sum#16] +Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#17] + +(30) HashAggregate [codegen id : 18] +Input [2]: [i_item_id#13, sum#16] +Keys [1]: [i_item_id#13] +Functions [1]: [sum(cast(sr_return_quantity#3 as bigint))] +Aggregate Attributes [1]: [sum(cast(sr_return_quantity#3 as bigint))#18] +Results [2]: [i_item_id#13 AS item_id#19, sum(cast(sr_return_quantity#3 as bigint))#18 AS sr_item_qty#20] + +(31) Scan parquet default.catalog_returns +Output [3]: [cr_returned_date_sk#21, cr_item_sk#22, 
cr_return_quantity#23] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_returns] +PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_returned_date_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 10] +Input [3]: [cr_returned_date_sk#21, cr_item_sk#22, cr_return_quantity#23] + +(33) Filter [codegen id : 10] +Input [3]: [cr_returned_date_sk#21, cr_item_sk#22, cr_return_quantity#23] +Condition : (isnotnull(cr_item_sk#22) AND isnotnull(cr_returned_date_sk#21)) + +(34) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#4] + +(35) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cr_returned_date_sk#21] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(36) Project [codegen id : 10] +Output [2]: [cr_item_sk#22, cr_return_quantity#23] +Input [4]: [cr_returned_date_sk#21, cr_item_sk#22, cr_return_quantity#23, d_date_sk#4] + +(37) ReusedExchange [Reuses operator id: 25] +Output [2]: [i_item_sk#12, i_item_id#13] + +(38) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cr_item_sk#22] +Right keys [1]: [i_item_sk#12] +Join condition: None + +(39) Project [codegen id : 10] +Output [2]: [cr_return_quantity#23, i_item_id#13] +Input [4]: [cr_item_sk#22, cr_return_quantity#23, i_item_sk#12, i_item_id#13] + +(40) HashAggregate [codegen id : 10] +Input [2]: [cr_return_quantity#23, i_item_id#13] +Keys [1]: [i_item_id#13] +Functions [1]: [partial_sum(cast(cr_return_quantity#23 as bigint))] +Aggregate Attributes [1]: [sum#24] +Results [2]: [i_item_id#13, sum#25] + +(41) Exchange +Input [2]: [i_item_id#13, sum#25] +Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#26] + +(42) HashAggregate [codegen id : 11] +Input [2]: [i_item_id#13, sum#25] +Keys [1]: [i_item_id#13] +Functions [1]: [sum(cast(cr_return_quantity#23 as bigint))] +Aggregate Attributes [1]: [sum(cast(cr_return_quantity#23 as bigint))#27] +Results [2]: 
[i_item_id#13 AS item_id#28, sum(cast(cr_return_quantity#23 as bigint))#27 AS cr_item_qty#29] + +(43) BroadcastExchange +Input [2]: [item_id#28, cr_item_qty#29] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#30] + +(44) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [item_id#19] +Right keys [1]: [item_id#28] +Join condition: None + +(45) Project [codegen id : 18] +Output [3]: [item_id#19, sr_item_qty#20, cr_item_qty#29] +Input [4]: [item_id#19, sr_item_qty#20, item_id#28, cr_item_qty#29] + +(46) Scan parquet default.web_returns +Output [3]: [wr_returned_date_sk#31, wr_item_sk#32, wr_return_quantity#33] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_returns] +PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_returned_date_sk)] +ReadSchema: struct + +(47) ColumnarToRow [codegen id : 16] +Input [3]: [wr_returned_date_sk#31, wr_item_sk#32, wr_return_quantity#33] + +(48) Filter [codegen id : 16] +Input [3]: [wr_returned_date_sk#31, wr_item_sk#32, wr_return_quantity#33] +Condition : (isnotnull(wr_item_sk#32) AND isnotnull(wr_returned_date_sk#31)) + +(49) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#4] + +(50) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [wr_returned_date_sk#31] +Right keys [1]: [cast(d_date_sk#4 as bigint)] +Join condition: None + +(51) Project [codegen id : 16] +Output [2]: [wr_item_sk#32, wr_return_quantity#33] +Input [4]: [wr_returned_date_sk#31, wr_item_sk#32, wr_return_quantity#33, d_date_sk#4] + +(52) ReusedExchange [Reuses operator id: 25] +Output [2]: [i_item_sk#12, i_item_id#13] + +(53) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [wr_item_sk#32] +Right keys [1]: [cast(i_item_sk#12 as bigint)] +Join condition: None + +(54) Project [codegen id : 16] +Output [2]: [wr_return_quantity#33, i_item_id#13] +Input [4]: [wr_item_sk#32, wr_return_quantity#33, 
i_item_sk#12, i_item_id#13] + +(55) HashAggregate [codegen id : 16] +Input [2]: [wr_return_quantity#33, i_item_id#13] +Keys [1]: [i_item_id#13] +Functions [1]: [partial_sum(cast(wr_return_quantity#33 as bigint))] +Aggregate Attributes [1]: [sum#34] +Results [2]: [i_item_id#13, sum#35] + +(56) Exchange +Input [2]: [i_item_id#13, sum#35] +Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#36] + +(57) HashAggregate [codegen id : 17] +Input [2]: [i_item_id#13, sum#35] +Keys [1]: [i_item_id#13] +Functions [1]: [sum(cast(wr_return_quantity#33 as bigint))] +Aggregate Attributes [1]: [sum(cast(wr_return_quantity#33 as bigint))#37] +Results [2]: [i_item_id#13 AS item_id#38, sum(cast(wr_return_quantity#33 as bigint))#37 AS wr_item_qty#39] + +(58) BroadcastExchange +Input [2]: [item_id#38, wr_item_qty#39] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#40] + +(59) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [item_id#19] +Right keys [1]: [item_id#38] +Join condition: None + +(60) Project [codegen id : 18] +Output [8]: [item_id#19, sr_item_qty#20, (((cast(sr_item_qty#20 as double) / cast(((sr_item_qty#20 + cr_item_qty#29) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS sr_dev#41, cr_item_qty#29, (((cast(cr_item_qty#29 as double) / cast(((sr_item_qty#20 + cr_item_qty#29) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS cr_dev#42, wr_item_qty#39, (((cast(wr_item_qty#39 as double) / cast(((sr_item_qty#20 + cr_item_qty#29) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS wr_dev#43, CheckOverflow((promote_precision(cast(cast(((sr_item_qty#20 + cr_item_qty#29) + wr_item_qty#39) as decimal(20,0)) as decimal(21,1))) / 3.0), DecimalType(27,6), true) AS average#44] +Input [5]: [item_id#19, sr_item_qty#20, cr_item_qty#29, item_id#38, wr_item_qty#39] + +(61) TakeOrderedAndProject +Input [8]: [item_id#19, sr_item_qty#20, sr_dev#41, cr_item_qty#29, cr_dev#42, wr_item_qty#39, wr_dev#43, average#44] +Arguments: 100, [item_id#19 ASC NULLS FIRST, 
sr_item_qty#20 ASC NULLS FIRST], [item_id#19, sr_item_qty#20, sr_dev#41, cr_item_qty#29, cr_dev#42, wr_item_qty#39, wr_dev#43, average#44] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.sf100/simplified.txt new file mode 100644 index 0000000000000..294bc6bea32d6 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.sf100/simplified.txt @@ -0,0 +1,91 @@ +TakeOrderedAndProject [average,cr_dev,cr_item_qty,item_id,sr_dev,sr_item_qty,wr_dev,wr_item_qty] + WholeStageCodegen (18) + Project [cr_item_qty,item_id,sr_item_qty,wr_item_qty] + BroadcastHashJoin [item_id,item_id] + Project [cr_item_qty,item_id,sr_item_qty] + BroadcastHashJoin [item_id,item_id] + HashAggregate [i_item_id,sum] [item_id,sr_item_qty,sum,sum(cast(sr_return_quantity as bigint))] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen (5) + HashAggregate [i_item_id,sr_return_quantity] [sum,sum] + Project [i_item_id,sr_return_quantity] + BroadcastHashJoin [i_item_sk,sr_item_sk] + Project [sr_item_sk,sr_return_quantity] + BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + Filter [sr_item_sk,sr_returned_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_return_quantity,sr_returned_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (3) + Project [d_date_sk] + BroadcastHashJoin [d_date,d_date] + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [d_date] + BroadcastHashJoin [d_week_seq,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_week_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_week_seq] + Filter [d_date] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim 
[d_date,d_week_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [i_item_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_id,i_item_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (11) + HashAggregate [i_item_id,sum] [cr_item_qty,item_id,sum,sum(cast(cr_return_quantity as bigint))] + InputAdapter + Exchange [i_item_id] #7 + WholeStageCodegen (10) + HashAggregate [cr_return_quantity,i_item_id] [sum,sum] + Project [cr_return_quantity,i_item_id] + BroadcastHashJoin [cr_item_sk,i_item_sk] + Project [cr_item_sk,cr_return_quantity] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Filter [cr_item_sk,cr_returned_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_return_quantity,cr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #2 + InputAdapter + ReusedExchange [i_item_id,i_item_sk] #5 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (17) + HashAggregate [i_item_id,sum] [item_id,sum,sum(cast(wr_return_quantity as bigint)),wr_item_qty] + InputAdapter + Exchange [i_item_id] #9 + WholeStageCodegen (16) + HashAggregate [i_item_id,wr_return_quantity] [sum,sum] + Project [i_item_id,wr_return_quantity] + BroadcastHashJoin [i_item_sk,wr_item_sk] + Project [wr_item_sk,wr_return_quantity] + BroadcastHashJoin [d_date_sk,wr_returned_date_sk] + Filter [wr_item_sk,wr_returned_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_item_sk,wr_return_quantity,wr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #2 + InputAdapter + ReusedExchange [i_item_id,i_item_sk] #5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/explain.txt new file mode 100644 index 0000000000000..39fc20878d286 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/explain.txt @@ 
-0,0 +1,344 @@ +== Physical Plan == +TakeOrderedAndProject (61) ++- * Project (60) + +- * BroadcastHashJoin Inner BuildRight (59) + :- * Project (45) + : +- * BroadcastHashJoin Inner BuildRight (44) + : :- * HashAggregate (30) + : : +- Exchange (29) + : : +- * HashAggregate (28) + : : +- * Project (27) + : : +- * BroadcastHashJoin Inner BuildRight (26) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_returns (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.item (4) + : : +- BroadcastExchange (25) + : : +- * Project (24) + : : +- * BroadcastHashJoin LeftSemi BuildRight (23) + : : :- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.date_dim (10) + : : +- BroadcastExchange (22) + : : +- * Project (21) + : : +- * BroadcastHashJoin LeftSemi BuildRight (20) + : : :- * ColumnarToRow (14) + : : : +- Scan parquet default.date_dim (13) + : : +- BroadcastExchange (19) + : : +- * Project (18) + : : +- * Filter (17) + : : +- * ColumnarToRow (16) + : : +- Scan parquet default.date_dim (15) + : +- BroadcastExchange (43) + : +- * HashAggregate (42) + : +- Exchange (41) + : +- * HashAggregate (40) + : +- * Project (39) + : +- * BroadcastHashJoin Inner BuildRight (38) + : :- * Project (36) + : : +- * BroadcastHashJoin Inner BuildRight (35) + : : :- * Filter (33) + : : : +- * ColumnarToRow (32) + : : : +- Scan parquet default.catalog_returns (31) + : : +- ReusedExchange (34) + : +- ReusedExchange (37) + +- BroadcastExchange (58) + +- * HashAggregate (57) + +- Exchange (56) + +- * HashAggregate (55) + +- * Project (54) + +- * BroadcastHashJoin Inner BuildRight (53) + :- * Project (51) + : +- * BroadcastHashJoin Inner BuildRight (50) + : :- * Filter (48) + : : +- * ColumnarToRow (47) + : : +- Scan parquet default.web_returns (46) + : +- ReusedExchange (49) + +- 
ReusedExchange (52) + + +(1) Scan parquet default.store_returns +Output [3]: [sr_returned_date_sk#1, sr_item_sk#2, sr_return_quantity#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_returns] +PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_returned_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [3]: [sr_returned_date_sk#1, sr_item_sk#2, sr_return_quantity#3] + +(3) Filter [codegen id : 5] +Input [3]: [sr_returned_date_sk#1, sr_item_sk#2, sr_return_quantity#3] +Condition : (isnotnull(sr_item_sk#2) AND isnotnull(sr_returned_date_sk#1)) + +(4) Scan parquet default.item +Output [2]: [i_item_sk#4, i_item_id#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [i_item_sk#4, i_item_id#5] + +(6) Filter [codegen id : 1] +Input [2]: [i_item_sk#4, i_item_id#5] +Condition : (isnotnull(i_item_sk#4) AND isnotnull(i_item_id#5)) + +(7) BroadcastExchange +Input [2]: [i_item_sk#4, i_item_id#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#6] + +(8) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [sr_item_sk#2] +Right keys [1]: [cast(i_item_sk#4 as bigint)] +Join condition: None + +(9) Project [codegen id : 5] +Output [3]: [sr_returned_date_sk#1, sr_return_quantity#3, i_item_id#5] +Input [5]: [sr_returned_date_sk#1, sr_item_sk#2, sr_return_quantity#3, i_item_sk#4, i_item_id#5] + +(10) Scan parquet default.date_dim +Output [2]: [d_date_sk#7, d_date#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: 
[IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 4] +Input [2]: [d_date_sk#7, d_date#8] + +(12) Filter [codegen id : 4] +Input [2]: [d_date_sk#7, d_date#8] +Condition : isnotnull(d_date_sk#7) + +(13) Scan parquet default.date_dim +Output [2]: [d_date#8, d_week_seq#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 3] +Input [2]: [d_date#8, d_week_seq#9] + +(15) Scan parquet default.date_dim +Output [2]: [d_date#8, d_week_seq#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 2] +Input [2]: [d_date#8, d_week_seq#9] + +(17) Filter [codegen id : 2] +Input [2]: [d_date#8, d_week_seq#9] +Condition : cast(d_date#8 as string) IN (2000-06-30,2000-09-27,2000-11-17) + +(18) Project [codegen id : 2] +Output [1]: [d_week_seq#9 AS d_week_seq#9#10] +Input [2]: [d_date#8, d_week_seq#9] + +(19) BroadcastExchange +Input [1]: [d_week_seq#9#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] + +(20) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_week_seq#9] +Right keys [1]: [d_week_seq#9#10] +Join condition: None + +(21) Project [codegen id : 3] +Output [1]: [d_date#8 AS d_date#8#12] +Input [2]: [d_date#8, d_week_seq#9] + +(22) BroadcastExchange +Input [1]: [d_date#8#12] +Arguments: HashedRelationBroadcastMode(List(input[0, date, true]),false), [id=#13] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [d_date#8] +Right keys [1]: [d_date#8#12] +Join condition: None + +(24) Project [codegen id : 4] +Output [1]: [d_date_sk#7] +Input [2]: [d_date_sk#7, d_date#8] + +(25) BroadcastExchange +Input [1]: [d_date_sk#7] +Arguments: 
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] + +(26) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [sr_returned_date_sk#1] +Right keys [1]: [cast(d_date_sk#7 as bigint)] +Join condition: None + +(27) Project [codegen id : 5] +Output [2]: [sr_return_quantity#3, i_item_id#5] +Input [4]: [sr_returned_date_sk#1, sr_return_quantity#3, i_item_id#5, d_date_sk#7] + +(28) HashAggregate [codegen id : 5] +Input [2]: [sr_return_quantity#3, i_item_id#5] +Keys [1]: [i_item_id#5] +Functions [1]: [partial_sum(cast(sr_return_quantity#3 as bigint))] +Aggregate Attributes [1]: [sum#15] +Results [2]: [i_item_id#5, sum#16] + +(29) Exchange +Input [2]: [i_item_id#5, sum#16] +Arguments: hashpartitioning(i_item_id#5, 5), true, [id=#17] + +(30) HashAggregate [codegen id : 18] +Input [2]: [i_item_id#5, sum#16] +Keys [1]: [i_item_id#5] +Functions [1]: [sum(cast(sr_return_quantity#3 as bigint))] +Aggregate Attributes [1]: [sum(cast(sr_return_quantity#3 as bigint))#18] +Results [2]: [i_item_id#5 AS item_id#19, sum(cast(sr_return_quantity#3 as bigint))#18 AS sr_item_qty#20] + +(31) Scan parquet default.catalog_returns +Output [3]: [cr_returned_date_sk#21, cr_item_sk#22, cr_return_quantity#23] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_returns] +PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_returned_date_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 10] +Input [3]: [cr_returned_date_sk#21, cr_item_sk#22, cr_return_quantity#23] + +(33) Filter [codegen id : 10] +Input [3]: [cr_returned_date_sk#21, cr_item_sk#22, cr_return_quantity#23] +Condition : (isnotnull(cr_item_sk#22) AND isnotnull(cr_returned_date_sk#21)) + +(34) ReusedExchange [Reuses operator id: 7] +Output [2]: [i_item_sk#4, i_item_id#5] + +(35) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cr_item_sk#22] +Right keys [1]: [i_item_sk#4] +Join 
condition: None + +(36) Project [codegen id : 10] +Output [3]: [cr_returned_date_sk#21, cr_return_quantity#23, i_item_id#5] +Input [5]: [cr_returned_date_sk#21, cr_item_sk#22, cr_return_quantity#23, i_item_sk#4, i_item_id#5] + +(37) ReusedExchange [Reuses operator id: 25] +Output [1]: [d_date_sk#7] + +(38) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cr_returned_date_sk#21] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(39) Project [codegen id : 10] +Output [2]: [cr_return_quantity#23, i_item_id#5] +Input [4]: [cr_returned_date_sk#21, cr_return_quantity#23, i_item_id#5, d_date_sk#7] + +(40) HashAggregate [codegen id : 10] +Input [2]: [cr_return_quantity#23, i_item_id#5] +Keys [1]: [i_item_id#5] +Functions [1]: [partial_sum(cast(cr_return_quantity#23 as bigint))] +Aggregate Attributes [1]: [sum#24] +Results [2]: [i_item_id#5, sum#25] + +(41) Exchange +Input [2]: [i_item_id#5, sum#25] +Arguments: hashpartitioning(i_item_id#5, 5), true, [id=#26] + +(42) HashAggregate [codegen id : 11] +Input [2]: [i_item_id#5, sum#25] +Keys [1]: [i_item_id#5] +Functions [1]: [sum(cast(cr_return_quantity#23 as bigint))] +Aggregate Attributes [1]: [sum(cast(cr_return_quantity#23 as bigint))#27] +Results [2]: [i_item_id#5 AS item_id#28, sum(cast(cr_return_quantity#23 as bigint))#27 AS cr_item_qty#29] + +(43) BroadcastExchange +Input [2]: [item_id#28, cr_item_qty#29] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#30] + +(44) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [item_id#19] +Right keys [1]: [item_id#28] +Join condition: None + +(45) Project [codegen id : 18] +Output [3]: [item_id#19, sr_item_qty#20, cr_item_qty#29] +Input [4]: [item_id#19, sr_item_qty#20, item_id#28, cr_item_qty#29] + +(46) Scan parquet default.web_returns +Output [3]: [wr_returned_date_sk#31, wr_item_sk#32, wr_return_quantity#33] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_returns] +PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_returned_date_sk)] +ReadSchema: struct + +(47) ColumnarToRow [codegen id : 16] +Input [3]: [wr_returned_date_sk#31, wr_item_sk#32, wr_return_quantity#33] + +(48) Filter [codegen id : 16] +Input [3]: [wr_returned_date_sk#31, wr_item_sk#32, wr_return_quantity#33] +Condition : (isnotnull(wr_item_sk#32) AND isnotnull(wr_returned_date_sk#31)) + +(49) ReusedExchange [Reuses operator id: 7] +Output [2]: [i_item_sk#4, i_item_id#5] + +(50) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [wr_item_sk#32] +Right keys [1]: [cast(i_item_sk#4 as bigint)] +Join condition: None + +(51) Project [codegen id : 16] +Output [3]: [wr_returned_date_sk#31, wr_return_quantity#33, i_item_id#5] +Input [5]: [wr_returned_date_sk#31, wr_item_sk#32, wr_return_quantity#33, i_item_sk#4, i_item_id#5] + +(52) ReusedExchange [Reuses operator id: 25] +Output [1]: [d_date_sk#7] + +(53) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [wr_returned_date_sk#31] +Right keys [1]: [cast(d_date_sk#7 as bigint)] +Join condition: None + +(54) Project [codegen id : 16] +Output [2]: [wr_return_quantity#33, i_item_id#5] +Input [4]: [wr_returned_date_sk#31, wr_return_quantity#33, i_item_id#5, d_date_sk#7] + +(55) HashAggregate [codegen id : 16] +Input [2]: [wr_return_quantity#33, i_item_id#5] +Keys [1]: [i_item_id#5] +Functions [1]: [partial_sum(cast(wr_return_quantity#33 as bigint))] +Aggregate Attributes [1]: [sum#34] +Results [2]: [i_item_id#5, sum#35] + +(56) Exchange +Input [2]: [i_item_id#5, sum#35] +Arguments: hashpartitioning(i_item_id#5, 5), true, [id=#36] + +(57) HashAggregate [codegen id : 17] +Input [2]: [i_item_id#5, sum#35] +Keys [1]: [i_item_id#5] +Functions [1]: [sum(cast(wr_return_quantity#33 as bigint))] +Aggregate Attributes [1]: [sum(cast(wr_return_quantity#33 as bigint))#37] +Results [2]: [i_item_id#5 AS 
item_id#38, sum(cast(wr_return_quantity#33 as bigint))#37 AS wr_item_qty#39] + +(58) BroadcastExchange +Input [2]: [item_id#38, wr_item_qty#39] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#40] + +(59) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [item_id#19] +Right keys [1]: [item_id#38] +Join condition: None + +(60) Project [codegen id : 18] +Output [8]: [item_id#19, sr_item_qty#20, (((cast(sr_item_qty#20 as double) / cast(((sr_item_qty#20 + cr_item_qty#29) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS sr_dev#41, cr_item_qty#29, (((cast(cr_item_qty#29 as double) / cast(((sr_item_qty#20 + cr_item_qty#29) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS cr_dev#42, wr_item_qty#39, (((cast(wr_item_qty#39 as double) / cast(((sr_item_qty#20 + cr_item_qty#29) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS wr_dev#43, CheckOverflow((promote_precision(cast(cast(((sr_item_qty#20 + cr_item_qty#29) + wr_item_qty#39) as decimal(20,0)) as decimal(21,1))) / 3.0), DecimalType(27,6), true) AS average#44] +Input [5]: [item_id#19, sr_item_qty#20, cr_item_qty#29, item_id#38, wr_item_qty#39] + +(61) TakeOrderedAndProject +Input [8]: [item_id#19, sr_item_qty#20, sr_dev#41, cr_item_qty#29, cr_dev#42, wr_item_qty#39, wr_dev#43, average#44] +Arguments: 100, [item_id#19 ASC NULLS FIRST, sr_item_qty#20 ASC NULLS FIRST], [item_id#19, sr_item_qty#20, sr_dev#41, cr_item_qty#29, cr_dev#42, wr_item_qty#39, wr_dev#43, average#44] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/simplified.txt new file mode 100644 index 0000000000000..d0639eada79ae --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/simplified.txt @@ -0,0 +1,91 @@ +TakeOrderedAndProject [average,cr_dev,cr_item_qty,item_id,sr_dev,sr_item_qty,wr_dev,wr_item_qty] + WholeStageCodegen (18) + Project 
[cr_item_qty,item_id,sr_item_qty,wr_item_qty] + BroadcastHashJoin [item_id,item_id] + Project [cr_item_qty,item_id,sr_item_qty] + BroadcastHashJoin [item_id,item_id] + HashAggregate [i_item_id,sum] [item_id,sr_item_qty,sum,sum(cast(sr_return_quantity as bigint))] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen (5) + HashAggregate [i_item_id,sr_return_quantity] [sum,sum] + Project [i_item_id,sr_return_quantity] + BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + Project [i_item_id,sr_return_quantity,sr_returned_date_sk] + BroadcastHashJoin [i_item_sk,sr_item_sk] + Filter [sr_item_sk,sr_returned_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_return_quantity,sr_returned_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [i_item_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_id,i_item_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (4) + Project [d_date_sk] + BroadcastHashJoin [d_date,d_date] + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date] + BroadcastHashJoin [d_week_seq,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_week_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Project [d_week_seq] + Filter [d_date] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_week_seq] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (11) + HashAggregate [i_item_id,sum] [cr_item_qty,item_id,sum,sum(cast(cr_return_quantity as bigint))] + InputAdapter + Exchange [i_item_id] #7 + WholeStageCodegen (10) + HashAggregate [cr_return_quantity,i_item_id] [sum,sum] + Project [cr_return_quantity,i_item_id] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Project [cr_return_quantity,cr_returned_date_sk,i_item_id] + BroadcastHashJoin 
[cr_item_sk,i_item_sk] + Filter [cr_item_sk,cr_returned_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_return_quantity,cr_returned_date_sk] + InputAdapter + ReusedExchange [i_item_id,i_item_sk] #2 + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (17) + HashAggregate [i_item_id,sum] [item_id,sum,sum(cast(wr_return_quantity as bigint)),wr_item_qty] + InputAdapter + Exchange [i_item_id] #9 + WholeStageCodegen (16) + HashAggregate [i_item_id,wr_return_quantity] [sum,sum] + Project [i_item_id,wr_return_quantity] + BroadcastHashJoin [d_date_sk,wr_returned_date_sk] + Project [i_item_id,wr_return_quantity,wr_returned_date_sk] + BroadcastHashJoin [i_item_sk,wr_item_sk] + Filter [wr_item_sk,wr_returned_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_item_sk,wr_return_quantity,wr_returned_date_sk] + InputAdapter + ReusedExchange [i_item_id,i_item_sk] #2 + InputAdapter + ReusedExchange [d_date_sk] #3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.sf100/explain.txt new file mode 100644 index 0000000000000..4109009213406 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.sf100/explain.txt @@ -0,0 +1,200 @@ +== Physical Plan == +TakeOrderedAndProject (36) ++- * Project (35) + +- * BroadcastHashJoin Inner BuildLeft (34) + :- BroadcastExchange (30) + : +- * Project (29) + : +- * BroadcastHashJoin Inner BuildLeft (28) + : :- BroadcastExchange (24) + : : +- * Project (23) + : : +- * BroadcastHashJoin Inner BuildRight (22) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.customer (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + 
: : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.customer_address (4) + : : +- BroadcastExchange (21) + : : +- * Project (20) + : : +- * BroadcastHashJoin Inner BuildLeft (19) + : : :- BroadcastExchange (15) + : : : +- * Project (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet default.income_band (11) + : : +- * Filter (18) + : : +- * ColumnarToRow (17) + : : +- Scan parquet default.household_demographics (16) + : +- * Filter (27) + : +- * ColumnarToRow (26) + : +- Scan parquet default.customer_demographics (25) + +- * Filter (33) + +- * ColumnarToRow (32) + +- Scan parquet default.store_returns (31) + + +(1) Scan parquet default.customer +Output [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6] + +(3) Filter [codegen id : 4] +Input [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6] +Condition : ((isnotnull(c_current_addr_sk#4) AND isnotnull(c_current_cdemo_sk#2)) AND isnotnull(c_current_hdemo_sk#3)) + +(4) Scan parquet default.customer_address +Output [2]: [ca_address_sk#7, ca_city#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_city), EqualTo(ca_city,Edgewood), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: 
[ca_address_sk#7, ca_city#8] + +(6) Filter [codegen id : 1] +Input [2]: [ca_address_sk#7, ca_city#8] +Condition : ((isnotnull(ca_city#8) AND (ca_city#8 = Edgewood)) AND isnotnull(ca_address_sk#7)) + +(7) Project [codegen id : 1] +Output [1]: [ca_address_sk#7] +Input [2]: [ca_address_sk#7, ca_city#8] + +(8) BroadcastExchange +Input [1]: [ca_address_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [c_current_addr_sk#4] +Right keys [1]: [ca_address_sk#7] +Join condition: None + +(10) Project [codegen id : 4] +Output [5]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6] +Input [7]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6, ca_address_sk#7] + +(11) Scan parquet default.income_band +Output [3]: [ib_income_band_sk#10, ib_lower_bound#11, ib_upper_bound#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/income_band] +PushedFilters: [IsNotNull(ib_lower_bound), IsNotNull(ib_upper_bound), GreaterThanOrEqual(ib_lower_bound,38128), LessThanOrEqual(ib_upper_bound,88128), IsNotNull(ib_income_band_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [ib_income_band_sk#10, ib_lower_bound#11, ib_upper_bound#12] + +(13) Filter [codegen id : 2] +Input [3]: [ib_income_band_sk#10, ib_lower_bound#11, ib_upper_bound#12] +Condition : ((((isnotnull(ib_lower_bound#11) AND isnotnull(ib_upper_bound#12)) AND (ib_lower_bound#11 >= 38128)) AND (ib_upper_bound#12 <= 88128)) AND isnotnull(ib_income_band_sk#10)) + +(14) Project [codegen id : 2] +Output [1]: [ib_income_band_sk#10] +Input [3]: [ib_income_band_sk#10, ib_lower_bound#11, ib_upper_bound#12] + +(15) BroadcastExchange +Input [1]: [ib_income_band_sk#10] +Arguments: 
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(16) Scan parquet default.household_demographics +Output [2]: [hd_demo_sk#14, hd_income_band_sk#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/household_demographics] +PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)] +ReadSchema: struct + +(17) ColumnarToRow +Input [2]: [hd_demo_sk#14, hd_income_band_sk#15] + +(18) Filter +Input [2]: [hd_demo_sk#14, hd_income_band_sk#15] +Condition : (isnotnull(hd_demo_sk#14) AND isnotnull(hd_income_band_sk#15)) + +(19) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ib_income_band_sk#10] +Right keys [1]: [hd_income_band_sk#15] +Join condition: None + +(20) Project [codegen id : 3] +Output [1]: [hd_demo_sk#14] +Input [3]: [ib_income_band_sk#10, hd_demo_sk#14, hd_income_band_sk#15] + +(21) BroadcastExchange +Input [1]: [hd_demo_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(22) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [c_current_hdemo_sk#3] +Right keys [1]: [hd_demo_sk#14] +Join condition: None + +(23) Project [codegen id : 4] +Output [4]: [c_customer_id#1, c_current_cdemo_sk#2, c_first_name#5, c_last_name#6] +Input [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, hd_demo_sk#14] + +(24) BroadcastExchange +Input [4]: [c_customer_id#1, c_current_cdemo_sk#2, c_first_name#5, c_last_name#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#17] + +(25) Scan parquet default.customer_demographics +Output [1]: [cd_demo_sk#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_demographics] +PushedFilters: 
[IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(26) ColumnarToRow +Input [1]: [cd_demo_sk#18] + +(27) Filter +Input [1]: [cd_demo_sk#18] +Condition : isnotnull(cd_demo_sk#18) + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_current_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#18] +Join condition: None + +(29) Project [codegen id : 5] +Output [4]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#18] +Input [5]: [c_customer_id#1, c_current_cdemo_sk#2, c_first_name#5, c_last_name#6, cd_demo_sk#18] + +(30) BroadcastExchange +Input [4]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[3, int, true] as bigint)),false), [id=#19] + +(31) Scan parquet default.store_returns +Output [1]: [sr_cdemo_sk#20] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_returns] +PushedFilters: [IsNotNull(sr_cdemo_sk)] +ReadSchema: struct + +(32) ColumnarToRow +Input [1]: [sr_cdemo_sk#20] + +(33) Filter +Input [1]: [sr_cdemo_sk#20] +Condition : isnotnull(sr_cdemo_sk#20) + +(34) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cast(cd_demo_sk#18 as bigint)] +Right keys [1]: [sr_cdemo_sk#20] +Join condition: None + +(35) Project [codegen id : 6] +Output [3]: [c_customer_id#1 AS customer_id#21, concat(c_last_name#6, , , c_first_name#5) AS customername#22, c_customer_id#1] +Input [5]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#18, sr_cdemo_sk#20] + +(36) TakeOrderedAndProject +Input [3]: [customer_id#21, customername#22, c_customer_id#1] +Arguments: 100, [c_customer_id#1 ASC NULLS FIRST], [customer_id#21, customername#22] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.sf100/simplified.txt new file mode 100644 index 
0000000000000..7c2afa21084ea --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.sf100/simplified.txt @@ -0,0 +1,53 @@ +TakeOrderedAndProject [c_customer_id,customer_id,customername] + WholeStageCodegen (6) + Project [c_customer_id,c_first_name,c_last_name] + BroadcastHashJoin [cd_demo_sk,sr_cdemo_sk] + InputAdapter + BroadcastExchange #1 + WholeStageCodegen (5) + Project [c_customer_id,c_first_name,c_last_name,cd_demo_sk] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (4) + Project [c_current_cdemo_sk,c_customer_id,c_first_name,c_last_name] + BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] + Project [c_current_cdemo_sk,c_current_hdemo_sk,c_customer_id,c_first_name,c_last_name] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Filter [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_id,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [ca_address_sk] + Filter [ca_address_sk,ca_city] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_city] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [hd_demo_sk] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Project [ib_income_band_sk] + Filter [ib_income_band_sk,ib_lower_bound,ib_upper_bound] + ColumnarToRow + InputAdapter + Scan parquet default.income_band [ib_income_band_sk,ib_lower_bound,ib_upper_bound] + Filter [hd_demo_sk,hd_income_band_sk] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_income_band_sk] + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk] + Filter [sr_cdemo_sk] + ColumnarToRow + 
InputAdapter + Scan parquet default.store_returns [sr_cdemo_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84/explain.txt new file mode 100644 index 0000000000000..bd581e4738788 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84/explain.txt @@ -0,0 +1,200 @@ +== Physical Plan == +TakeOrderedAndProject (36) ++- * Project (35) + +- * BroadcastHashJoin Inner BuildRight (34) + :- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.customer (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.customer_address (4) + : : : +- BroadcastExchange (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet default.customer_demographics (11) + : : +- BroadcastExchange (20) + : : +- * Filter (19) + : : +- * ColumnarToRow (18) + : : +- Scan parquet default.household_demographics (17) + : +- BroadcastExchange (27) + : +- * Project (26) + : +- * Filter (25) + : +- * ColumnarToRow (24) + : +- Scan parquet default.income_band (23) + +- BroadcastExchange (33) + +- * Filter (32) + +- * ColumnarToRow (31) + +- Scan parquet default.store_returns (30) + + +(1) Scan parquet default.customer +Output [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 6] +Input [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6] + +(3) Filter [codegen id : 6] +Input [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6] +Condition : ((isnotnull(c_current_addr_sk#4) AND isnotnull(c_current_cdemo_sk#2)) AND isnotnull(c_current_hdemo_sk#3)) + +(4) Scan parquet default.customer_address +Output [2]: [ca_address_sk#7, ca_city#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_city), EqualTo(ca_city,Edgewood), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [ca_address_sk#7, ca_city#8] + +(6) Filter [codegen id : 1] +Input [2]: [ca_address_sk#7, ca_city#8] +Condition : ((isnotnull(ca_city#8) AND (ca_city#8 = Edgewood)) AND isnotnull(ca_address_sk#7)) + +(7) Project [codegen id : 1] +Output [1]: [ca_address_sk#7] +Input [2]: [ca_address_sk#7, ca_city#8] + +(8) BroadcastExchange +Input [1]: [ca_address_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + +(9) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [c_current_addr_sk#4] +Right keys [1]: [ca_address_sk#7] +Join condition: None + +(10) Project [codegen id : 6] +Output [5]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6] +Input [7]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6, ca_address_sk#7] + +(11) 
Scan parquet default.customer_demographics +Output [1]: [cd_demo_sk#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [1]: [cd_demo_sk#10] + +(13) Filter [codegen id : 2] +Input [1]: [cd_demo_sk#10] +Condition : isnotnull(cd_demo_sk#10) + +(14) BroadcastExchange +Input [1]: [cd_demo_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] + +(15) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [c_current_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#10] +Join condition: None + +(16) Project [codegen id : 6] +Output [5]: [c_customer_id#1, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, cd_demo_sk#10] +Input [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, cd_demo_sk#10] + +(17) Scan parquet default.household_demographics +Output [2]: [hd_demo_sk#12, hd_income_band_sk#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/household_demographics] +PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [2]: [hd_demo_sk#12, hd_income_band_sk#13] + +(19) Filter [codegen id : 3] +Input [2]: [hd_demo_sk#12, hd_income_band_sk#13] +Condition : (isnotnull(hd_demo_sk#12) AND isnotnull(hd_income_band_sk#13)) + +(20) BroadcastExchange +Input [2]: [hd_demo_sk#12, hd_income_band_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] + +(21) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [c_current_hdemo_sk#3] +Right keys [1]: [hd_demo_sk#12] +Join condition: None + +(22) Project [codegen id : 6] +Output [5]: 
[c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#10, hd_income_band_sk#13] +Input [7]: [c_customer_id#1, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, cd_demo_sk#10, hd_demo_sk#12, hd_income_band_sk#13] + +(23) Scan parquet default.income_band +Output [3]: [ib_income_band_sk#15, ib_lower_bound#16, ib_upper_bound#17] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/income_band] +PushedFilters: [IsNotNull(ib_lower_bound), IsNotNull(ib_upper_bound), GreaterThanOrEqual(ib_lower_bound,38128), LessThanOrEqual(ib_upper_bound,88128), IsNotNull(ib_income_band_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [3]: [ib_income_band_sk#15, ib_lower_bound#16, ib_upper_bound#17] + +(25) Filter [codegen id : 4] +Input [3]: [ib_income_band_sk#15, ib_lower_bound#16, ib_upper_bound#17] +Condition : ((((isnotnull(ib_lower_bound#16) AND isnotnull(ib_upper_bound#17)) AND (ib_lower_bound#16 >= 38128)) AND (ib_upper_bound#17 <= 88128)) AND isnotnull(ib_income_band_sk#15)) + +(26) Project [codegen id : 4] +Output [1]: [ib_income_band_sk#15] +Input [3]: [ib_income_band_sk#15, ib_lower_bound#16, ib_upper_bound#17] + +(27) BroadcastExchange +Input [1]: [ib_income_band_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] + +(28) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [hd_income_band_sk#13] +Right keys [1]: [ib_income_band_sk#15] +Join condition: None + +(29) Project [codegen id : 6] +Output [4]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#10] +Input [6]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#10, hd_income_band_sk#13, ib_income_band_sk#15] + +(30) Scan parquet default.store_returns +Output [1]: [sr_cdemo_sk#19] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_returns] +PushedFilters: [IsNotNull(sr_cdemo_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 5] +Input [1]: [sr_cdemo_sk#19] + +(32) Filter [codegen id : 5] +Input [1]: [sr_cdemo_sk#19] +Condition : isnotnull(sr_cdemo_sk#19) + +(33) BroadcastExchange +Input [1]: [sr_cdemo_sk#19] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [id=#20] + +(34) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cast(cd_demo_sk#10 as bigint)] +Right keys [1]: [sr_cdemo_sk#19] +Join condition: None + +(35) Project [codegen id : 6] +Output [3]: [c_customer_id#1 AS customer_id#21, concat(c_last_name#6, , , c_first_name#5) AS customername#22, c_customer_id#1] +Input [5]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#10, sr_cdemo_sk#19] + +(36) TakeOrderedAndProject +Input [3]: [customer_id#21, customername#22, c_customer_id#1] +Arguments: 100, [c_customer_id#1 ASC NULLS FIRST], [customer_id#21, customername#22] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84/simplified.txt new file mode 100644 index 0000000000000..c771df2baf5fe --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84/simplified.txt @@ -0,0 +1,53 @@ +TakeOrderedAndProject [c_customer_id,customer_id,customername] + WholeStageCodegen (6) + Project [c_customer_id,c_first_name,c_last_name] + BroadcastHashJoin [cd_demo_sk,sr_cdemo_sk] + Project [c_customer_id,c_first_name,c_last_name,cd_demo_sk] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [c_customer_id,c_first_name,c_last_name,cd_demo_sk,hd_income_band_sk] + BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] + Project [c_current_hdemo_sk,c_customer_id,c_first_name,c_last_name,cd_demo_sk] + BroadcastHashJoin 
[c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk,c_current_hdemo_sk,c_customer_id,c_first_name,c_last_name] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Filter [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_id,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #1 + WholeStageCodegen (1) + Project [ca_address_sk] + Filter [ca_address_sk,ca_city] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_city] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (2) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (3) + Filter [hd_demo_sk,hd_income_band_sk] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_income_band_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (4) + Project [ib_income_band_sk] + Filter [ib_income_band_sk,ib_lower_bound,ib_upper_bound] + ColumnarToRow + InputAdapter + Scan parquet default.income_band [ib_income_band_sk,ib_lower_bound,ib_upper_bound] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (5) + Filter [sr_cdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_cdemo_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.sf100/explain.txt new file mode 100644 index 0000000000000..86d82ddb52e1a --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.sf100/explain.txt @@ -0,0 +1,317 @@ +== Physical Plan == +TakeOrderedAndProject (57) ++- * HashAggregate (56) + +- Exchange (55) + +- * HashAggregate (54) + +- * Project (53) + +- * SortMergeJoin Inner (52) + :- * Sort 
(40) + : +- Exchange (39) + : +- * Project (38) + : +- * BroadcastHashJoin Inner BuildRight (37) + : :- * Project (31) + : : +- * BroadcastHashJoin Inner BuildRight (30) + : : :- * Project (25) + : : : +- * BroadcastHashJoin Inner BuildLeft (24) + : : : :- BroadcastExchange (5) + : : : : +- * Project (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.date_dim (1) + : : : +- * Project (23) + : : : +- * SortMergeJoin Inner (22) + : : : :- * Sort (16) + : : : : +- Exchange (15) + : : : : +- * Project (14) + : : : : +- * BroadcastHashJoin Inner BuildRight (13) + : : : : :- * Filter (8) + : : : : : +- * ColumnarToRow (7) + : : : : : +- Scan parquet default.web_sales (6) + : : : : +- BroadcastExchange (12) + : : : : +- * Filter (11) + : : : : +- * ColumnarToRow (10) + : : : : +- Scan parquet default.web_page (9) + : : : +- * Sort (21) + : : : +- Exchange (20) + : : : +- * Filter (19) + : : : +- * ColumnarToRow (18) + : : : +- Scan parquet default.web_returns (17) + : : +- BroadcastExchange (29) + : : +- * Filter (28) + : : +- * ColumnarToRow (27) + : : +- Scan parquet default.reason (26) + : +- BroadcastExchange (36) + : +- * Project (35) + : +- * Filter (34) + : +- * ColumnarToRow (33) + : +- Scan parquet default.customer_address (32) + +- * Sort (51) + +- Exchange (50) + +- * Project (49) + +- * BroadcastHashJoin Inner BuildRight (48) + :- * Filter (43) + : +- * ColumnarToRow (42) + : +- Scan parquet default.customer_demographics (41) + +- BroadcastExchange (47) + +- * Filter (46) + +- * ColumnarToRow (45) + +- Scan parquet default.customer_demographics (44) + + +(1) Scan parquet default.date_dim +Output [2]: [d_date_sk#1, d_year#2] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow 
[codegen id : 1] +Input [2]: [d_date_sk#1, d_year#2] + +(3) Filter [codegen id : 1] +Input [2]: [d_date_sk#1, d_year#2] +Condition : ((isnotnull(d_year#2) AND (d_year#2 = 2000)) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 1] +Output [1]: [d_date_sk#1] +Input [2]: [d_date_sk#1, d_year#2] + +(5) BroadcastExchange +Input [1]: [d_date_sk#1] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#3] + +(6) Scan parquet default.web_sales +Output [7]: [ws_sold_date_sk#4, ws_item_sk#5, ws_web_page_sk#6, ws_order_number#7, ws_quantity#8, ws_sales_price#9, ws_net_profit#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_web_page_sk), IsNotNull(ws_sold_date_sk), Or(Or(And(GreaterThanOrEqual(ws_sales_price,100.00),LessThanOrEqual(ws_sales_price,150.00)),And(GreaterThanOrEqual(ws_sales_price,50.00),LessThanOrEqual(ws_sales_price,100.00))),And(GreaterThanOrEqual(ws_sales_price,150.00),LessThanOrEqual(ws_sales_price,200.00))), Or(Or(And(GreaterThanOrEqual(ws_net_profit,100.00),LessThanOrEqual(ws_net_profit,200.00)),And(GreaterThanOrEqual(ws_net_profit,150.00),LessThanOrEqual(ws_net_profit,300.00))),And(GreaterThanOrEqual(ws_net_profit,50.00),LessThanOrEqual(ws_net_profit,250.00)))] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 3] +Input [7]: [ws_sold_date_sk#4, ws_item_sk#5, ws_web_page_sk#6, ws_order_number#7, ws_quantity#8, ws_sales_price#9, ws_net_profit#10] + +(8) Filter [codegen id : 3] +Input [7]: [ws_sold_date_sk#4, ws_item_sk#5, ws_web_page_sk#6, ws_order_number#7, ws_quantity#8, ws_sales_price#9, ws_net_profit#10] +Condition : (((((isnotnull(ws_item_sk#5) AND isnotnull(ws_order_number#7)) AND isnotnull(ws_web_page_sk#6)) AND isnotnull(ws_sold_date_sk#4)) AND ((((ws_sales_price#9 >= 100.00) AND 
(ws_sales_price#9 <= 150.00)) OR ((ws_sales_price#9 >= 50.00) AND (ws_sales_price#9 <= 100.00))) OR ((ws_sales_price#9 >= 150.00) AND (ws_sales_price#9 <= 200.00)))) AND ((((ws_net_profit#10 >= 100.00) AND (ws_net_profit#10 <= 200.00)) OR ((ws_net_profit#10 >= 150.00) AND (ws_net_profit#10 <= 300.00))) OR ((ws_net_profit#10 >= 50.00) AND (ws_net_profit#10 <= 250.00)))) + +(9) Scan parquet default.web_page +Output [1]: [wp_web_page_sk#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_page] +PushedFilters: [IsNotNull(wp_web_page_sk)] +ReadSchema: struct + +(10) ColumnarToRow [codegen id : 2] +Input [1]: [wp_web_page_sk#11] + +(11) Filter [codegen id : 2] +Input [1]: [wp_web_page_sk#11] +Condition : isnotnull(wp_web_page_sk#11) + +(12) BroadcastExchange +Input [1]: [wp_web_page_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] + +(13) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_web_page_sk#6] +Right keys [1]: [wp_web_page_sk#11] +Join condition: None + +(14) Project [codegen id : 3] +Output [6]: [ws_sold_date_sk#4, ws_item_sk#5, ws_order_number#7, ws_quantity#8, ws_sales_price#9, ws_net_profit#10] +Input [8]: [ws_sold_date_sk#4, ws_item_sk#5, ws_web_page_sk#6, ws_order_number#7, ws_quantity#8, ws_sales_price#9, ws_net_profit#10, wp_web_page_sk#11] + +(15) Exchange +Input [6]: [ws_sold_date_sk#4, ws_item_sk#5, ws_order_number#7, ws_quantity#8, ws_sales_price#9, ws_net_profit#10] +Arguments: hashpartitioning(cast(ws_item_sk#5 as bigint), cast(ws_order_number#7 as bigint), 5), true, [id=#13] + +(16) Sort [codegen id : 4] +Input [6]: [ws_sold_date_sk#4, ws_item_sk#5, ws_order_number#7, ws_quantity#8, ws_sales_price#9, ws_net_profit#10] +Arguments: [cast(ws_item_sk#5 as bigint) ASC NULLS FIRST, cast(ws_order_number#7 as bigint) ASC NULLS FIRST], false, 0 + +(17) Scan parquet 
default.web_returns +Output [8]: [wr_item_sk#14, wr_refunded_cdemo_sk#15, wr_refunded_addr_sk#16, wr_returning_cdemo_sk#17, wr_reason_sk#18, wr_order_number#19, wr_fee#20, wr_refunded_cash#21] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_returns] +PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_order_number), IsNotNull(wr_refunded_cdemo_sk), IsNotNull(wr_returning_cdemo_sk), IsNotNull(wr_refunded_addr_sk), IsNotNull(wr_reason_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 5] +Input [8]: [wr_item_sk#14, wr_refunded_cdemo_sk#15, wr_refunded_addr_sk#16, wr_returning_cdemo_sk#17, wr_reason_sk#18, wr_order_number#19, wr_fee#20, wr_refunded_cash#21] + +(19) Filter [codegen id : 5] +Input [8]: [wr_item_sk#14, wr_refunded_cdemo_sk#15, wr_refunded_addr_sk#16, wr_returning_cdemo_sk#17, wr_reason_sk#18, wr_order_number#19, wr_fee#20, wr_refunded_cash#21] +Condition : (((((isnotnull(wr_item_sk#14) AND isnotnull(wr_order_number#19)) AND isnotnull(wr_refunded_cdemo_sk#15)) AND isnotnull(wr_returning_cdemo_sk#17)) AND isnotnull(wr_refunded_addr_sk#16)) AND isnotnull(wr_reason_sk#18)) + +(20) Exchange +Input [8]: [wr_item_sk#14, wr_refunded_cdemo_sk#15, wr_refunded_addr_sk#16, wr_returning_cdemo_sk#17, wr_reason_sk#18, wr_order_number#19, wr_fee#20, wr_refunded_cash#21] +Arguments: hashpartitioning(wr_item_sk#14, wr_order_number#19, 5), true, [id=#22] + +(21) Sort [codegen id : 6] +Input [8]: [wr_item_sk#14, wr_refunded_cdemo_sk#15, wr_refunded_addr_sk#16, wr_returning_cdemo_sk#17, wr_reason_sk#18, wr_order_number#19, wr_fee#20, wr_refunded_cash#21] +Arguments: [wr_item_sk#14 ASC NULLS FIRST, wr_order_number#19 ASC NULLS FIRST], false, 0 + +(22) SortMergeJoin +Left keys [2]: [cast(ws_item_sk#5 as bigint), cast(ws_order_number#7 as bigint)] +Right keys [2]: [wr_item_sk#14, wr_order_number#19] +Join condition: None + +(23) Project +Output 
[10]: [ws_sold_date_sk#4, ws_quantity#8, ws_sales_price#9, ws_net_profit#10, wr_refunded_cdemo_sk#15, wr_refunded_addr_sk#16, wr_returning_cdemo_sk#17, wr_reason_sk#18, wr_fee#20, wr_refunded_cash#21] +Input [14]: [ws_sold_date_sk#4, ws_item_sk#5, ws_order_number#7, ws_quantity#8, ws_sales_price#9, ws_net_profit#10, wr_item_sk#14, wr_refunded_cdemo_sk#15, wr_refunded_addr_sk#16, wr_returning_cdemo_sk#17, wr_reason_sk#18, wr_order_number#19, wr_fee#20, wr_refunded_cash#21] + +(24) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ws_sold_date_sk#4] +Join condition: None + +(25) Project [codegen id : 9] +Output [9]: [ws_quantity#8, ws_sales_price#9, ws_net_profit#10, wr_refunded_cdemo_sk#15, wr_refunded_addr_sk#16, wr_returning_cdemo_sk#17, wr_reason_sk#18, wr_fee#20, wr_refunded_cash#21] +Input [11]: [d_date_sk#1, ws_sold_date_sk#4, ws_quantity#8, ws_sales_price#9, ws_net_profit#10, wr_refunded_cdemo_sk#15, wr_refunded_addr_sk#16, wr_returning_cdemo_sk#17, wr_reason_sk#18, wr_fee#20, wr_refunded_cash#21] + +(26) Scan parquet default.reason +Output [2]: [r_reason_sk#23, r_reason_desc#24] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/reason] +PushedFilters: [IsNotNull(r_reason_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 7] +Input [2]: [r_reason_sk#23, r_reason_desc#24] + +(28) Filter [codegen id : 7] +Input [2]: [r_reason_sk#23, r_reason_desc#24] +Condition : isnotnull(r_reason_sk#23) + +(29) BroadcastExchange +Input [2]: [r_reason_sk#23, r_reason_desc#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#25] + +(30) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [wr_reason_sk#18] +Right keys [1]: [cast(r_reason_sk#23 as bigint)] +Join condition: None + +(31) Project [codegen id : 9] +Output [9]: [ws_quantity#8, ws_sales_price#9, ws_net_profit#10, 
wr_refunded_cdemo_sk#15, wr_refunded_addr_sk#16, wr_returning_cdemo_sk#17, wr_fee#20, wr_refunded_cash#21, r_reason_desc#24] +Input [11]: [ws_quantity#8, ws_sales_price#9, ws_net_profit#10, wr_refunded_cdemo_sk#15, wr_refunded_addr_sk#16, wr_returning_cdemo_sk#17, wr_reason_sk#18, wr_fee#20, wr_refunded_cash#21, r_reason_sk#23, r_reason_desc#24] + +(32) Scan parquet default.customer_address +Output [3]: [ca_address_sk#26, ca_state#27, ca_country#28] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk), Or(Or(In(ca_state, [IN,OH,NJ]),In(ca_state, [WI,CT,KY])),In(ca_state, [LA,IA,AR]))] +ReadSchema: struct + +(33) ColumnarToRow [codegen id : 8] +Input [3]: [ca_address_sk#26, ca_state#27, ca_country#28] + +(34) Filter [codegen id : 8] +Input [3]: [ca_address_sk#26, ca_state#27, ca_country#28] +Condition : (((isnotnull(ca_country#28) AND (ca_country#28 = United States)) AND isnotnull(ca_address_sk#26)) AND ((ca_state#27 IN (IN,OH,NJ) OR ca_state#27 IN (WI,CT,KY)) OR ca_state#27 IN (LA,IA,AR))) + +(35) Project [codegen id : 8] +Output [2]: [ca_address_sk#26, ca_state#27] +Input [3]: [ca_address_sk#26, ca_state#27, ca_country#28] + +(36) BroadcastExchange +Input [2]: [ca_address_sk#26, ca_state#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#29] + +(37) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [wr_refunded_addr_sk#16] +Right keys [1]: [cast(ca_address_sk#26 as bigint)] +Join condition: ((((ca_state#27 IN (IN,OH,NJ) AND (ws_net_profit#10 >= 100.00)) AND (ws_net_profit#10 <= 200.00)) OR ((ca_state#27 IN (WI,CT,KY) AND (ws_net_profit#10 >= 150.00)) AND (ws_net_profit#10 <= 300.00))) OR ((ca_state#27 IN (LA,IA,AR) AND (ws_net_profit#10 >= 50.00)) AND (ws_net_profit#10 <= 250.00))) + +(38) 
Project [codegen id : 9] +Output [7]: [ws_quantity#8, ws_sales_price#9, wr_refunded_cdemo_sk#15, wr_returning_cdemo_sk#17, wr_fee#20, wr_refunded_cash#21, r_reason_desc#24] +Input [11]: [ws_quantity#8, ws_sales_price#9, ws_net_profit#10, wr_refunded_cdemo_sk#15, wr_refunded_addr_sk#16, wr_returning_cdemo_sk#17, wr_fee#20, wr_refunded_cash#21, r_reason_desc#24, ca_address_sk#26, ca_state#27] + +(39) Exchange +Input [7]: [ws_quantity#8, ws_sales_price#9, wr_refunded_cdemo_sk#15, wr_returning_cdemo_sk#17, wr_fee#20, wr_refunded_cash#21, r_reason_desc#24] +Arguments: hashpartitioning(wr_refunded_cdemo_sk#15, wr_returning_cdemo_sk#17, 5), true, [id=#30] + +(40) Sort [codegen id : 10] +Input [7]: [ws_quantity#8, ws_sales_price#9, wr_refunded_cdemo_sk#15, wr_returning_cdemo_sk#17, wr_fee#20, wr_refunded_cash#21, r_reason_desc#24] +Arguments: [wr_refunded_cdemo_sk#15 ASC NULLS FIRST, wr_returning_cdemo_sk#17 ASC NULLS FIRST], false, 0 + +(41) Scan parquet default.customer_demographics +Output [3]: [cd_demo_sk#31, cd_marital_status#32, cd_education_status#33] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_demographics] +PushedFilters: [IsNotNull(cd_education_status), IsNotNull(cd_marital_status), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(42) ColumnarToRow [codegen id : 12] +Input [3]: [cd_demo_sk#31, cd_marital_status#32, cd_education_status#33] + +(43) Filter [codegen id : 12] +Input [3]: [cd_demo_sk#31, cd_marital_status#32, cd_education_status#33] +Condition : ((isnotnull(cd_education_status#33) AND isnotnull(cd_marital_status#32)) AND isnotnull(cd_demo_sk#31)) + +(44) Scan parquet default.customer_demographics +Output [3]: [cd_demo_sk#34, cd_marital_status#35, cd_education_status#36] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_education_status), IsNotNull(cd_marital_status), Or(Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Advanced Degree)),And(EqualTo(cd_marital_status,S),EqualTo(cd_education_status,College))),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,2 yr Degree)))] +ReadSchema: struct + +(45) ColumnarToRow [codegen id : 11] +Input [3]: [cd_demo_sk#34, cd_marital_status#35, cd_education_status#36] + +(46) Filter [codegen id : 11] +Input [3]: [cd_demo_sk#34, cd_marital_status#35, cd_education_status#36] +Condition : (((isnotnull(cd_demo_sk#34) AND isnotnull(cd_education_status#36)) AND isnotnull(cd_marital_status#35)) AND ((((cd_marital_status#35 = M) AND (cd_education_status#36 = Advanced Degree)) OR ((cd_marital_status#35 = S) AND (cd_education_status#36 = College))) OR ((cd_marital_status#35 = W) AND (cd_education_status#36 = 2 yr Degree)))) + +(47) BroadcastExchange +Input [3]: [cd_demo_sk#34, cd_marital_status#35, cd_education_status#36] +Arguments: HashedRelationBroadcastMode(List(input[2, string, false], input[1, string, false]),false), [id=#37] + +(48) BroadcastHashJoin [codegen id : 12] +Left keys [2]: [cd_education_status#33, cd_marital_status#32] +Right keys [2]: [cd_education_status#36, cd_marital_status#35] +Join condition: None + +(49) Project [codegen id : 12] +Output [4]: [cd_demo_sk#31, cd_demo_sk#34, cd_marital_status#35, cd_education_status#36] +Input [6]: [cd_demo_sk#31, cd_marital_status#32, cd_education_status#33, cd_demo_sk#34, cd_marital_status#35, cd_education_status#36] + +(50) Exchange +Input [4]: [cd_demo_sk#31, cd_demo_sk#34, cd_marital_status#35, cd_education_status#36] +Arguments: hashpartitioning(cast(cd_demo_sk#34 as bigint), cast(cd_demo_sk#31 as bigint), 5), true, [id=#38] + +(51) Sort [codegen id : 13] +Input [4]: 
[cd_demo_sk#31, cd_demo_sk#34, cd_marital_status#35, cd_education_status#36] +Arguments: [cast(cd_demo_sk#34 as bigint) ASC NULLS FIRST, cast(cd_demo_sk#31 as bigint) ASC NULLS FIRST], false, 0 + +(52) SortMergeJoin [codegen id : 14] +Left keys [2]: [wr_refunded_cdemo_sk#15, wr_returning_cdemo_sk#17] +Right keys [2]: [cast(cd_demo_sk#34 as bigint), cast(cd_demo_sk#31 as bigint)] +Join condition: ((((((cd_marital_status#35 = M) AND (cd_education_status#36 = Advanced Degree)) AND (ws_sales_price#9 >= 100.00)) AND (ws_sales_price#9 <= 150.00)) OR ((((cd_marital_status#35 = S) AND (cd_education_status#36 = College)) AND (ws_sales_price#9 >= 50.00)) AND (ws_sales_price#9 <= 100.00))) OR ((((cd_marital_status#35 = W) AND (cd_education_status#36 = 2 yr Degree)) AND (ws_sales_price#9 >= 150.00)) AND (ws_sales_price#9 <= 200.00))) + +(53) Project [codegen id : 14] +Output [4]: [ws_quantity#8, wr_fee#20, wr_refunded_cash#21, r_reason_desc#24] +Input [11]: [ws_quantity#8, ws_sales_price#9, wr_refunded_cdemo_sk#15, wr_returning_cdemo_sk#17, wr_fee#20, wr_refunded_cash#21, r_reason_desc#24, cd_demo_sk#31, cd_demo_sk#34, cd_marital_status#35, cd_education_status#36] + +(54) HashAggregate [codegen id : 14] +Input [4]: [ws_quantity#8, wr_fee#20, wr_refunded_cash#21, r_reason_desc#24] +Keys [1]: [r_reason_desc#24] +Functions [3]: [partial_avg(cast(ws_quantity#8 as bigint)), partial_avg(UnscaledValue(wr_refunded_cash#21)), partial_avg(UnscaledValue(wr_fee#20))] +Aggregate Attributes [6]: [sum#39, count#40, sum#41, count#42, sum#43, count#44] +Results [7]: [r_reason_desc#24, sum#45, count#46, sum#47, count#48, sum#49, count#50] + +(55) Exchange +Input [7]: [r_reason_desc#24, sum#45, count#46, sum#47, count#48, sum#49, count#50] +Arguments: hashpartitioning(r_reason_desc#24, 5), true, [id=#51] + +(56) HashAggregate [codegen id : 15] +Input [7]: [r_reason_desc#24, sum#45, count#46, sum#47, count#48, sum#49, count#50] +Keys [1]: [r_reason_desc#24] +Functions [3]: [avg(cast(ws_quantity#8 
as bigint)), avg(UnscaledValue(wr_refunded_cash#21)), avg(UnscaledValue(wr_fee#20))] +Aggregate Attributes [3]: [avg(cast(ws_quantity#8 as bigint))#52, avg(UnscaledValue(wr_refunded_cash#21))#53, avg(UnscaledValue(wr_fee#20))#54] +Results [5]: [substr(r_reason_desc#24, 1, 20) AS substr(r_reason_desc, 1, 20)#55, avg(cast(ws_quantity#8 as bigint))#52 AS avg(ws_quantity)#56, cast((avg(UnscaledValue(wr_refunded_cash#21))#53 / 100.0) as decimal(11,6)) AS avg(wr_refunded_cash)#57, cast((avg(UnscaledValue(wr_fee#20))#54 / 100.0) as decimal(11,6)) AS avg(wr_fee)#58, avg(cast(ws_quantity#8 as bigint))#52 AS aggOrder#59] + +(57) TakeOrderedAndProject +Input [5]: [substr(r_reason_desc, 1, 20)#55, avg(ws_quantity)#56, avg(wr_refunded_cash)#57, avg(wr_fee)#58, aggOrder#59] +Arguments: 100, [substr(r_reason_desc, 1, 20)#55 ASC NULLS FIRST, aggOrder#59 ASC NULLS FIRST, avg(wr_refunded_cash)#57 ASC NULLS FIRST, avg(wr_fee)#58 ASC NULLS FIRST], [substr(r_reason_desc, 1, 20)#55, avg(ws_quantity)#56, avg(wr_refunded_cash)#57, avg(wr_fee)#58] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.sf100/simplified.txt new file mode 100644 index 0000000000000..19941ad51fc18 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.sf100/simplified.txt @@ -0,0 +1,94 @@ +TakeOrderedAndProject [aggOrder,avg(wr_fee),avg(wr_refunded_cash),avg(ws_quantity),substr(r_reason_desc, 1, 20)] + WholeStageCodegen (15) + HashAggregate [count,count,count,r_reason_desc,sum,sum,sum] [aggOrder,avg(UnscaledValue(wr_fee)),avg(UnscaledValue(wr_refunded_cash)),avg(cast(ws_quantity as bigint)),avg(wr_fee),avg(wr_refunded_cash),avg(ws_quantity),count,count,count,substr(r_reason_desc, 1, 20),sum,sum,sum] + InputAdapter + Exchange [r_reason_desc] #1 + WholeStageCodegen (14) + HashAggregate [r_reason_desc,wr_fee,wr_refunded_cash,ws_quantity] 
[count,count,count,count,count,count,sum,sum,sum,sum,sum,sum] + Project [r_reason_desc,wr_fee,wr_refunded_cash,ws_quantity] + SortMergeJoin [cd_demo_sk,cd_demo_sk,cd_education_status,cd_marital_status,wr_refunded_cdemo_sk,wr_returning_cdemo_sk,ws_sales_price] + InputAdapter + WholeStageCodegen (10) + Sort [wr_refunded_cdemo_sk,wr_returning_cdemo_sk] + InputAdapter + Exchange [wr_refunded_cdemo_sk,wr_returning_cdemo_sk] #2 + WholeStageCodegen (9) + Project [r_reason_desc,wr_fee,wr_refunded_cash,wr_refunded_cdemo_sk,wr_returning_cdemo_sk,ws_quantity,ws_sales_price] + BroadcastHashJoin [ca_address_sk,ca_state,wr_refunded_addr_sk,ws_net_profit] + Project [r_reason_desc,wr_fee,wr_refunded_addr_sk,wr_refunded_cash,wr_refunded_cdemo_sk,wr_returning_cdemo_sk,ws_net_profit,ws_quantity,ws_sales_price] + BroadcastHashJoin [r_reason_sk,wr_reason_sk] + Project [wr_fee,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cash,wr_refunded_cdemo_sk,wr_returning_cdemo_sk,ws_net_profit,ws_quantity,ws_sales_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + Project [wr_fee,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cash,wr_refunded_cdemo_sk,wr_returning_cdemo_sk,ws_net_profit,ws_quantity,ws_sales_price,ws_sold_date_sk] + SortMergeJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + InputAdapter + WholeStageCodegen (4) + Sort [ws_item_sk,ws_order_number] + InputAdapter + Exchange [ws_item_sk,ws_order_number] #4 + WholeStageCodegen (3) + Project [ws_item_sk,ws_net_profit,ws_order_number,ws_quantity,ws_sales_price,ws_sold_date_sk] + BroadcastHashJoin [wp_web_page_sk,ws_web_page_sk] + Filter [ws_item_sk,ws_net_profit,ws_order_number,ws_sales_price,ws_sold_date_sk,ws_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales 
[ws_item_sk,ws_net_profit,ws_order_number,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_web_page_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [wp_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_page [wp_web_page_sk] + InputAdapter + WholeStageCodegen (6) + Sort [wr_item_sk,wr_order_number] + InputAdapter + Exchange [wr_item_sk,wr_order_number] #6 + WholeStageCodegen (5) + Filter [wr_item_sk,wr_order_number,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cdemo_sk,wr_returning_cdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_fee,wr_item_sk,wr_order_number,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cash,wr_refunded_cdemo_sk,wr_returning_cdemo_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (7) + Filter [r_reason_sk] + ColumnarToRow + InputAdapter + Scan parquet default.reason [r_reason_desc,r_reason_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (8) + Project [ca_address_sk,ca_state] + Filter [ca_address_sk,ca_country,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_country,ca_state] + InputAdapter + WholeStageCodegen (13) + Sort [cd_demo_sk,cd_demo_sk] + InputAdapter + Exchange [cd_demo_sk,cd_demo_sk] #9 + WholeStageCodegen (12) + Project [cd_demo_sk,cd_demo_sk,cd_education_status,cd_marital_status] + BroadcastHashJoin [cd_education_status,cd_education_status,cd_marital_status,cd_marital_status] + Filter [cd_demo_sk,cd_education_status,cd_marital_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_marital_status] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (11) + Filter [cd_demo_sk,cd_education_status,cd_marital_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_marital_status] diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/explain.txt new file mode 100644 index 0000000000000..94567de54317f --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/explain.txt @@ -0,0 +1,287 @@ +== Physical Plan == +TakeOrderedAndProject (51) ++- * HashAggregate (50) + +- Exchange (49) + +- * HashAggregate (48) + +- * Project (47) + +- * BroadcastHashJoin Inner BuildRight (46) + :- * Project (41) + : +- * BroadcastHashJoin Inner BuildRight (40) + : :- * Project (34) + : : +- * BroadcastHashJoin Inner BuildRight (33) + : : :- * Project (27) + : : : +- * BroadcastHashJoin Inner BuildRight (26) + : : : :- * Project (21) + : : : : +- * BroadcastHashJoin Inner BuildRight (20) + : : : : :- * Project (15) + : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : :- * Project (9) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : : : :- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet default.web_sales (1) + : : : : : : +- BroadcastExchange (7) + : : : : : : +- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : +- Scan parquet default.web_returns (4) + : : : : : +- BroadcastExchange (13) + : : : : : +- * Filter (12) + : : : : : +- * ColumnarToRow (11) + : : : : : +- Scan parquet default.web_page (10) + : : : : +- BroadcastExchange (19) + : : : : +- * Filter (18) + : : : : +- * ColumnarToRow (17) + : : : : +- Scan parquet default.customer_demographics (16) + : : : +- BroadcastExchange (25) + : : : +- * Filter (24) + : : : +- * ColumnarToRow (23) + : : : +- Scan parquet default.customer_demographics (22) + : : +- BroadcastExchange (32) + : : +- * Project (31) + : : +- * Filter (30) + : : +- * ColumnarToRow (29) + : : +- Scan parquet default.customer_address (28) + : +- BroadcastExchange (39) + : +- * Project (38) + : +- * Filter (37) + : +- * 
ColumnarToRow (36) + : +- Scan parquet default.date_dim (35) + +- BroadcastExchange (45) + +- * Filter (44) + +- * ColumnarToRow (43) + +- Scan parquet default.reason (42) + + +(1) Scan parquet default.web_sales +Output [7]: [ws_sold_date_sk#1, ws_item_sk#2, ws_web_page_sk#3, ws_order_number#4, ws_quantity#5, ws_sales_price#6, ws_net_profit#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_web_page_sk), IsNotNull(ws_sold_date_sk), Or(Or(And(GreaterThanOrEqual(ws_sales_price,100.00),LessThanOrEqual(ws_sales_price,150.00)),And(GreaterThanOrEqual(ws_sales_price,50.00),LessThanOrEqual(ws_sales_price,100.00))),And(GreaterThanOrEqual(ws_sales_price,150.00),LessThanOrEqual(ws_sales_price,200.00))), Or(Or(And(GreaterThanOrEqual(ws_net_profit,100.00),LessThanOrEqual(ws_net_profit,200.00)),And(GreaterThanOrEqual(ws_net_profit,150.00),LessThanOrEqual(ws_net_profit,300.00))),And(GreaterThanOrEqual(ws_net_profit,50.00),LessThanOrEqual(ws_net_profit,250.00)))] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 8] +Input [7]: [ws_sold_date_sk#1, ws_item_sk#2, ws_web_page_sk#3, ws_order_number#4, ws_quantity#5, ws_sales_price#6, ws_net_profit#7] + +(3) Filter [codegen id : 8] +Input [7]: [ws_sold_date_sk#1, ws_item_sk#2, ws_web_page_sk#3, ws_order_number#4, ws_quantity#5, ws_sales_price#6, ws_net_profit#7] +Condition : (((((isnotnull(ws_item_sk#2) AND isnotnull(ws_order_number#4)) AND isnotnull(ws_web_page_sk#3)) AND isnotnull(ws_sold_date_sk#1)) AND ((((ws_sales_price#6 >= 100.00) AND (ws_sales_price#6 <= 150.00)) OR ((ws_sales_price#6 >= 50.00) AND (ws_sales_price#6 <= 100.00))) OR ((ws_sales_price#6 >= 150.00) AND (ws_sales_price#6 <= 200.00)))) AND ((((ws_net_profit#7 >= 100.00) AND (ws_net_profit#7 <= 200.00)) OR ((ws_net_profit#7 >= 150.00) AND (ws_net_profit#7 <= 
300.00))) OR ((ws_net_profit#7 >= 50.00) AND (ws_net_profit#7 <= 250.00)))) + +(4) Scan parquet default.web_returns +Output [8]: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_returns] +PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_order_number), IsNotNull(wr_refunded_cdemo_sk), IsNotNull(wr_returning_cdemo_sk), IsNotNull(wr_refunded_addr_sk), IsNotNull(wr_reason_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [8]: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15] + +(6) Filter [codegen id : 1] +Input [8]: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15] +Condition : (((((isnotnull(wr_item_sk#8) AND isnotnull(wr_order_number#13)) AND isnotnull(wr_refunded_cdemo_sk#9)) AND isnotnull(wr_returning_cdemo_sk#11)) AND isnotnull(wr_refunded_addr_sk#10)) AND isnotnull(wr_reason_sk#12)) + +(7) BroadcastExchange +Input [8]: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false], input[5, bigint, false]),false), [id=#16] + +(8) BroadcastHashJoin [codegen id : 8] +Left keys [2]: [cast(ws_item_sk#2 as bigint), cast(ws_order_number#4 as bigint)] +Right keys [2]: [wr_item_sk#8, wr_order_number#13] +Join condition: None + +(9) Project [codegen id : 8] +Output [11]: [ws_sold_date_sk#1, ws_web_page_sk#3, ws_quantity#5, ws_sales_price#6, ws_net_profit#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, 
wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] +Input [15]: [ws_sold_date_sk#1, ws_item_sk#2, ws_web_page_sk#3, ws_order_number#4, ws_quantity#5, ws_sales_price#6, ws_net_profit#7, wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15] + +(10) Scan parquet default.web_page +Output [1]: [wp_web_page_sk#17] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_page] +PushedFilters: [IsNotNull(wp_web_page_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [1]: [wp_web_page_sk#17] + +(12) Filter [codegen id : 2] +Input [1]: [wp_web_page_sk#17] +Condition : isnotnull(wp_web_page_sk#17) + +(13) BroadcastExchange +Input [1]: [wp_web_page_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#18] + +(14) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ws_web_page_sk#3] +Right keys [1]: [wp_web_page_sk#17] +Join condition: None + +(15) Project [codegen id : 8] +Output [10]: [ws_sold_date_sk#1, ws_quantity#5, ws_sales_price#6, ws_net_profit#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] +Input [12]: [ws_sold_date_sk#1, ws_web_page_sk#3, ws_quantity#5, ws_sales_price#6, ws_net_profit#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, wp_web_page_sk#17] + +(16) Scan parquet default.customer_demographics +Output [3]: [cd_demo_sk#19, cd_marital_status#20, cd_education_status#21] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status), 
IsNotNull(cd_education_status), Or(Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Advanced Degree)),And(EqualTo(cd_marital_status,S),EqualTo(cd_education_status,College))),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,2 yr Degree)))] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [cd_demo_sk#19, cd_marital_status#20, cd_education_status#21] + +(18) Filter [codegen id : 3] +Input [3]: [cd_demo_sk#19, cd_marital_status#20, cd_education_status#21] +Condition : (((isnotnull(cd_demo_sk#19) AND isnotnull(cd_marital_status#20)) AND isnotnull(cd_education_status#21)) AND ((((cd_marital_status#20 = M) AND (cd_education_status#21 = Advanced Degree)) OR ((cd_marital_status#20 = S) AND (cd_education_status#21 = College))) OR ((cd_marital_status#20 = W) AND (cd_education_status#21 = 2 yr Degree)))) + +(19) BroadcastExchange +Input [3]: [cd_demo_sk#19, cd_marital_status#20, cd_education_status#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] + +(20) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [wr_refunded_cdemo_sk#9] +Right keys [1]: [cast(cd_demo_sk#19 as bigint)] +Join condition: ((((((cd_marital_status#20 = M) AND (cd_education_status#21 = Advanced Degree)) AND (ws_sales_price#6 >= 100.00)) AND (ws_sales_price#6 <= 150.00)) OR ((((cd_marital_status#20 = S) AND (cd_education_status#21 = College)) AND (ws_sales_price#6 >= 50.00)) AND (ws_sales_price#6 <= 100.00))) OR ((((cd_marital_status#20 = W) AND (cd_education_status#21 = 2 yr Degree)) AND (ws_sales_price#6 >= 150.00)) AND (ws_sales_price#6 <= 200.00))) + +(21) Project [codegen id : 8] +Output [10]: [ws_sold_date_sk#1, ws_quantity#5, ws_net_profit#7, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, cd_marital_status#20, cd_education_status#21] +Input [13]: [ws_sold_date_sk#1, ws_quantity#5, ws_sales_price#6, ws_net_profit#7, wr_refunded_cdemo_sk#9, 
wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, cd_demo_sk#19, cd_marital_status#20, cd_education_status#21] + +(22) Scan parquet default.customer_demographics +Output [3]: [cd_demo_sk#23, cd_marital_status#24, cd_education_status#25] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_demographics] +PushedFilters: [IsNotNull(cd_education_status), IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 4] +Input [3]: [cd_demo_sk#23, cd_marital_status#24, cd_education_status#25] + +(24) Filter [codegen id : 4] +Input [3]: [cd_demo_sk#23, cd_marital_status#24, cd_education_status#25] +Condition : ((isnotnull(cd_education_status#25) AND isnotnull(cd_demo_sk#23)) AND isnotnull(cd_marital_status#24)) + +(25) BroadcastExchange +Input [3]: [cd_demo_sk#23, cd_marital_status#24, cd_education_status#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint), input[1, string, false], input[2, string, false]),false), [id=#26] + +(26) BroadcastHashJoin [codegen id : 8] +Left keys [3]: [wr_returning_cdemo_sk#11, cd_marital_status#20, cd_education_status#21] +Right keys [3]: [cast(cd_demo_sk#23 as bigint), cd_marital_status#24, cd_education_status#25] +Join condition: None + +(27) Project [codegen id : 8] +Output [7]: [ws_sold_date_sk#1, ws_quantity#5, ws_net_profit#7, wr_refunded_addr_sk#10, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] +Input [13]: [ws_sold_date_sk#1, ws_quantity#5, ws_net_profit#7, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, cd_marital_status#20, cd_education_status#21, cd_demo_sk#23, cd_marital_status#24, cd_education_status#25] + +(28) Scan parquet default.customer_address +Output [3]: [ca_address_sk#27, ca_state#28, ca_country#29] +Batched: true +Location: 
InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk), Or(Or(In(ca_state, [IN,OH,NJ]),In(ca_state, [WI,CT,KY])),In(ca_state, [LA,IA,AR]))] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 5] +Input [3]: [ca_address_sk#27, ca_state#28, ca_country#29] + +(30) Filter [codegen id : 5] +Input [3]: [ca_address_sk#27, ca_state#28, ca_country#29] +Condition : (((isnotnull(ca_country#29) AND (ca_country#29 = United States)) AND isnotnull(ca_address_sk#27)) AND ((ca_state#28 IN (IN,OH,NJ) OR ca_state#28 IN (WI,CT,KY)) OR ca_state#28 IN (LA,IA,AR))) + +(31) Project [codegen id : 5] +Output [2]: [ca_address_sk#27, ca_state#28] +Input [3]: [ca_address_sk#27, ca_state#28, ca_country#29] + +(32) BroadcastExchange +Input [2]: [ca_address_sk#27, ca_state#28] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#30] + +(33) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [wr_refunded_addr_sk#10] +Right keys [1]: [cast(ca_address_sk#27 as bigint)] +Join condition: ((((ca_state#28 IN (IN,OH,NJ) AND (ws_net_profit#7 >= 100.00)) AND (ws_net_profit#7 <= 200.00)) OR ((ca_state#28 IN (WI,CT,KY) AND (ws_net_profit#7 >= 150.00)) AND (ws_net_profit#7 <= 300.00))) OR ((ca_state#28 IN (LA,IA,AR) AND (ws_net_profit#7 >= 50.00)) AND (ws_net_profit#7 <= 250.00))) + +(34) Project [codegen id : 8] +Output [5]: [ws_sold_date_sk#1, ws_quantity#5, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] +Input [9]: [ws_sold_date_sk#1, ws_quantity#5, ws_net_profit#7, wr_refunded_addr_sk#10, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, ca_address_sk#27, ca_state#28] + +(35) Scan parquet default.date_dim +Output [2]: [d_date_sk#31, d_year#32] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(36) ColumnarToRow [codegen id : 6] +Input [2]: [d_date_sk#31, d_year#32] + +(37) Filter [codegen id : 6] +Input [2]: [d_date_sk#31, d_year#32] +Condition : ((isnotnull(d_year#32) AND (d_year#32 = 2000)) AND isnotnull(d_date_sk#31)) + +(38) Project [codegen id : 6] +Output [1]: [d_date_sk#31] +Input [2]: [d_date_sk#31, d_year#32] + +(39) BroadcastExchange +Input [1]: [d_date_sk#31] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#33] + +(40) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ws_sold_date_sk#1] +Right keys [1]: [d_date_sk#31] +Join condition: None + +(41) Project [codegen id : 8] +Output [4]: [ws_quantity#5, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] +Input [6]: [ws_sold_date_sk#1, ws_quantity#5, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, d_date_sk#31] + +(42) Scan parquet default.reason +Output [2]: [r_reason_sk#34, r_reason_desc#35] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/reason] +PushedFilters: [IsNotNull(r_reason_sk)] +ReadSchema: struct + +(43) ColumnarToRow [codegen id : 7] +Input [2]: [r_reason_sk#34, r_reason_desc#35] + +(44) Filter [codegen id : 7] +Input [2]: [r_reason_sk#34, r_reason_desc#35] +Condition : isnotnull(r_reason_sk#34) + +(45) BroadcastExchange +Input [2]: [r_reason_sk#34, r_reason_desc#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#36] + +(46) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [wr_reason_sk#12] +Right keys [1]: [cast(r_reason_sk#34 as bigint)] +Join condition: None + +(47) Project [codegen id : 8] +Output [4]: [ws_quantity#5, wr_fee#14, wr_refunded_cash#15, 
r_reason_desc#35] +Input [6]: [ws_quantity#5, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, r_reason_sk#34, r_reason_desc#35] + +(48) HashAggregate [codegen id : 8] +Input [4]: [ws_quantity#5, wr_fee#14, wr_refunded_cash#15, r_reason_desc#35] +Keys [1]: [r_reason_desc#35] +Functions [3]: [partial_avg(cast(ws_quantity#5 as bigint)), partial_avg(UnscaledValue(wr_refunded_cash#15)), partial_avg(UnscaledValue(wr_fee#14))] +Aggregate Attributes [6]: [sum#37, count#38, sum#39, count#40, sum#41, count#42] +Results [7]: [r_reason_desc#35, sum#43, count#44, sum#45, count#46, sum#47, count#48] + +(49) Exchange +Input [7]: [r_reason_desc#35, sum#43, count#44, sum#45, count#46, sum#47, count#48] +Arguments: hashpartitioning(r_reason_desc#35, 5), true, [id=#49] + +(50) HashAggregate [codegen id : 9] +Input [7]: [r_reason_desc#35, sum#43, count#44, sum#45, count#46, sum#47, count#48] +Keys [1]: [r_reason_desc#35] +Functions [3]: [avg(cast(ws_quantity#5 as bigint)), avg(UnscaledValue(wr_refunded_cash#15)), avg(UnscaledValue(wr_fee#14))] +Aggregate Attributes [3]: [avg(cast(ws_quantity#5 as bigint))#50, avg(UnscaledValue(wr_refunded_cash#15))#51, avg(UnscaledValue(wr_fee#14))#52] +Results [5]: [substr(r_reason_desc#35, 1, 20) AS substr(r_reason_desc, 1, 20)#53, avg(cast(ws_quantity#5 as bigint))#50 AS avg(ws_quantity)#54, cast((avg(UnscaledValue(wr_refunded_cash#15))#51 / 100.0) as decimal(11,6)) AS avg(wr_refunded_cash)#55, cast((avg(UnscaledValue(wr_fee#14))#52 / 100.0) as decimal(11,6)) AS avg(wr_fee)#56, avg(cast(ws_quantity#5 as bigint))#50 AS aggOrder#57] + +(51) TakeOrderedAndProject +Input [5]: [substr(r_reason_desc, 1, 20)#53, avg(ws_quantity)#54, avg(wr_refunded_cash)#55, avg(wr_fee)#56, aggOrder#57] +Arguments: 100, [substr(r_reason_desc, 1, 20)#53 ASC NULLS FIRST, aggOrder#57 ASC NULLS FIRST, avg(wr_refunded_cash)#55 ASC NULLS FIRST, avg(wr_fee)#56 ASC NULLS FIRST], [substr(r_reason_desc, 1, 20)#53, avg(ws_quantity)#54, avg(wr_refunded_cash)#55, avg(wr_fee)#56] + 
diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/simplified.txt new file mode 100644 index 0000000000000..db6da292ac49b --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/simplified.txt @@ -0,0 +1,76 @@ +TakeOrderedAndProject [aggOrder,avg(wr_fee),avg(wr_refunded_cash),avg(ws_quantity),substr(r_reason_desc, 1, 20)] + WholeStageCodegen (9) + HashAggregate [count,count,count,r_reason_desc,sum,sum,sum] [aggOrder,avg(UnscaledValue(wr_fee)),avg(UnscaledValue(wr_refunded_cash)),avg(cast(ws_quantity as bigint)),avg(wr_fee),avg(wr_refunded_cash),avg(ws_quantity),count,count,count,substr(r_reason_desc, 1, 20),sum,sum,sum] + InputAdapter + Exchange [r_reason_desc] #1 + WholeStageCodegen (8) + HashAggregate [r_reason_desc,wr_fee,wr_refunded_cash,ws_quantity] [count,count,count,count,count,count,sum,sum,sum,sum,sum,sum] + Project [r_reason_desc,wr_fee,wr_refunded_cash,ws_quantity] + BroadcastHashJoin [r_reason_sk,wr_reason_sk] + Project [wr_fee,wr_reason_sk,wr_refunded_cash,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [wr_fee,wr_reason_sk,wr_refunded_cash,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [ca_address_sk,ca_state,wr_refunded_addr_sk,ws_net_profit] + Project [wr_fee,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cash,ws_net_profit,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [cd_demo_sk,cd_education_status,cd_education_status,cd_marital_status,cd_marital_status,wr_returning_cdemo_sk] + Project [cd_education_status,cd_marital_status,wr_fee,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cash,wr_returning_cdemo_sk,ws_net_profit,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [cd_demo_sk,cd_education_status,cd_marital_status,wr_refunded_cdemo_sk,ws_sales_price] + Project 
[wr_fee,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cash,wr_refunded_cdemo_sk,wr_returning_cdemo_sk,ws_net_profit,ws_quantity,ws_sales_price,ws_sold_date_sk] + BroadcastHashJoin [wp_web_page_sk,ws_web_page_sk] + Project [wr_fee,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cash,wr_refunded_cdemo_sk,wr_returning_cdemo_sk,ws_net_profit,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_web_page_sk] + BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + Filter [ws_item_sk,ws_net_profit,ws_order_number,ws_sales_price,ws_sold_date_sk,ws_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_net_profit,ws_order_number,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_web_page_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [wr_item_sk,wr_order_number,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cdemo_sk,wr_returning_cdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_fee,wr_item_sk,wr_order_number,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cash,wr_refunded_cdemo_sk,wr_returning_cdemo_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [wp_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_page [wp_web_page_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [cd_demo_sk,cd_education_status,cd_marital_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_marital_status] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [cd_demo_sk,cd_education_status,cd_marital_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_marital_status] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Project [ca_address_sk,ca_state] + Filter [ca_address_sk,ca_country,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address 
[ca_address_sk,ca_country,ca_state] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (6) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (7) + Filter [r_reason_sk] + ColumnarToRow + InputAdapter + Scan parquet default.reason [r_reason_desc,r_reason_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86.sf100/explain.txt new file mode 100644 index 0000000000000..af394e3d93d65 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86.sf100/explain.txt @@ -0,0 +1,142 @@ +== Physical Plan == +TakeOrderedAndProject (25) ++- * Project (24) + +- Window (23) + +- * Sort (22) + +- Exchange (21) + +- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Expand (17) + +- * Project (16) + +- * BroadcastHashJoin Inner BuildRight (15) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.web_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.date_dim (4) + +- BroadcastExchange (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet default.item (11) + + +(1) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_net_paid#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_net_paid#3] + +(3) Filter [codegen id : 3] +Input [3]: 
[ws_sold_date_sk#1, ws_item_sk#2, ws_net_paid#3] +Condition : (isnotnull(ws_sold_date_sk#1) AND isnotnull(ws_item_sk#2)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#4, d_month_seq#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(10) Project [codegen id : 3] +Output [2]: [ws_item_sk#2, ws_net_paid#3] +Input [4]: [ws_sold_date_sk#1, ws_item_sk#2, ws_net_paid#3, d_date_sk#4] + +(11) Scan parquet default.item +Output [3]: [i_item_sk#7, i_class#8, i_category#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [i_item_sk#7, i_class#8, i_category#9] + +(13) Filter [codegen id : 2] +Input [3]: [i_item_sk#7, i_class#8, i_category#9] +Condition : isnotnull(i_item_sk#7) + +(14) BroadcastExchange +Input [3]: [i_item_sk#7, i_class#8, i_category#9] +Arguments: 
HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_item_sk#2] +Right keys [1]: [i_item_sk#7] +Join condition: None + +(16) Project [codegen id : 3] +Output [3]: [ws_net_paid#3, i_category#9, i_class#8] +Input [5]: [ws_item_sk#2, ws_net_paid#3, i_item_sk#7, i_class#8, i_category#9] + +(17) Expand [codegen id : 3] +Input [3]: [ws_net_paid#3, i_category#9, i_class#8] +Arguments: [List(ws_net_paid#3, i_category#9, i_class#8, 0), List(ws_net_paid#3, i_category#9, null, 1), List(ws_net_paid#3, null, null, 3)], [ws_net_paid#3, i_category#11, i_class#12, spark_grouping_id#13] + +(18) HashAggregate [codegen id : 3] +Input [4]: [ws_net_paid#3, i_category#11, i_class#12, spark_grouping_id#13] +Keys [3]: [i_category#11, i_class#12, spark_grouping_id#13] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#3))] +Aggregate Attributes [1]: [sum#14] +Results [4]: [i_category#11, i_class#12, spark_grouping_id#13, sum#15] + +(19) Exchange +Input [4]: [i_category#11, i_class#12, spark_grouping_id#13, sum#15] +Arguments: hashpartitioning(i_category#11, i_class#12, spark_grouping_id#13, 5), true, [id=#16] + +(20) HashAggregate [codegen id : 4] +Input [4]: [i_category#11, i_class#12, spark_grouping_id#13, sum#15] +Keys [3]: [i_category#11, i_class#12, spark_grouping_id#13] +Functions [1]: [sum(UnscaledValue(ws_net_paid#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#3))#17] +Results [7]: [MakeDecimal(sum(UnscaledValue(ws_net_paid#3))#17,17,2) AS total_sum#18, i_category#11, i_class#12, (cast((shiftright(spark_grouping_id#13, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#13, 0) & 1) as tinyint)) AS lochierarchy#19, (cast((shiftright(spark_grouping_id#13, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#13, 0) & 1) as tinyint)) AS _w1#20, CASE WHEN (cast(cast((shiftright(spark_grouping_id#13, 0) & 1) as tinyint) as int) = 0) THEN i_category#11 END AS 
_w2#21, MakeDecimal(sum(UnscaledValue(ws_net_paid#3))#17,17,2) AS _w3#22] + +(21) Exchange +Input [7]: [total_sum#18, i_category#11, i_class#12, lochierarchy#19, _w1#20, _w2#21, _w3#22] +Arguments: hashpartitioning(_w1#20, _w2#21, 5), true, [id=#23] + +(22) Sort [codegen id : 5] +Input [7]: [total_sum#18, i_category#11, i_class#12, lochierarchy#19, _w1#20, _w2#21, _w3#22] +Arguments: [_w1#20 ASC NULLS FIRST, _w2#21 ASC NULLS FIRST, _w3#22 DESC NULLS LAST], false, 0 + +(23) Window +Input [7]: [total_sum#18, i_category#11, i_class#12, lochierarchy#19, _w1#20, _w2#21, _w3#22] +Arguments: [rank(_w3#22) windowspecdefinition(_w1#20, _w2#21, _w3#22 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#24], [_w1#20, _w2#21], [_w3#22 DESC NULLS LAST] + +(24) Project [codegen id : 6] +Output [5]: [total_sum#18, i_category#11, i_class#12, lochierarchy#19, rank_within_parent#24] +Input [8]: [total_sum#18, i_category#11, i_class#12, lochierarchy#19, _w1#20, _w2#21, _w3#22, rank_within_parent#24] + +(25) TakeOrderedAndProject +Input [5]: [total_sum#18, i_category#11, i_class#12, lochierarchy#19, rank_within_parent#24] +Arguments: 100, [lochierarchy#19 DESC NULLS LAST, CASE WHEN (cast(lochierarchy#19 as int) = 0) THEN i_category#11 END ASC NULLS FIRST, rank_within_parent#24 ASC NULLS FIRST], [total_sum#18, i_category#11, i_class#12, lochierarchy#19, rank_within_parent#24] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86.sf100/simplified.txt new file mode 100644 index 0000000000000..76b89dcf2f6c7 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86.sf100/simplified.txt @@ -0,0 +1,39 @@ +TakeOrderedAndProject [i_category,i_class,lochierarchy,rank_within_parent,total_sum] + WholeStageCodegen (6) + Project 
[i_category,i_class,lochierarchy,rank_within_parent,total_sum] + InputAdapter + Window [_w1,_w2,_w3] + WholeStageCodegen (5) + Sort [_w1,_w2,_w3] + InputAdapter + Exchange [_w1,_w2] #1 + WholeStageCodegen (4) + HashAggregate [i_category,i_class,spark_grouping_id,sum] [_w1,_w2,_w3,lochierarchy,sum,sum(UnscaledValue(ws_net_paid)),total_sum] + InputAdapter + Exchange [i_category,i_class,spark_grouping_id] #2 + WholeStageCodegen (3) + HashAggregate [i_category,i_class,spark_grouping_id,ws_net_paid] [sum,sum] + Expand [i_category,i_class,ws_net_paid] + Project [i_category,i_class,ws_net_paid] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_item_sk,ws_net_paid] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_net_paid,ws_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_class,i_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/explain.txt new file mode 100644 index 0000000000000..712444ca3a9a5 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/explain.txt @@ -0,0 +1,142 @@ +== Physical Plan == +TakeOrderedAndProject (25) ++- * Project (24) + +- Window (23) + +- * Sort (22) + +- Exchange (21) + +- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Expand (17) + +- * Project (16) + +- * BroadcastHashJoin Inner BuildRight (15) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) 
+ : : +- Scan parquet default.web_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.date_dim (4) + +- BroadcastExchange (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet default.item (11) + + +(1) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_net_paid#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_net_paid#3] + +(3) Filter [codegen id : 3] +Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_net_paid#3] +Condition : (isnotnull(ws_sold_date_sk#1) AND isnotnull(ws_item_sk#2)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#4, d_month_seq#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(10) Project [codegen id 
: 3] +Output [2]: [ws_item_sk#2, ws_net_paid#3] +Input [4]: [ws_sold_date_sk#1, ws_item_sk#2, ws_net_paid#3, d_date_sk#4] + +(11) Scan parquet default.item +Output [3]: [i_item_sk#7, i_class#8, i_category#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [i_item_sk#7, i_class#8, i_category#9] + +(13) Filter [codegen id : 2] +Input [3]: [i_item_sk#7, i_class#8, i_category#9] +Condition : isnotnull(i_item_sk#7) + +(14) BroadcastExchange +Input [3]: [i_item_sk#7, i_class#8, i_category#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_item_sk#2] +Right keys [1]: [i_item_sk#7] +Join condition: None + +(16) Project [codegen id : 3] +Output [3]: [ws_net_paid#3, i_category#9, i_class#8] +Input [5]: [ws_item_sk#2, ws_net_paid#3, i_item_sk#7, i_class#8, i_category#9] + +(17) Expand [codegen id : 3] +Input [3]: [ws_net_paid#3, i_category#9, i_class#8] +Arguments: [List(ws_net_paid#3, i_category#9, i_class#8, 0), List(ws_net_paid#3, i_category#9, null, 1), List(ws_net_paid#3, null, null, 3)], [ws_net_paid#3, i_category#11, i_class#12, spark_grouping_id#13] + +(18) HashAggregate [codegen id : 3] +Input [4]: [ws_net_paid#3, i_category#11, i_class#12, spark_grouping_id#13] +Keys [3]: [i_category#11, i_class#12, spark_grouping_id#13] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#3))] +Aggregate Attributes [1]: [sum#14] +Results [4]: [i_category#11, i_class#12, spark_grouping_id#13, sum#15] + +(19) Exchange +Input [4]: [i_category#11, i_class#12, spark_grouping_id#13, sum#15] +Arguments: hashpartitioning(i_category#11, i_class#12, spark_grouping_id#13, 5), true, [id=#16] + +(20) HashAggregate [codegen id : 4] +Input [4]: [i_category#11, 
i_class#12, spark_grouping_id#13, sum#15] +Keys [3]: [i_category#11, i_class#12, spark_grouping_id#13] +Functions [1]: [sum(UnscaledValue(ws_net_paid#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#3))#17] +Results [7]: [MakeDecimal(sum(UnscaledValue(ws_net_paid#3))#17,17,2) AS total_sum#18, i_category#11, i_class#12, (cast((shiftright(spark_grouping_id#13, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#13, 0) & 1) as tinyint)) AS lochierarchy#19, (cast((shiftright(spark_grouping_id#13, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#13, 0) & 1) as tinyint)) AS _w1#20, CASE WHEN (cast(cast((shiftright(spark_grouping_id#13, 0) & 1) as tinyint) as int) = 0) THEN i_category#11 END AS _w2#21, MakeDecimal(sum(UnscaledValue(ws_net_paid#3))#17,17,2) AS _w3#22] + +(21) Exchange +Input [7]: [total_sum#18, i_category#11, i_class#12, lochierarchy#19, _w1#20, _w2#21, _w3#22] +Arguments: hashpartitioning(_w1#20, _w2#21, 5), true, [id=#23] + +(22) Sort [codegen id : 5] +Input [7]: [total_sum#18, i_category#11, i_class#12, lochierarchy#19, _w1#20, _w2#21, _w3#22] +Arguments: [_w1#20 ASC NULLS FIRST, _w2#21 ASC NULLS FIRST, _w3#22 DESC NULLS LAST], false, 0 + +(23) Window +Input [7]: [total_sum#18, i_category#11, i_class#12, lochierarchy#19, _w1#20, _w2#21, _w3#22] +Arguments: [rank(_w3#22) windowspecdefinition(_w1#20, _w2#21, _w3#22 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#24], [_w1#20, _w2#21], [_w3#22 DESC NULLS LAST] + +(24) Project [codegen id : 6] +Output [5]: [total_sum#18, i_category#11, i_class#12, lochierarchy#19, rank_within_parent#24] +Input [8]: [total_sum#18, i_category#11, i_class#12, lochierarchy#19, _w1#20, _w2#21, _w3#22, rank_within_parent#24] + +(25) TakeOrderedAndProject +Input [5]: [total_sum#18, i_category#11, i_class#12, lochierarchy#19, rank_within_parent#24] +Arguments: 100, [lochierarchy#19 DESC NULLS LAST, CASE WHEN (cast(lochierarchy#19 as int) = 0) 
THEN i_category#11 END ASC NULLS FIRST, rank_within_parent#24 ASC NULLS FIRST], [total_sum#18, i_category#11, i_class#12, lochierarchy#19, rank_within_parent#24] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/simplified.txt new file mode 100644 index 0000000000000..76b89dcf2f6c7 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/simplified.txt @@ -0,0 +1,39 @@ +TakeOrderedAndProject [i_category,i_class,lochierarchy,rank_within_parent,total_sum] + WholeStageCodegen (6) + Project [i_category,i_class,lochierarchy,rank_within_parent,total_sum] + InputAdapter + Window [_w1,_w2,_w3] + WholeStageCodegen (5) + Sort [_w1,_w2,_w3] + InputAdapter + Exchange [_w1,_w2] #1 + WholeStageCodegen (4) + HashAggregate [i_category,i_class,spark_grouping_id,sum] [_w1,_w2,_w3,lochierarchy,sum,sum(UnscaledValue(ws_net_paid)),total_sum] + InputAdapter + Exchange [i_category,i_class,spark_grouping_id] #2 + WholeStageCodegen (3) + HashAggregate [i_category,i_class,spark_grouping_id,ws_net_paid] [sum,sum] + Expand [i_category,i_class,ws_net_paid] + Project [i_category,i_class,ws_net_paid] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_item_sk,ws_net_paid] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_net_paid,ws_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_class,i_item_sk] diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87.sf100/explain.txt new file mode 100644 index 0000000000000..cb0aa8eb0bd8b --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87.sf100/explain.txt @@ -0,0 +1,388 @@ +== Physical Plan == +* HashAggregate (67) ++- Exchange (66) + +- * HashAggregate (65) + +- * HashAggregate (64) + +- * HashAggregate (63) + +- * HashAggregate (62) + +- * HashAggregate (61) + +- * HashAggregate (60) + +- Exchange (59) + +- * HashAggregate (58) + +- SortMergeJoin LeftAnti (57) + :- SortMergeJoin LeftAnti (39) + : :- * Sort (21) + : : +- Exchange (20) + : : +- * Project (19) + : : +- * SortMergeJoin Inner (18) + : : :- * Sort (12) + : : : +- Exchange (11) + : : : +- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- * Sort (17) + : : +- Exchange (16) + : : +- * Filter (15) + : : +- * ColumnarToRow (14) + : : +- Scan parquet default.customer (13) + : +- * Sort (38) + : +- Exchange (37) + : +- * HashAggregate (36) + : +- Exchange (35) + : +- * HashAggregate (34) + : +- * Project (33) + : +- * SortMergeJoin Inner (32) + : :- * Sort (29) + : : +- Exchange (28) + : : +- * Project (27) + : : +- * BroadcastHashJoin Inner BuildRight (26) + : : :- * Filter (24) + : : : +- * ColumnarToRow (23) + : : : +- Scan parquet default.catalog_sales (22) + : : +- ReusedExchange (25) + : +- * Sort (31) + : +- ReusedExchange (30) + +- * Sort (56) + +- Exchange (55) + +- * HashAggregate (54) + +- Exchange (53) + +- * HashAggregate (52) + +- * Project (51) + +- * SortMergeJoin Inner (50) + :- * Sort (47) + : +- Exchange (46) 
+ : +- * Project (45) + : +- * BroadcastHashJoin Inner BuildRight (44) + : :- * Filter (42) + : : +- * ColumnarToRow (41) + : : +- Scan parquet default.web_sales (40) + : +- ReusedExchange (43) + +- * Sort (49) + +- ReusedExchange (48) + + +(1) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#1, ss_customer_sk#2] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [2]: [ss_sold_date_sk#1, ss_customer_sk#2] + +(3) Filter [codegen id : 2] +Input [2]: [ss_sold_date_sk#1, ss_customer_sk#2] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_customer_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#3)) + +(7) Project [codegen id : 1] +Output [2]: [d_date_sk#3, d_date#4] +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] + +(8) BroadcastExchange +Input [2]: [d_date_sk#3, d_date#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(9) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#3] +Join condition: None + +(10) Project [codegen 
id : 2] +Output [2]: [ss_customer_sk#2, d_date#4] +Input [4]: [ss_sold_date_sk#1, ss_customer_sk#2, d_date_sk#3, d_date#4] + +(11) Exchange +Input [2]: [ss_customer_sk#2, d_date#4] +Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#7] + +(12) Sort [codegen id : 3] +Input [2]: [ss_customer_sk#2, d_date#4] +Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 + +(13) Scan parquet default.customer +Output [3]: [c_customer_sk#8, c_first_name#9, c_last_name#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 4] +Input [3]: [c_customer_sk#8, c_first_name#9, c_last_name#10] + +(15) Filter [codegen id : 4] +Input [3]: [c_customer_sk#8, c_first_name#9, c_last_name#10] +Condition : isnotnull(c_customer_sk#8) + +(16) Exchange +Input [3]: [c_customer_sk#8, c_first_name#9, c_last_name#10] +Arguments: hashpartitioning(c_customer_sk#8, 5), true, [id=#11] + +(17) Sort [codegen id : 5] +Input [3]: [c_customer_sk#8, c_first_name#9, c_last_name#10] +Arguments: [c_customer_sk#8 ASC NULLS FIRST], false, 0 + +(18) SortMergeJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#8] +Join condition: None + +(19) Project [codegen id : 6] +Output [3]: [d_date#4, c_first_name#9, c_last_name#10] +Input [5]: [ss_customer_sk#2, d_date#4, c_customer_sk#8, c_first_name#9, c_last_name#10] + +(20) Exchange +Input [3]: [d_date#4, c_first_name#9, c_last_name#10] +Arguments: hashpartitioning(coalesce(c_last_name#10, ), isnull(c_last_name#10), coalesce(c_first_name#9, ), isnull(c_first_name#9), coalesce(d_date#4, 0), isnull(d_date#4), 5), true, [id=#12] + +(21) Sort [codegen id : 7] +Input [3]: [d_date#4, c_first_name#9, c_last_name#10] +Arguments: [coalesce(c_last_name#10, ) ASC NULLS FIRST, isnull(c_last_name#10) ASC NULLS FIRST, 
coalesce(c_first_name#9, ) ASC NULLS FIRST, isnull(c_first_name#9) ASC NULLS FIRST, coalesce(d_date#4, 0) ASC NULLS FIRST, isnull(d_date#4) ASC NULLS FIRST], false, 0 + +(22) Scan parquet default.catalog_sales +Output [2]: [cs_sold_date_sk#13, cs_bill_customer_sk#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_customer_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 9] +Input [2]: [cs_sold_date_sk#13, cs_bill_customer_sk#14] + +(24) Filter [codegen id : 9] +Input [2]: [cs_sold_date_sk#13, cs_bill_customer_sk#14] +Condition : (isnotnull(cs_sold_date_sk#13) AND isnotnull(cs_bill_customer_sk#14)) + +(25) ReusedExchange [Reuses operator id: 8] +Output [2]: [d_date_sk#15, d_date#16] + +(26) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [cs_sold_date_sk#13] +Right keys [1]: [d_date_sk#15] +Join condition: None + +(27) Project [codegen id : 9] +Output [2]: [cs_bill_customer_sk#14, d_date#16] +Input [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, d_date_sk#15, d_date#16] + +(28) Exchange +Input [2]: [cs_bill_customer_sk#14, d_date#16] +Arguments: hashpartitioning(cs_bill_customer_sk#14, 5), true, [id=#17] + +(29) Sort [codegen id : 10] +Input [2]: [cs_bill_customer_sk#14, d_date#16] +Arguments: [cs_bill_customer_sk#14 ASC NULLS FIRST], false, 0 + +(30) ReusedExchange [Reuses operator id: 16] +Output [3]: [c_customer_sk#18, c_first_name#19, c_last_name#20] + +(31) Sort [codegen id : 12] +Input [3]: [c_customer_sk#18, c_first_name#19, c_last_name#20] +Arguments: [c_customer_sk#18 ASC NULLS FIRST], false, 0 + +(32) SortMergeJoin [codegen id : 13] +Left keys [1]: [cs_bill_customer_sk#14] +Right keys [1]: [c_customer_sk#18] +Join condition: None + +(33) Project [codegen id : 13] +Output [3]: [c_last_name#20, c_first_name#19, d_date#16] +Input [5]: 
[cs_bill_customer_sk#14, d_date#16, c_customer_sk#18, c_first_name#19, c_last_name#20] + +(34) HashAggregate [codegen id : 13] +Input [3]: [c_last_name#20, c_first_name#19, d_date#16] +Keys [3]: [c_last_name#20, c_first_name#19, d_date#16] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#20, c_first_name#19, d_date#16] + +(35) Exchange +Input [3]: [c_last_name#20, c_first_name#19, d_date#16] +Arguments: hashpartitioning(c_last_name#20, c_first_name#19, d_date#16, 5), true, [id=#21] + +(36) HashAggregate [codegen id : 14] +Input [3]: [c_last_name#20, c_first_name#19, d_date#16] +Keys [3]: [c_last_name#20, c_first_name#19, d_date#16] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#20, c_first_name#19, d_date#16] + +(37) Exchange +Input [3]: [c_last_name#20, c_first_name#19, d_date#16] +Arguments: hashpartitioning(coalesce(c_last_name#20, ), isnull(c_last_name#20), coalesce(c_first_name#19, ), isnull(c_first_name#19), coalesce(d_date#16, 0), isnull(d_date#16), 5), true, [id=#22] + +(38) Sort [codegen id : 15] +Input [3]: [c_last_name#20, c_first_name#19, d_date#16] +Arguments: [coalesce(c_last_name#20, ) ASC NULLS FIRST, isnull(c_last_name#20) ASC NULLS FIRST, coalesce(c_first_name#19, ) ASC NULLS FIRST, isnull(c_first_name#19) ASC NULLS FIRST, coalesce(d_date#16, 0) ASC NULLS FIRST, isnull(d_date#16) ASC NULLS FIRST], false, 0 + +(39) SortMergeJoin +Left keys [6]: [coalesce(c_last_name#10, ), isnull(c_last_name#10), coalesce(c_first_name#9, ), isnull(c_first_name#9), coalesce(d_date#4, 0), isnull(d_date#4)] +Right keys [6]: [coalesce(c_last_name#20, ), isnull(c_last_name#20), coalesce(c_first_name#19, ), isnull(c_first_name#19), coalesce(d_date#16, 0), isnull(d_date#16)] +Join condition: None + +(40) Scan parquet default.web_sales +Output [2]: [ws_sold_date_sk#23, ws_bill_customer_sk#24] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 17] +Input [2]: [ws_sold_date_sk#23, ws_bill_customer_sk#24] + +(42) Filter [codegen id : 17] +Input [2]: [ws_sold_date_sk#23, ws_bill_customer_sk#24] +Condition : (isnotnull(ws_sold_date_sk#23) AND isnotnull(ws_bill_customer_sk#24)) + +(43) ReusedExchange [Reuses operator id: 8] +Output [2]: [d_date_sk#25, d_date#26] + +(44) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_sold_date_sk#23] +Right keys [1]: [d_date_sk#25] +Join condition: None + +(45) Project [codegen id : 17] +Output [2]: [ws_bill_customer_sk#24, d_date#26] +Input [4]: [ws_sold_date_sk#23, ws_bill_customer_sk#24, d_date_sk#25, d_date#26] + +(46) Exchange +Input [2]: [ws_bill_customer_sk#24, d_date#26] +Arguments: hashpartitioning(ws_bill_customer_sk#24, 5), true, [id=#27] + +(47) Sort [codegen id : 18] +Input [2]: [ws_bill_customer_sk#24, d_date#26] +Arguments: [ws_bill_customer_sk#24 ASC NULLS FIRST], false, 0 + +(48) ReusedExchange [Reuses operator id: 16] +Output [3]: [c_customer_sk#28, c_first_name#29, c_last_name#30] + +(49) Sort [codegen id : 20] +Input [3]: [c_customer_sk#28, c_first_name#29, c_last_name#30] +Arguments: [c_customer_sk#28 ASC NULLS FIRST], false, 0 + +(50) SortMergeJoin [codegen id : 21] +Left keys [1]: [ws_bill_customer_sk#24] +Right keys [1]: [c_customer_sk#28] +Join condition: None + +(51) Project [codegen id : 21] +Output [3]: [c_last_name#30, c_first_name#29, d_date#26] +Input [5]: [ws_bill_customer_sk#24, d_date#26, c_customer_sk#28, c_first_name#29, c_last_name#30] + +(52) HashAggregate [codegen id : 21] +Input [3]: [c_last_name#30, c_first_name#29, d_date#26] +Keys [3]: [c_last_name#30, c_first_name#29, d_date#26] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#30, 
c_first_name#29, d_date#26] + +(53) Exchange +Input [3]: [c_last_name#30, c_first_name#29, d_date#26] +Arguments: hashpartitioning(c_last_name#30, c_first_name#29, d_date#26, 5), true, [id=#31] + +(54) HashAggregate [codegen id : 22] +Input [3]: [c_last_name#30, c_first_name#29, d_date#26] +Keys [3]: [c_last_name#30, c_first_name#29, d_date#26] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#30, c_first_name#29, d_date#26] + +(55) Exchange +Input [3]: [c_last_name#30, c_first_name#29, d_date#26] +Arguments: hashpartitioning(coalesce(c_last_name#30, ), isnull(c_last_name#30), coalesce(c_first_name#29, ), isnull(c_first_name#29), coalesce(d_date#26, 0), isnull(d_date#26), 5), true, [id=#32] + +(56) Sort [codegen id : 23] +Input [3]: [c_last_name#30, c_first_name#29, d_date#26] +Arguments: [coalesce(c_last_name#30, ) ASC NULLS FIRST, isnull(c_last_name#30) ASC NULLS FIRST, coalesce(c_first_name#29, ) ASC NULLS FIRST, isnull(c_first_name#29) ASC NULLS FIRST, coalesce(d_date#26, 0) ASC NULLS FIRST, isnull(d_date#26) ASC NULLS FIRST], false, 0 + +(57) SortMergeJoin +Left keys [6]: [coalesce(c_last_name#10, ), isnull(c_last_name#10), coalesce(c_first_name#9, ), isnull(c_first_name#9), coalesce(d_date#4, 0), isnull(d_date#4)] +Right keys [6]: [coalesce(c_last_name#30, ), isnull(c_last_name#30), coalesce(c_first_name#29, ), isnull(c_first_name#29), coalesce(d_date#26, 0), isnull(d_date#26)] +Join condition: None + +(58) HashAggregate [codegen id : 24] +Input [3]: [d_date#4, c_first_name#9, c_last_name#10] +Keys [3]: [c_last_name#10, c_first_name#9, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#10, c_first_name#9, d_date#4] + +(59) Exchange +Input [3]: [c_last_name#10, c_first_name#9, d_date#4] +Arguments: hashpartitioning(c_last_name#10, c_first_name#9, d_date#4, 5), true, [id=#33] + +(60) HashAggregate [codegen id : 25] +Input [3]: [c_last_name#10, c_first_name#9, d_date#4] +Keys [3]: [c_last_name#10, c_first_name#9, 
d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#10, c_first_name#9, d_date#4] + +(61) HashAggregate [codegen id : 25] +Input [3]: [c_last_name#10, c_first_name#9, d_date#4] +Keys [3]: [c_last_name#10, c_first_name#9, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#10, c_first_name#9, d_date#4] + +(62) HashAggregate [codegen id : 25] +Input [3]: [c_last_name#10, c_first_name#9, d_date#4] +Keys [3]: [c_last_name#10, c_first_name#9, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#10, c_first_name#9, d_date#4] + +(63) HashAggregate [codegen id : 25] +Input [3]: [c_last_name#10, c_first_name#9, d_date#4] +Keys [3]: [c_last_name#10, c_first_name#9, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#10, c_first_name#9, d_date#4] + +(64) HashAggregate [codegen id : 25] +Input [3]: [c_last_name#10, c_first_name#9, d_date#4] +Keys [3]: [c_last_name#10, c_first_name#9, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results: [] + +(65) HashAggregate [codegen id : 25] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#34] +Results [1]: [count#35] + +(66) Exchange +Input [1]: [count#35] +Arguments: SinglePartition, true, [id=#36] + +(67) HashAggregate [codegen id : 26] +Input [1]: [count#35] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#37] +Results [1]: [count(1)#37 AS count(1)#38] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87.sf100/simplified.txt new file mode 100644 index 0000000000000..0404e73452564 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87.sf100/simplified.txt @@ -0,0 +1,117 @@ +WholeStageCodegen (26) + HashAggregate [count] [count,count(1),count(1)] + InputAdapter + Exchange #1 + WholeStageCodegen (25) 
+ HashAggregate [count,count] + HashAggregate [c_first_name,c_last_name,d_date] + HashAggregate [c_first_name,c_last_name,d_date] + HashAggregate [c_first_name,c_last_name,d_date] + HashAggregate [c_first_name,c_last_name,d_date] + HashAggregate [c_first_name,c_last_name,d_date] + InputAdapter + Exchange [c_first_name,c_last_name,d_date] #2 + WholeStageCodegen (24) + HashAggregate [c_first_name,c_last_name,d_date] + InputAdapter + SortMergeJoin [c_first_name,c_first_name,c_last_name,c_last_name,d_date,d_date] + SortMergeJoin [c_first_name,c_first_name,c_last_name,c_last_name,d_date,d_date] + WholeStageCodegen (7) + Sort [c_first_name,c_last_name,d_date] + InputAdapter + Exchange [c_first_name,c_last_name,d_date] #3 + WholeStageCodegen (6) + Project [c_first_name,c_last_name,d_date] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (3) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #4 + WholeStageCodegen (2) + Project [d_date,ss_customer_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date,d_date_sk] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk,d_month_seq] + InputAdapter + WholeStageCodegen (5) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #6 + WholeStageCodegen (4) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] + WholeStageCodegen (15) + Sort [c_first_name,c_last_name,d_date] + InputAdapter + Exchange [c_first_name,c_last_name,d_date] #7 + WholeStageCodegen (14) + HashAggregate [c_first_name,c_last_name,d_date] + InputAdapter + Exchange [c_first_name,c_last_name,d_date] #8 + WholeStageCodegen (13) + HashAggregate 
[c_first_name,c_last_name,d_date] + Project [c_first_name,c_last_name,d_date] + SortMergeJoin [c_customer_sk,cs_bill_customer_sk] + InputAdapter + WholeStageCodegen (10) + Sort [cs_bill_customer_sk] + InputAdapter + Exchange [cs_bill_customer_sk] #9 + WholeStageCodegen (9) + Project [cs_bill_customer_sk,d_date] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_bill_customer_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date,d_date_sk] #5 + InputAdapter + WholeStageCodegen (12) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #6 + WholeStageCodegen (23) + Sort [c_first_name,c_last_name,d_date] + InputAdapter + Exchange [c_first_name,c_last_name,d_date] #10 + WholeStageCodegen (22) + HashAggregate [c_first_name,c_last_name,d_date] + InputAdapter + Exchange [c_first_name,c_last_name,d_date] #11 + WholeStageCodegen (21) + HashAggregate [c_first_name,c_last_name,d_date] + Project [c_first_name,c_last_name,d_date] + SortMergeJoin [c_customer_sk,ws_bill_customer_sk] + InputAdapter + WholeStageCodegen (18) + Sort [ws_bill_customer_sk] + InputAdapter + Exchange [ws_bill_customer_sk] #12 + WholeStageCodegen (17) + Project [d_date,ws_bill_customer_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_bill_customer_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date,d_date_sk] #5 + InputAdapter + WholeStageCodegen (20) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #6 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/explain.txt new file mode 100644 index 0000000000000..57f97a686c31a --- /dev/null +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/explain.txt @@ -0,0 +1,323 @@ +== Physical Plan == +* HashAggregate (54) ++- Exchange (53) + +- * HashAggregate (52) + +- * HashAggregate (51) + +- * HashAggregate (50) + +- * HashAggregate (49) + +- * HashAggregate (48) + +- * HashAggregate (47) + +- Exchange (46) + +- * HashAggregate (45) + +- * BroadcastHashJoin LeftAnti BuildRight (44) + :- * BroadcastHashJoin LeftAnti BuildRight (30) + : :- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.customer (11) + : +- BroadcastExchange (29) + : +- * HashAggregate (28) + : +- Exchange (27) + : +- * HashAggregate (26) + : +- * Project (25) + : +- * BroadcastHashJoin Inner BuildRight (24) + : :- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Filter (19) + : : : +- * ColumnarToRow (18) + : : : +- Scan parquet default.catalog_sales (17) + : : +- ReusedExchange (20) + : +- ReusedExchange (23) + +- BroadcastExchange (43) + +- * HashAggregate (42) + +- Exchange (41) + +- * HashAggregate (40) + +- * Project (39) + +- * BroadcastHashJoin Inner BuildRight (38) + :- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Filter (33) + : : +- * ColumnarToRow (32) + : : +- Scan parquet default.web_sales (31) + : +- ReusedExchange (34) + +- ReusedExchange (37) + + +(1) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#1, ss_customer_sk#2] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 11] +Input [2]: [ss_sold_date_sk#1, ss_customer_sk#2] + +(3) Filter [codegen id : 11] +Input [2]: [ss_sold_date_sk#1, ss_customer_sk#2] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_customer_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#3)) + +(7) Project [codegen id : 1] +Output [2]: [d_date_sk#3, d_date#4] +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] + +(8) BroadcastExchange +Input [2]: [d_date_sk#3, d_date#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(9) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#3] +Join condition: None + +(10) Project [codegen id : 11] +Output [2]: [ss_customer_sk#2, d_date#4] +Input [4]: [ss_sold_date_sk#1, ss_customer_sk#2, d_date_sk#3, d_date#4] + +(11) Scan parquet default.customer +Output [3]: [c_customer_sk#7, c_first_name#8, c_last_name#9] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [c_customer_sk#7, c_first_name#8, c_last_name#9] + +(13) Filter [codegen id : 2] +Input [3]: [c_customer_sk#7, c_first_name#8, c_last_name#9] +Condition : isnotnull(c_customer_sk#7) + +(14) BroadcastExchange +Input [3]: [c_customer_sk#7, c_first_name#8, c_last_name#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] + +(15) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#7] +Join condition: None + +(16) Project [codegen id : 11] +Output [3]: [d_date#4, c_first_name#8, c_last_name#9] +Input [5]: [ss_customer_sk#2, d_date#4, c_customer_sk#7, c_first_name#8, c_last_name#9] + +(17) Scan parquet default.catalog_sales +Output [2]: [cs_sold_date_sk#11, cs_bill_customer_sk#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_customer_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 5] +Input [2]: [cs_sold_date_sk#11, cs_bill_customer_sk#12] + +(19) Filter [codegen id : 5] +Input [2]: [cs_sold_date_sk#11, cs_bill_customer_sk#12] +Condition : (isnotnull(cs_sold_date_sk#11) AND isnotnull(cs_bill_customer_sk#12)) + +(20) ReusedExchange [Reuses operator id: 8] +Output [2]: [d_date_sk#13, d_date#14] + +(21) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_sold_date_sk#11] +Right keys [1]: [d_date_sk#13] +Join condition: None + +(22) Project [codegen id : 5] +Output [2]: [cs_bill_customer_sk#12, d_date#14] +Input [4]: [cs_sold_date_sk#11, cs_bill_customer_sk#12, d_date_sk#13, d_date#14] + +(23) ReusedExchange [Reuses operator id: 14] +Output 
[3]: [c_customer_sk#15, c_first_name#16, c_last_name#17] + +(24) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_bill_customer_sk#12] +Right keys [1]: [c_customer_sk#15] +Join condition: None + +(25) Project [codegen id : 5] +Output [3]: [c_last_name#17, c_first_name#16, d_date#14] +Input [5]: [cs_bill_customer_sk#12, d_date#14, c_customer_sk#15, c_first_name#16, c_last_name#17] + +(26) HashAggregate [codegen id : 5] +Input [3]: [c_last_name#17, c_first_name#16, d_date#14] +Keys [3]: [c_last_name#17, c_first_name#16, d_date#14] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#17, c_first_name#16, d_date#14] + +(27) Exchange +Input [3]: [c_last_name#17, c_first_name#16, d_date#14] +Arguments: hashpartitioning(c_last_name#17, c_first_name#16, d_date#14, 5), true, [id=#18] + +(28) HashAggregate [codegen id : 6] +Input [3]: [c_last_name#17, c_first_name#16, d_date#14] +Keys [3]: [c_last_name#17, c_first_name#16, d_date#14] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#17, c_first_name#16, d_date#14] + +(29) BroadcastExchange +Input [3]: [c_last_name#17, c_first_name#16, d_date#14] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true]), coalesce(input[1, string, true], ), isnull(input[1, string, true]), coalesce(input[2, date, true], 0), isnull(input[2, date, true])),false), [id=#19] + +(30) BroadcastHashJoin [codegen id : 11] +Left keys [6]: [coalesce(c_last_name#9, ), isnull(c_last_name#9), coalesce(c_first_name#8, ), isnull(c_first_name#8), coalesce(d_date#4, 0), isnull(d_date#4)] +Right keys [6]: [coalesce(c_last_name#17, ), isnull(c_last_name#17), coalesce(c_first_name#16, ), isnull(c_first_name#16), coalesce(d_date#14, 0), isnull(d_date#14)] +Join condition: None + +(31) Scan parquet default.web_sales +Output [2]: [ws_sold_date_sk#20, ws_bill_customer_sk#21] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 9] +Input [2]: [ws_sold_date_sk#20, ws_bill_customer_sk#21] + +(33) Filter [codegen id : 9] +Input [2]: [ws_sold_date_sk#20, ws_bill_customer_sk#21] +Condition : (isnotnull(ws_sold_date_sk#20) AND isnotnull(ws_bill_customer_sk#21)) + +(34) ReusedExchange [Reuses operator id: 8] +Output [2]: [d_date_sk#22, d_date#23] + +(35) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_sold_date_sk#20] +Right keys [1]: [d_date_sk#22] +Join condition: None + +(36) Project [codegen id : 9] +Output [2]: [ws_bill_customer_sk#21, d_date#23] +Input [4]: [ws_sold_date_sk#20, ws_bill_customer_sk#21, d_date_sk#22, d_date#23] + +(37) ReusedExchange [Reuses operator id: 14] +Output [3]: [c_customer_sk#24, c_first_name#25, c_last_name#26] + +(38) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_bill_customer_sk#21] +Right keys [1]: [c_customer_sk#24] +Join condition: None + +(39) Project [codegen id : 9] +Output [3]: [c_last_name#26, c_first_name#25, d_date#23] +Input [5]: [ws_bill_customer_sk#21, d_date#23, c_customer_sk#24, c_first_name#25, c_last_name#26] + +(40) HashAggregate [codegen id : 9] +Input [3]: [c_last_name#26, c_first_name#25, d_date#23] +Keys [3]: [c_last_name#26, c_first_name#25, d_date#23] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#26, c_first_name#25, d_date#23] + +(41) Exchange +Input [3]: [c_last_name#26, c_first_name#25, d_date#23] +Arguments: hashpartitioning(c_last_name#26, c_first_name#25, d_date#23, 5), true, [id=#27] + +(42) HashAggregate [codegen id : 10] +Input [3]: [c_last_name#26, c_first_name#25, d_date#23] +Keys [3]: [c_last_name#26, c_first_name#25, d_date#23] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#26, c_first_name#25, d_date#23] + +(43) 
BroadcastExchange +Input [3]: [c_last_name#26, c_first_name#25, d_date#23] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true]), coalesce(input[1, string, true], ), isnull(input[1, string, true]), coalesce(input[2, date, true], 0), isnull(input[2, date, true])),false), [id=#28] + +(44) BroadcastHashJoin [codegen id : 11] +Left keys [6]: [coalesce(c_last_name#9, ), isnull(c_last_name#9), coalesce(c_first_name#8, ), isnull(c_first_name#8), coalesce(d_date#4, 0), isnull(d_date#4)] +Right keys [6]: [coalesce(c_last_name#26, ), isnull(c_last_name#26), coalesce(c_first_name#25, ), isnull(c_first_name#25), coalesce(d_date#23, 0), isnull(d_date#23)] +Join condition: None + +(45) HashAggregate [codegen id : 11] +Input [3]: [d_date#4, c_first_name#8, c_last_name#9] +Keys [3]: [c_last_name#9, c_first_name#8, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#9, c_first_name#8, d_date#4] + +(46) Exchange +Input [3]: [c_last_name#9, c_first_name#8, d_date#4] +Arguments: hashpartitioning(c_last_name#9, c_first_name#8, d_date#4, 5), true, [id=#29] + +(47) HashAggregate [codegen id : 12] +Input [3]: [c_last_name#9, c_first_name#8, d_date#4] +Keys [3]: [c_last_name#9, c_first_name#8, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#9, c_first_name#8, d_date#4] + +(48) HashAggregate [codegen id : 12] +Input [3]: [c_last_name#9, c_first_name#8, d_date#4] +Keys [3]: [c_last_name#9, c_first_name#8, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#9, c_first_name#8, d_date#4] + +(49) HashAggregate [codegen id : 12] +Input [3]: [c_last_name#9, c_first_name#8, d_date#4] +Keys [3]: [c_last_name#9, c_first_name#8, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#9, c_first_name#8, d_date#4] + +(50) HashAggregate [codegen id : 12] +Input [3]: [c_last_name#9, c_first_name#8, d_date#4] +Keys [3]: [c_last_name#9, 
c_first_name#8, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#9, c_first_name#8, d_date#4] + +(51) HashAggregate [codegen id : 12] +Input [3]: [c_last_name#9, c_first_name#8, d_date#4] +Keys [3]: [c_last_name#9, c_first_name#8, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results: [] + +(52) HashAggregate [codegen id : 12] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#30] +Results [1]: [count#31] + +(53) Exchange +Input [1]: [count#31] +Arguments: SinglePartition, true, [id=#32] + +(54) HashAggregate [codegen id : 13] +Input [1]: [count#31] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#33] +Results [1]: [count(1)#33 AS count(1)#34] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/simplified.txt new file mode 100644 index 0000000000000..3caad01cb7c4e --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/simplified.txt @@ -0,0 +1,80 @@ +WholeStageCodegen (13) + HashAggregate [count] [count,count(1),count(1)] + InputAdapter + Exchange #1 + WholeStageCodegen (12) + HashAggregate [count,count] + HashAggregate [c_first_name,c_last_name,d_date] + HashAggregate [c_first_name,c_last_name,d_date] + HashAggregate [c_first_name,c_last_name,d_date] + HashAggregate [c_first_name,c_last_name,d_date] + HashAggregate [c_first_name,c_last_name,d_date] + InputAdapter + Exchange [c_first_name,c_last_name,d_date] #2 + WholeStageCodegen (11) + HashAggregate [c_first_name,c_last_name,d_date] + BroadcastHashJoin [c_first_name,c_first_name,c_last_name,c_last_name,d_date,d_date] + BroadcastHashJoin [c_first_name,c_first_name,c_last_name,c_last_name,d_date,d_date] + Project [c_first_name,c_last_name,d_date] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [d_date,ss_customer_sk] + BroadcastHashJoin 
[d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date,d_date_sk] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (6) + HashAggregate [c_first_name,c_last_name,d_date] + InputAdapter + Exchange [c_first_name,c_last_name,d_date] #6 + WholeStageCodegen (5) + HashAggregate [c_first_name,c_last_name,d_date] + Project [c_first_name,c_last_name,d_date] + BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] + Project [cs_bill_customer_sk,d_date] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_bill_customer_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date,d_date_sk] #3 + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (10) + HashAggregate [c_first_name,c_last_name,d_date] + InputAdapter + Exchange [c_first_name,c_last_name,d_date] #8 + WholeStageCodegen (9) + HashAggregate [c_first_name,c_last_name,d_date] + Project [c_first_name,c_last_name,d_date] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Project [d_date,ws_bill_customer_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_bill_customer_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date,d_date_sk] #3 + InputAdapter + ReusedExchange 
[c_customer_sk,c_first_name,c_last_name] #4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88.sf100/explain.txt new file mode 100644 index 0000000000000..1ec80c2abe08d --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88.sf100/explain.txt @@ -0,0 +1,960 @@ +== Physical Plan == +BroadcastNestedLoopJoin Inner BuildRight (174) +:- BroadcastNestedLoopJoin Inner BuildRight (153) +: :- BroadcastNestedLoopJoin Inner BuildRight (132) +: : :- BroadcastNestedLoopJoin Inner BuildRight (111) +: : : :- BroadcastNestedLoopJoin Inner BuildRight (90) +: : : : :- BroadcastNestedLoopJoin Inner BuildRight (69) +: : : : : :- BroadcastNestedLoopJoin Inner BuildRight (48) +: : : : : : :- * HashAggregate (27) +: : : : : : : +- Exchange (26) +: : : : : : : +- * HashAggregate (25) +: : : : : : : +- * Project (24) +: : : : : : : +- * BroadcastHashJoin Inner BuildRight (23) +: : : : : : : :- * Project (17) +: : : : : : : : +- * BroadcastHashJoin Inner BuildRight (16) +: : : : : : : : :- * Project (10) +: : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (9) +: : : : : : : : : :- * Filter (3) +: : : : : : : : : : +- * ColumnarToRow (2) +: : : : : : : : : : +- Scan parquet default.store_sales (1) +: : : : : : : : : +- BroadcastExchange (8) +: : : : : : : : : +- * Project (7) +: : : : : : : : : +- * Filter (6) +: : : : : : : : : +- * ColumnarToRow (5) +: : : : : : : : : +- Scan parquet default.time_dim (4) +: : : : : : : : +- BroadcastExchange (15) +: : : : : : : : +- * Project (14) +: : : : : : : : +- * Filter (13) +: : : : : : : : +- * ColumnarToRow (12) +: : : : : : : : +- Scan parquet default.store (11) +: : : : : : : +- BroadcastExchange (22) +: : : : : : : +- * Project (21) +: : : : : : : +- * Filter (20) +: : : : : : : +- * ColumnarToRow (19) +: : : : : : : +- Scan parquet default.household_demographics (18) +: : : 
: : : +- BroadcastExchange (47) +: : : : : : +- * HashAggregate (46) +: : : : : : +- Exchange (45) +: : : : : : +- * HashAggregate (44) +: : : : : : +- * Project (43) +: : : : : : +- * BroadcastHashJoin Inner BuildRight (42) +: : : : : : :- * Project (40) +: : : : : : : +- * BroadcastHashJoin Inner BuildRight (39) +: : : : : : : :- * Project (37) +: : : : : : : : +- * BroadcastHashJoin Inner BuildRight (36) +: : : : : : : : :- * Filter (30) +: : : : : : : : : +- * ColumnarToRow (29) +: : : : : : : : : +- Scan parquet default.store_sales (28) +: : : : : : : : +- BroadcastExchange (35) +: : : : : : : : +- * Project (34) +: : : : : : : : +- * Filter (33) +: : : : : : : : +- * ColumnarToRow (32) +: : : : : : : : +- Scan parquet default.time_dim (31) +: : : : : : : +- ReusedExchange (38) +: : : : : : +- ReusedExchange (41) +: : : : : +- BroadcastExchange (68) +: : : : : +- * HashAggregate (67) +: : : : : +- Exchange (66) +: : : : : +- * HashAggregate (65) +: : : : : +- * Project (64) +: : : : : +- * BroadcastHashJoin Inner BuildRight (63) +: : : : : :- * Project (61) +: : : : : : +- * BroadcastHashJoin Inner BuildRight (60) +: : : : : : :- * Project (58) +: : : : : : : +- * BroadcastHashJoin Inner BuildRight (57) +: : : : : : : :- * Filter (51) +: : : : : : : : +- * ColumnarToRow (50) +: : : : : : : : +- Scan parquet default.store_sales (49) +: : : : : : : +- BroadcastExchange (56) +: : : : : : : +- * Project (55) +: : : : : : : +- * Filter (54) +: : : : : : : +- * ColumnarToRow (53) +: : : : : : : +- Scan parquet default.time_dim (52) +: : : : : : +- ReusedExchange (59) +: : : : : +- ReusedExchange (62) +: : : : +- BroadcastExchange (89) +: : : : +- * HashAggregate (88) +: : : : +- Exchange (87) +: : : : +- * HashAggregate (86) +: : : : +- * Project (85) +: : : : +- * BroadcastHashJoin Inner BuildRight (84) +: : : : :- * Project (82) +: : : : : +- * BroadcastHashJoin Inner BuildRight (81) +: : : : : :- * Project (79) +: : : : : : +- * BroadcastHashJoin Inner BuildRight 
(78) +: : : : : : :- * Filter (72) +: : : : : : : +- * ColumnarToRow (71) +: : : : : : : +- Scan parquet default.store_sales (70) +: : : : : : +- BroadcastExchange (77) +: : : : : : +- * Project (76) +: : : : : : +- * Filter (75) +: : : : : : +- * ColumnarToRow (74) +: : : : : : +- Scan parquet default.time_dim (73) +: : : : : +- ReusedExchange (80) +: : : : +- ReusedExchange (83) +: : : +- BroadcastExchange (110) +: : : +- * HashAggregate (109) +: : : +- Exchange (108) +: : : +- * HashAggregate (107) +: : : +- * Project (106) +: : : +- * BroadcastHashJoin Inner BuildRight (105) +: : : :- * Project (103) +: : : : +- * BroadcastHashJoin Inner BuildRight (102) +: : : : :- * Project (100) +: : : : : +- * BroadcastHashJoin Inner BuildRight (99) +: : : : : :- * Filter (93) +: : : : : : +- * ColumnarToRow (92) +: : : : : : +- Scan parquet default.store_sales (91) +: : : : : +- BroadcastExchange (98) +: : : : : +- * Project (97) +: : : : : +- * Filter (96) +: : : : : +- * ColumnarToRow (95) +: : : : : +- Scan parquet default.time_dim (94) +: : : : +- ReusedExchange (101) +: : : +- ReusedExchange (104) +: : +- BroadcastExchange (131) +: : +- * HashAggregate (130) +: : +- Exchange (129) +: : +- * HashAggregate (128) +: : +- * Project (127) +: : +- * BroadcastHashJoin Inner BuildRight (126) +: : :- * Project (124) +: : : +- * BroadcastHashJoin Inner BuildRight (123) +: : : :- * Project (121) +: : : : +- * BroadcastHashJoin Inner BuildRight (120) +: : : : :- * Filter (114) +: : : : : +- * ColumnarToRow (113) +: : : : : +- Scan parquet default.store_sales (112) +: : : : +- BroadcastExchange (119) +: : : : +- * Project (118) +: : : : +- * Filter (117) +: : : : +- * ColumnarToRow (116) +: : : : +- Scan parquet default.time_dim (115) +: : : +- ReusedExchange (122) +: : +- ReusedExchange (125) +: +- BroadcastExchange (152) +: +- * HashAggregate (151) +: +- Exchange (150) +: +- * HashAggregate (149) +: +- * Project (148) +: +- * BroadcastHashJoin Inner BuildRight (147) +: :- * 
Project (145) +: : +- * BroadcastHashJoin Inner BuildRight (144) +: : :- * Project (142) +: : : +- * BroadcastHashJoin Inner BuildRight (141) +: : : :- * Filter (135) +: : : : +- * ColumnarToRow (134) +: : : : +- Scan parquet default.store_sales (133) +: : : +- BroadcastExchange (140) +: : : +- * Project (139) +: : : +- * Filter (138) +: : : +- * ColumnarToRow (137) +: : : +- Scan parquet default.time_dim (136) +: : +- ReusedExchange (143) +: +- ReusedExchange (146) ++- BroadcastExchange (173) + +- * HashAggregate (172) + +- Exchange (171) + +- * HashAggregate (170) + +- * Project (169) + +- * BroadcastHashJoin Inner BuildRight (168) + :- * Project (166) + : +- * BroadcastHashJoin Inner BuildRight (165) + : :- * Project (163) + : : +- * BroadcastHashJoin Inner BuildRight (162) + : : :- * Filter (156) + : : : +- * ColumnarToRow (155) + : : : +- Scan parquet default.store_sales (154) + : : +- BroadcastExchange (161) + : : +- * Project (160) + : : +- * Filter (159) + : : +- * ColumnarToRow (158) + : : +- Scan parquet default.time_dim (157) + : +- ReusedExchange (164) + +- ReusedExchange (167) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] + +(3) Filter [codegen id : 4] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) + +(4) Scan parquet default.time_dim +Output [3]: [t_time_sk#4, t_hour#5, t_minute#6] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,8), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [t_time_sk#4, t_hour#5, t_minute#6] + +(6) Filter [codegen id : 1] +Input [3]: [t_time_sk#4, t_hour#5, t_minute#6] +Condition : ((((isnotnull(t_hour#5) AND isnotnull(t_minute#6)) AND (t_hour#5 = 8)) AND (t_minute#6 >= 30)) AND isnotnull(t_time_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [t_time_sk#4] +Input [3]: [t_time_sk#4, t_hour#5, t_minute#6] + +(8) BroadcastExchange +Input [1]: [t_time_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_time_sk#1] +Right keys [1]: [t_time_sk#4] +Join condition: None + +(10) Project [codegen id : 4] +Output [2]: [ss_hdemo_sk#2, ss_store_sk#3] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, t_time_sk#4] + +(11) Scan parquet default.store +Output [2]: [s_store_sk#8, s_store_name#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_store_name), EqualTo(s_store_name,ese), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#8, s_store_name#9] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#8, s_store_name#9] +Condition : ((isnotnull(s_store_name#9) AND (s_store_name#9 = ese)) AND isnotnull(s_store_sk#8)) + +(14) Project [codegen id : 2] +Output [1]: [s_store_sk#8] +Input [2]: [s_store_sk#8, s_store_name#9] + +(15) BroadcastExchange +Input [1]: [s_store_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] + 
+(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#8] +Join condition: None + +(17) Project [codegen id : 4] +Output [1]: [ss_hdemo_sk#2] +Input [3]: [ss_hdemo_sk#2, ss_store_sk#3, s_store_sk#8] + +(18) Scan parquet default.household_demographics +Output [3]: [hd_demo_sk#11, hd_dep_count#12, hd_vehicle_count#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/household_demographics] +PushedFilters: [Or(Or(And(EqualTo(hd_dep_count,4),LessThanOrEqual(hd_vehicle_count,6)),And(EqualTo(hd_dep_count,2),LessThanOrEqual(hd_vehicle_count,4))),And(EqualTo(hd_dep_count,0),LessThanOrEqual(hd_vehicle_count,2))), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [3]: [hd_demo_sk#11, hd_dep_count#12, hd_vehicle_count#13] + +(20) Filter [codegen id : 3] +Input [3]: [hd_demo_sk#11, hd_dep_count#12, hd_vehicle_count#13] +Condition : (((((hd_dep_count#12 = 4) AND (hd_vehicle_count#13 <= 6)) OR ((hd_dep_count#12 = 2) AND (hd_vehicle_count#13 <= 4))) OR ((hd_dep_count#12 = 0) AND (hd_vehicle_count#13 <= 2))) AND isnotnull(hd_demo_sk#11)) + +(21) Project [codegen id : 3] +Output [1]: [hd_demo_sk#11] +Input [3]: [hd_demo_sk#11, hd_dep_count#12, hd_vehicle_count#13] + +(22) BroadcastExchange +Input [1]: [hd_demo_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#11] +Join condition: None + +(24) Project [codegen id : 4] +Output: [] +Input [2]: [ss_hdemo_sk#2, hd_demo_sk#11] + +(25) HashAggregate [codegen id : 4] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#15] +Results [1]: [count#16] + +(26) Exchange +Input [1]: [count#16] +Arguments: SinglePartition, true, [id=#17] + +(27) 
HashAggregate [codegen id : 5] +Input [1]: [count#16] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#18] +Results [1]: [count(1)#18 AS h8_30_to_9#19] + +(28) Scan parquet default.store_sales +Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 9] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] + +(30) Filter [codegen id : 9] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) + +(31) Scan parquet default.time_dim +Output [3]: [t_time_sk#4, t_hour#5, t_minute#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,9), LessThan(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 6] +Input [3]: [t_time_sk#4, t_hour#5, t_minute#6] + +(33) Filter [codegen id : 6] +Input [3]: [t_time_sk#4, t_hour#5, t_minute#6] +Condition : ((((isnotnull(t_hour#5) AND isnotnull(t_minute#6)) AND (t_hour#5 = 9)) AND (t_minute#6 < 30)) AND isnotnull(t_time_sk#4)) + +(34) Project [codegen id : 6] +Output [1]: [t_time_sk#4] +Input [3]: [t_time_sk#4, t_hour#5, t_minute#6] + +(35) BroadcastExchange +Input [1]: [t_time_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(36) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_sold_time_sk#1] +Right keys [1]: [t_time_sk#4] +Join condition: None + +(37) Project [codegen id : 
9] +Output [2]: [ss_hdemo_sk#2, ss_store_sk#3] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, t_time_sk#4] + +(38) ReusedExchange [Reuses operator id: 15] +Output [1]: [s_store_sk#8] + +(39) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#8] +Join condition: None + +(40) Project [codegen id : 9] +Output [1]: [ss_hdemo_sk#2] +Input [3]: [ss_hdemo_sk#2, ss_store_sk#3, s_store_sk#8] + +(41) ReusedExchange [Reuses operator id: 22] +Output [1]: [hd_demo_sk#11] + +(42) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#11] +Join condition: None + +(43) Project [codegen id : 9] +Output: [] +Input [2]: [ss_hdemo_sk#2, hd_demo_sk#11] + +(44) HashAggregate [codegen id : 9] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#21] +Results [1]: [count#22] + +(45) Exchange +Input [1]: [count#22] +Arguments: SinglePartition, true, [id=#23] + +(46) HashAggregate [codegen id : 10] +Input [1]: [count#22] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#24] +Results [1]: [count(1)#24 AS h9_to_9_30#25] + +(47) BroadcastExchange +Input [1]: [h9_to_9_30#25] +Arguments: IdentityBroadcastMode, [id=#26] + +(48) BroadcastNestedLoopJoin +Join condition: None + +(49) Scan parquet default.store_sales +Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(50) ColumnarToRow [codegen id : 14] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] + +(51) Filter [codegen id : 14] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND 
isnotnull(ss_store_sk#3)) + +(52) Scan parquet default.time_dim +Output [3]: [t_time_sk#4, t_hour#5, t_minute#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,9), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(53) ColumnarToRow [codegen id : 11] +Input [3]: [t_time_sk#4, t_hour#5, t_minute#6] + +(54) Filter [codegen id : 11] +Input [3]: [t_time_sk#4, t_hour#5, t_minute#6] +Condition : ((((isnotnull(t_hour#5) AND isnotnull(t_minute#6)) AND (t_hour#5 = 9)) AND (t_minute#6 >= 30)) AND isnotnull(t_time_sk#4)) + +(55) Project [codegen id : 11] +Output [1]: [t_time_sk#4] +Input [3]: [t_time_sk#4, t_hour#5, t_minute#6] + +(56) BroadcastExchange +Input [1]: [t_time_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#27] + +(57) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ss_sold_time_sk#1] +Right keys [1]: [t_time_sk#4] +Join condition: None + +(58) Project [codegen id : 14] +Output [2]: [ss_hdemo_sk#2, ss_store_sk#3] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, t_time_sk#4] + +(59) ReusedExchange [Reuses operator id: 15] +Output [1]: [s_store_sk#8] + +(60) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#8] +Join condition: None + +(61) Project [codegen id : 14] +Output [1]: [ss_hdemo_sk#2] +Input [3]: [ss_hdemo_sk#2, ss_store_sk#3, s_store_sk#8] + +(62) ReusedExchange [Reuses operator id: 22] +Output [1]: [hd_demo_sk#11] + +(63) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#11] +Join condition: None + +(64) Project [codegen id : 14] +Output: [] +Input [2]: [ss_hdemo_sk#2, hd_demo_sk#11] + +(65) HashAggregate [codegen id : 14] +Input: [] +Keys: [] +Functions [1]: 
[partial_count(1)] +Aggregate Attributes [1]: [count#28] +Results [1]: [count#29] + +(66) Exchange +Input [1]: [count#29] +Arguments: SinglePartition, true, [id=#30] + +(67) HashAggregate [codegen id : 15] +Input [1]: [count#29] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#31] +Results [1]: [count(1)#31 AS h9_30_to_10#32] + +(68) BroadcastExchange +Input [1]: [h9_30_to_10#32] +Arguments: IdentityBroadcastMode, [id=#33] + +(69) BroadcastNestedLoopJoin +Join condition: None + +(70) Scan parquet default.store_sales +Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(71) ColumnarToRow [codegen id : 19] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] + +(72) Filter [codegen id : 19] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) + +(73) Scan parquet default.time_dim +Output [3]: [t_time_sk#4, t_hour#5, t_minute#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,10), LessThan(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(74) ColumnarToRow [codegen id : 16] +Input [3]: [t_time_sk#4, t_hour#5, t_minute#6] + +(75) Filter [codegen id : 16] +Input [3]: [t_time_sk#4, t_hour#5, t_minute#6] +Condition : ((((isnotnull(t_hour#5) AND isnotnull(t_minute#6)) AND (t_hour#5 = 10)) AND (t_minute#6 < 30)) AND isnotnull(t_time_sk#4)) + +(76) Project [codegen id : 16] +Output [1]: [t_time_sk#4] +Input [3]: [t_time_sk#4, 
t_hour#5, t_minute#6] + +(77) BroadcastExchange +Input [1]: [t_time_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#34] + +(78) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [ss_sold_time_sk#1] +Right keys [1]: [t_time_sk#4] +Join condition: None + +(79) Project [codegen id : 19] +Output [2]: [ss_hdemo_sk#2, ss_store_sk#3] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, t_time_sk#4] + +(80) ReusedExchange [Reuses operator id: 15] +Output [1]: [s_store_sk#8] + +(81) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#8] +Join condition: None + +(82) Project [codegen id : 19] +Output [1]: [ss_hdemo_sk#2] +Input [3]: [ss_hdemo_sk#2, ss_store_sk#3, s_store_sk#8] + +(83) ReusedExchange [Reuses operator id: 22] +Output [1]: [hd_demo_sk#11] + +(84) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#11] +Join condition: None + +(85) Project [codegen id : 19] +Output: [] +Input [2]: [ss_hdemo_sk#2, hd_demo_sk#11] + +(86) HashAggregate [codegen id : 19] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#35] +Results [1]: [count#36] + +(87) Exchange +Input [1]: [count#36] +Arguments: SinglePartition, true, [id=#37] + +(88) HashAggregate [codegen id : 20] +Input [1]: [count#36] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#38] +Results [1]: [count(1)#38 AS h10_to_10_30#39] + +(89) BroadcastExchange +Input [1]: [h10_to_10_30#39] +Arguments: IdentityBroadcastMode, [id=#40] + +(90) BroadcastNestedLoopJoin +Join condition: None + +(91) Scan parquet default.store_sales +Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), 
IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(92) ColumnarToRow [codegen id : 24] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] + +(93) Filter [codegen id : 24] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) + +(94) Scan parquet default.time_dim +Output [3]: [t_time_sk#4, t_hour#5, t_minute#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,10), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(95) ColumnarToRow [codegen id : 21] +Input [3]: [t_time_sk#4, t_hour#5, t_minute#6] + +(96) Filter [codegen id : 21] +Input [3]: [t_time_sk#4, t_hour#5, t_minute#6] +Condition : ((((isnotnull(t_hour#5) AND isnotnull(t_minute#6)) AND (t_hour#5 = 10)) AND (t_minute#6 >= 30)) AND isnotnull(t_time_sk#4)) + +(97) Project [codegen id : 21] +Output [1]: [t_time_sk#4] +Input [3]: [t_time_sk#4, t_hour#5, t_minute#6] + +(98) BroadcastExchange +Input [1]: [t_time_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#41] + +(99) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [ss_sold_time_sk#1] +Right keys [1]: [t_time_sk#4] +Join condition: None + +(100) Project [codegen id : 24] +Output [2]: [ss_hdemo_sk#2, ss_store_sk#3] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, t_time_sk#4] + +(101) ReusedExchange [Reuses operator id: 15] +Output [1]: [s_store_sk#8] + +(102) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#8] +Join condition: None + +(103) Project [codegen id : 24] +Output [1]: [ss_hdemo_sk#2] +Input [3]: [ss_hdemo_sk#2, ss_store_sk#3, s_store_sk#8] + +(104) ReusedExchange 
[Reuses operator id: 22] +Output [1]: [hd_demo_sk#11] + +(105) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#11] +Join condition: None + +(106) Project [codegen id : 24] +Output: [] +Input [2]: [ss_hdemo_sk#2, hd_demo_sk#11] + +(107) HashAggregate [codegen id : 24] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#42] +Results [1]: [count#43] + +(108) Exchange +Input [1]: [count#43] +Arguments: SinglePartition, true, [id=#44] + +(109) HashAggregate [codegen id : 25] +Input [1]: [count#43] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#45] +Results [1]: [count(1)#45 AS h10_30_to_11#46] + +(110) BroadcastExchange +Input [1]: [h10_30_to_11#46] +Arguments: IdentityBroadcastMode, [id=#47] + +(111) BroadcastNestedLoopJoin +Join condition: None + +(112) Scan parquet default.store_sales +Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(113) ColumnarToRow [codegen id : 29] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] + +(114) Filter [codegen id : 29] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) + +(115) Scan parquet default.time_dim +Output [3]: [t_time_sk#4, t_hour#5, t_minute#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,11), LessThan(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(116) ColumnarToRow 
[codegen id : 26] +Input [3]: [t_time_sk#4, t_hour#5, t_minute#6] + +(117) Filter [codegen id : 26] +Input [3]: [t_time_sk#4, t_hour#5, t_minute#6] +Condition : ((((isnotnull(t_hour#5) AND isnotnull(t_minute#6)) AND (t_hour#5 = 11)) AND (t_minute#6 < 30)) AND isnotnull(t_time_sk#4)) + +(118) Project [codegen id : 26] +Output [1]: [t_time_sk#4] +Input [3]: [t_time_sk#4, t_hour#5, t_minute#6] + +(119) BroadcastExchange +Input [1]: [t_time_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#48] + +(120) BroadcastHashJoin [codegen id : 29] +Left keys [1]: [ss_sold_time_sk#1] +Right keys [1]: [t_time_sk#4] +Join condition: None + +(121) Project [codegen id : 29] +Output [2]: [ss_hdemo_sk#2, ss_store_sk#3] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, t_time_sk#4] + +(122) ReusedExchange [Reuses operator id: 15] +Output [1]: [s_store_sk#8] + +(123) BroadcastHashJoin [codegen id : 29] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#8] +Join condition: None + +(124) Project [codegen id : 29] +Output [1]: [ss_hdemo_sk#2] +Input [3]: [ss_hdemo_sk#2, ss_store_sk#3, s_store_sk#8] + +(125) ReusedExchange [Reuses operator id: 22] +Output [1]: [hd_demo_sk#11] + +(126) BroadcastHashJoin [codegen id : 29] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#11] +Join condition: None + +(127) Project [codegen id : 29] +Output: [] +Input [2]: [ss_hdemo_sk#2, hd_demo_sk#11] + +(128) HashAggregate [codegen id : 29] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#49] +Results [1]: [count#50] + +(129) Exchange +Input [1]: [count#50] +Arguments: SinglePartition, true, [id=#51] + +(130) HashAggregate [codegen id : 30] +Input [1]: [count#50] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#52] +Results [1]: [count(1)#52 AS h11_to_11_30#53] + +(131) BroadcastExchange +Input [1]: [h11_to_11_30#53] +Arguments: IdentityBroadcastMode, [id=#54] + 
+(132) BroadcastNestedLoopJoin +Join condition: None + +(133) Scan parquet default.store_sales +Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(134) ColumnarToRow [codegen id : 34] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] + +(135) Filter [codegen id : 34] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) + +(136) Scan parquet default.time_dim +Output [3]: [t_time_sk#4, t_hour#5, t_minute#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,11), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(137) ColumnarToRow [codegen id : 31] +Input [3]: [t_time_sk#4, t_hour#5, t_minute#6] + +(138) Filter [codegen id : 31] +Input [3]: [t_time_sk#4, t_hour#5, t_minute#6] +Condition : ((((isnotnull(t_hour#5) AND isnotnull(t_minute#6)) AND (t_hour#5 = 11)) AND (t_minute#6 >= 30)) AND isnotnull(t_time_sk#4)) + +(139) Project [codegen id : 31] +Output [1]: [t_time_sk#4] +Input [3]: [t_time_sk#4, t_hour#5, t_minute#6] + +(140) BroadcastExchange +Input [1]: [t_time_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#55] + +(141) BroadcastHashJoin [codegen id : 34] +Left keys [1]: [ss_sold_time_sk#1] +Right keys [1]: [t_time_sk#4] +Join condition: None + +(142) Project [codegen id : 34] +Output [2]: [ss_hdemo_sk#2, ss_store_sk#3] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, 
ss_store_sk#3, t_time_sk#4] + +(143) ReusedExchange [Reuses operator id: 15] +Output [1]: [s_store_sk#8] + +(144) BroadcastHashJoin [codegen id : 34] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#8] +Join condition: None + +(145) Project [codegen id : 34] +Output [1]: [ss_hdemo_sk#2] +Input [3]: [ss_hdemo_sk#2, ss_store_sk#3, s_store_sk#8] + +(146) ReusedExchange [Reuses operator id: 22] +Output [1]: [hd_demo_sk#11] + +(147) BroadcastHashJoin [codegen id : 34] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#11] +Join condition: None + +(148) Project [codegen id : 34] +Output: [] +Input [2]: [ss_hdemo_sk#2, hd_demo_sk#11] + +(149) HashAggregate [codegen id : 34] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#56] +Results [1]: [count#57] + +(150) Exchange +Input [1]: [count#57] +Arguments: SinglePartition, true, [id=#58] + +(151) HashAggregate [codegen id : 35] +Input [1]: [count#57] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#59] +Results [1]: [count(1)#59 AS h11_30_to_12#60] + +(152) BroadcastExchange +Input [1]: [h11_30_to_12#60] +Arguments: IdentityBroadcastMode, [id=#61] + +(153) BroadcastNestedLoopJoin +Join condition: None + +(154) Scan parquet default.store_sales +Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(155) ColumnarToRow [codegen id : 39] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] + +(156) Filter [codegen id : 39] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) + +(157) Scan parquet default.time_dim +Output [3]: 
[t_time_sk#4, t_hour#5, t_minute#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,12), LessThan(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(158) ColumnarToRow [codegen id : 36] +Input [3]: [t_time_sk#4, t_hour#5, t_minute#6] + +(159) Filter [codegen id : 36] +Input [3]: [t_time_sk#4, t_hour#5, t_minute#6] +Condition : ((((isnotnull(t_hour#5) AND isnotnull(t_minute#6)) AND (t_hour#5 = 12)) AND (t_minute#6 < 30)) AND isnotnull(t_time_sk#4)) + +(160) Project [codegen id : 36] +Output [1]: [t_time_sk#4] +Input [3]: [t_time_sk#4, t_hour#5, t_minute#6] + +(161) BroadcastExchange +Input [1]: [t_time_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#62] + +(162) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [ss_sold_time_sk#1] +Right keys [1]: [t_time_sk#4] +Join condition: None + +(163) Project [codegen id : 39] +Output [2]: [ss_hdemo_sk#2, ss_store_sk#3] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, t_time_sk#4] + +(164) ReusedExchange [Reuses operator id: 15] +Output [1]: [s_store_sk#8] + +(165) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#8] +Join condition: None + +(166) Project [codegen id : 39] +Output [1]: [ss_hdemo_sk#2] +Input [3]: [ss_hdemo_sk#2, ss_store_sk#3, s_store_sk#8] + +(167) ReusedExchange [Reuses operator id: 22] +Output [1]: [hd_demo_sk#11] + +(168) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#11] +Join condition: None + +(169) Project [codegen id : 39] +Output: [] +Input [2]: [ss_hdemo_sk#2, hd_demo_sk#11] + +(170) HashAggregate [codegen id : 39] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#63] +Results [1]: [count#64] 
+ +(171) Exchange +Input [1]: [count#64] +Arguments: SinglePartition, true, [id=#65] + +(172) HashAggregate [codegen id : 40] +Input [1]: [count#64] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#66] +Results [1]: [count(1)#66 AS h12_to_12_30#67] + +(173) BroadcastExchange +Input [1]: [h12_to_12_30#67] +Arguments: IdentityBroadcastMode, [id=#68] + +(174) BroadcastNestedLoopJoin +Join condition: None + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88.sf100/simplified.txt new file mode 100644 index 0000000000000..1e591a4bc0894 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88.sf100/simplified.txt @@ -0,0 +1,250 @@ +BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + WholeStageCodegen (5) + HashAggregate [count] [count,count(1),h8_30_to_9] + InputAdapter + Exchange #1 + WholeStageCodegen (4) + HashAggregate [count,count] + Project + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_hdemo_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_hdemo_sk,ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [s_store_sk] + Filter [s_store_name,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_name,s_store_sk] + InputAdapter + BroadcastExchange #4 + 
WholeStageCodegen (3) + Project [hd_demo_sk] + Filter [hd_demo_sk,hd_dep_count,hd_vehicle_count] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] + BroadcastExchange #5 + WholeStageCodegen (10) + HashAggregate [count] [count,count(1),h9_to_9_30] + InputAdapter + Exchange #6 + WholeStageCodegen (9) + HashAggregate [count,count] + Project + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_hdemo_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_hdemo_sk,ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (6) + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] + InputAdapter + ReusedExchange [s_store_sk] #3 + InputAdapter + ReusedExchange [hd_demo_sk] #4 + BroadcastExchange #8 + WholeStageCodegen (15) + HashAggregate [count] [count,count(1),h9_30_to_10] + InputAdapter + Exchange #9 + WholeStageCodegen (14) + HashAggregate [count,count] + Project + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_hdemo_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_hdemo_sk,ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (11) + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] + InputAdapter + ReusedExchange [s_store_sk] #3 + InputAdapter + ReusedExchange [hd_demo_sk] #4 + BroadcastExchange #11 + WholeStageCodegen (20) + 
HashAggregate [count] [count,count(1),h10_to_10_30] + InputAdapter + Exchange #12 + WholeStageCodegen (19) + HashAggregate [count,count] + Project + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_hdemo_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_hdemo_sk,ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (16) + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] + InputAdapter + ReusedExchange [s_store_sk] #3 + InputAdapter + ReusedExchange [hd_demo_sk] #4 + BroadcastExchange #14 + WholeStageCodegen (25) + HashAggregate [count] [count,count(1),h10_30_to_11] + InputAdapter + Exchange #15 + WholeStageCodegen (24) + HashAggregate [count,count] + Project + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_hdemo_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_hdemo_sk,ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + InputAdapter + BroadcastExchange #16 + WholeStageCodegen (21) + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] + InputAdapter + ReusedExchange [s_store_sk] #3 + InputAdapter + ReusedExchange [hd_demo_sk] #4 + BroadcastExchange #17 + WholeStageCodegen (30) + HashAggregate [count] [count,count(1),h11_to_11_30] + InputAdapter + Exchange #18 + WholeStageCodegen (29) + HashAggregate [count,count] + Project + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_hdemo_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + 
Project [ss_hdemo_sk,ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + InputAdapter + BroadcastExchange #19 + WholeStageCodegen (26) + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] + InputAdapter + ReusedExchange [s_store_sk] #3 + InputAdapter + ReusedExchange [hd_demo_sk] #4 + BroadcastExchange #20 + WholeStageCodegen (35) + HashAggregate [count] [count,count(1),h11_30_to_12] + InputAdapter + Exchange #21 + WholeStageCodegen (34) + HashAggregate [count,count] + Project + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_hdemo_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_hdemo_sk,ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + InputAdapter + BroadcastExchange #22 + WholeStageCodegen (31) + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] + InputAdapter + ReusedExchange [s_store_sk] #3 + InputAdapter + ReusedExchange [hd_demo_sk] #4 + BroadcastExchange #23 + WholeStageCodegen (40) + HashAggregate [count] [count,count(1),h12_to_12_30] + InputAdapter + Exchange #24 + WholeStageCodegen (39) + HashAggregate [count,count] + Project + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_hdemo_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_hdemo_sk,ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + InputAdapter + 
BroadcastExchange #25 + WholeStageCodegen (36) + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] + InputAdapter + ReusedExchange [s_store_sk] #3 + InputAdapter + ReusedExchange [hd_demo_sk] #4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88/explain.txt new file mode 100644 index 0000000000000..4b9064aff5f0c --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88/explain.txt @@ -0,0 +1,960 @@ +== Physical Plan == +BroadcastNestedLoopJoin Inner BuildRight (174) +:- BroadcastNestedLoopJoin Inner BuildRight (153) +: :- BroadcastNestedLoopJoin Inner BuildRight (132) +: : :- BroadcastNestedLoopJoin Inner BuildRight (111) +: : : :- BroadcastNestedLoopJoin Inner BuildRight (90) +: : : : :- BroadcastNestedLoopJoin Inner BuildRight (69) +: : : : : :- BroadcastNestedLoopJoin Inner BuildRight (48) +: : : : : : :- * HashAggregate (27) +: : : : : : : +- Exchange (26) +: : : : : : : +- * HashAggregate (25) +: : : : : : : +- * Project (24) +: : : : : : : +- * BroadcastHashJoin Inner BuildRight (23) +: : : : : : : :- * Project (17) +: : : : : : : : +- * BroadcastHashJoin Inner BuildRight (16) +: : : : : : : : :- * Project (10) +: : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (9) +: : : : : : : : : :- * Filter (3) +: : : : : : : : : : +- * ColumnarToRow (2) +: : : : : : : : : : +- Scan parquet default.store_sales (1) +: : : : : : : : : +- BroadcastExchange (8) +: : : : : : : : : +- * Project (7) +: : : : : : : : : +- * Filter (6) +: : : : : : : : : +- * ColumnarToRow (5) +: : : : : : : : : +- Scan parquet default.household_demographics (4) +: : : : : : : : +- BroadcastExchange (15) +: : : : : : : : +- * Project (14) +: : : : : : : : +- * Filter (13) +: : : : : : : : +- * ColumnarToRow (12) +: : : : : : : : +- Scan 
parquet default.time_dim (11) +: : : : : : : +- BroadcastExchange (22) +: : : : : : : +- * Project (21) +: : : : : : : +- * Filter (20) +: : : : : : : +- * ColumnarToRow (19) +: : : : : : : +- Scan parquet default.store (18) +: : : : : : +- BroadcastExchange (47) +: : : : : : +- * HashAggregate (46) +: : : : : : +- Exchange (45) +: : : : : : +- * HashAggregate (44) +: : : : : : +- * Project (43) +: : : : : : +- * BroadcastHashJoin Inner BuildRight (42) +: : : : : : :- * Project (40) +: : : : : : : +- * BroadcastHashJoin Inner BuildRight (39) +: : : : : : : :- * Project (33) +: : : : : : : : +- * BroadcastHashJoin Inner BuildRight (32) +: : : : : : : : :- * Filter (30) +: : : : : : : : : +- * ColumnarToRow (29) +: : : : : : : : : +- Scan parquet default.store_sales (28) +: : : : : : : : +- ReusedExchange (31) +: : : : : : : +- BroadcastExchange (38) +: : : : : : : +- * Project (37) +: : : : : : : +- * Filter (36) +: : : : : : : +- * ColumnarToRow (35) +: : : : : : : +- Scan parquet default.time_dim (34) +: : : : : : +- ReusedExchange (41) +: : : : : +- BroadcastExchange (68) +: : : : : +- * HashAggregate (67) +: : : : : +- Exchange (66) +: : : : : +- * HashAggregate (65) +: : : : : +- * Project (64) +: : : : : +- * BroadcastHashJoin Inner BuildRight (63) +: : : : : :- * Project (61) +: : : : : : +- * BroadcastHashJoin Inner BuildRight (60) +: : : : : : :- * Project (54) +: : : : : : : +- * BroadcastHashJoin Inner BuildRight (53) +: : : : : : : :- * Filter (51) +: : : : : : : : +- * ColumnarToRow (50) +: : : : : : : : +- Scan parquet default.store_sales (49) +: : : : : : : +- ReusedExchange (52) +: : : : : : +- BroadcastExchange (59) +: : : : : : +- * Project (58) +: : : : : : +- * Filter (57) +: : : : : : +- * ColumnarToRow (56) +: : : : : : +- Scan parquet default.time_dim (55) +: : : : : +- ReusedExchange (62) +: : : : +- BroadcastExchange (89) +: : : : +- * HashAggregate (88) +: : : : +- Exchange (87) +: : : : +- * HashAggregate (86) +: : : : +- * Project (85) +: 
: : : +- * BroadcastHashJoin Inner BuildRight (84) +: : : : :- * Project (82) +: : : : : +- * BroadcastHashJoin Inner BuildRight (81) +: : : : : :- * Project (75) +: : : : : : +- * BroadcastHashJoin Inner BuildRight (74) +: : : : : : :- * Filter (72) +: : : : : : : +- * ColumnarToRow (71) +: : : : : : : +- Scan parquet default.store_sales (70) +: : : : : : +- ReusedExchange (73) +: : : : : +- BroadcastExchange (80) +: : : : : +- * Project (79) +: : : : : +- * Filter (78) +: : : : : +- * ColumnarToRow (77) +: : : : : +- Scan parquet default.time_dim (76) +: : : : +- ReusedExchange (83) +: : : +- BroadcastExchange (110) +: : : +- * HashAggregate (109) +: : : +- Exchange (108) +: : : +- * HashAggregate (107) +: : : +- * Project (106) +: : : +- * BroadcastHashJoin Inner BuildRight (105) +: : : :- * Project (103) +: : : : +- * BroadcastHashJoin Inner BuildRight (102) +: : : : :- * Project (96) +: : : : : +- * BroadcastHashJoin Inner BuildRight (95) +: : : : : :- * Filter (93) +: : : : : : +- * ColumnarToRow (92) +: : : : : : +- Scan parquet default.store_sales (91) +: : : : : +- ReusedExchange (94) +: : : : +- BroadcastExchange (101) +: : : : +- * Project (100) +: : : : +- * Filter (99) +: : : : +- * ColumnarToRow (98) +: : : : +- Scan parquet default.time_dim (97) +: : : +- ReusedExchange (104) +: : +- BroadcastExchange (131) +: : +- * HashAggregate (130) +: : +- Exchange (129) +: : +- * HashAggregate (128) +: : +- * Project (127) +: : +- * BroadcastHashJoin Inner BuildRight (126) +: : :- * Project (124) +: : : +- * BroadcastHashJoin Inner BuildRight (123) +: : : :- * Project (117) +: : : : +- * BroadcastHashJoin Inner BuildRight (116) +: : : : :- * Filter (114) +: : : : : +- * ColumnarToRow (113) +: : : : : +- Scan parquet default.store_sales (112) +: : : : +- ReusedExchange (115) +: : : +- BroadcastExchange (122) +: : : +- * Project (121) +: : : +- * Filter (120) +: : : +- * ColumnarToRow (119) +: : : +- Scan parquet default.time_dim (118) +: : +- ReusedExchange 
(125) +: +- BroadcastExchange (152) +: +- * HashAggregate (151) +: +- Exchange (150) +: +- * HashAggregate (149) +: +- * Project (148) +: +- * BroadcastHashJoin Inner BuildRight (147) +: :- * Project (145) +: : +- * BroadcastHashJoin Inner BuildRight (144) +: : :- * Project (138) +: : : +- * BroadcastHashJoin Inner BuildRight (137) +: : : :- * Filter (135) +: : : : +- * ColumnarToRow (134) +: : : : +- Scan parquet default.store_sales (133) +: : : +- ReusedExchange (136) +: : +- BroadcastExchange (143) +: : +- * Project (142) +: : +- * Filter (141) +: : +- * ColumnarToRow (140) +: : +- Scan parquet default.time_dim (139) +: +- ReusedExchange (146) ++- BroadcastExchange (173) + +- * HashAggregate (172) + +- Exchange (171) + +- * HashAggregate (170) + +- * Project (169) + +- * BroadcastHashJoin Inner BuildRight (168) + :- * Project (166) + : +- * BroadcastHashJoin Inner BuildRight (165) + : :- * Project (159) + : : +- * BroadcastHashJoin Inner BuildRight (158) + : : :- * Filter (156) + : : : +- * ColumnarToRow (155) + : : : +- Scan parquet default.store_sales (154) + : : +- ReusedExchange (157) + : +- BroadcastExchange (164) + : +- * Project (163) + : +- * Filter (162) + : +- * ColumnarToRow (161) + : +- Scan parquet default.time_dim (160) + +- ReusedExchange (167) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] + +(3) Filter [codegen id : 4] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) + +(4) Scan parquet 
default.household_demographics +Output [3]: [hd_demo_sk#4, hd_dep_count#5, hd_vehicle_count#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/household_demographics] +PushedFilters: [Or(Or(And(EqualTo(hd_dep_count,4),LessThanOrEqual(hd_vehicle_count,6)),And(EqualTo(hd_dep_count,2),LessThanOrEqual(hd_vehicle_count,4))),And(EqualTo(hd_dep_count,0),LessThanOrEqual(hd_vehicle_count,2))), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [hd_demo_sk#4, hd_dep_count#5, hd_vehicle_count#6] + +(6) Filter [codegen id : 1] +Input [3]: [hd_demo_sk#4, hd_dep_count#5, hd_vehicle_count#6] +Condition : (((((hd_dep_count#5 = 4) AND (hd_vehicle_count#6 <= 6)) OR ((hd_dep_count#5 = 2) AND (hd_vehicle_count#6 <= 4))) OR ((hd_dep_count#5 = 0) AND (hd_vehicle_count#6 <= 2))) AND isnotnull(hd_demo_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [hd_demo_sk#4] +Input [3]: [hd_demo_sk#4, hd_dep_count#5, hd_vehicle_count#6] + +(8) BroadcastExchange +Input [1]: [hd_demo_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#4] +Join condition: None + +(10) Project [codegen id : 4] +Output [2]: [ss_sold_time_sk#1, ss_store_sk#3] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#4] + +(11) Scan parquet default.time_dim +Output [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,8), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] 
+ +(13) Filter [codegen id : 2] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Condition : ((((isnotnull(t_hour#9) AND isnotnull(t_minute#10)) AND (t_hour#9 = 8)) AND (t_minute#10 >= 30)) AND isnotnull(t_time_sk#8)) + +(14) Project [codegen id : 2] +Output [1]: [t_time_sk#8] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] + +(15) BroadcastExchange +Input [1]: [t_time_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_time_sk#1] +Right keys [1]: [t_time_sk#8] +Join condition: None + +(17) Project [codegen id : 4] +Output [1]: [ss_store_sk#3] +Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#8] + +(18) Scan parquet default.store +Output [2]: [s_store_sk#12, s_store_name#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_store_name), EqualTo(s_store_name,ese), IsNotNull(s_store_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [s_store_sk#12, s_store_name#13] + +(20) Filter [codegen id : 3] +Input [2]: [s_store_sk#12, s_store_name#13] +Condition : ((isnotnull(s_store_name#13) AND (s_store_name#13 = ese)) AND isnotnull(s_store_sk#12)) + +(21) Project [codegen id : 3] +Output [1]: [s_store_sk#12] +Input [2]: [s_store_sk#12, s_store_name#13] + +(22) BroadcastExchange +Input [1]: [s_store_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#12] +Join condition: None + +(24) Project [codegen id : 4] +Output: [] +Input [2]: [ss_store_sk#3, s_store_sk#12] + +(25) HashAggregate [codegen id : 4] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#15] +Results [1]: [count#16] + +(26) 
Exchange +Input [1]: [count#16] +Arguments: SinglePartition, true, [id=#17] + +(27) HashAggregate [codegen id : 5] +Input [1]: [count#16] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#18] +Results [1]: [count(1)#18 AS h8_30_to_9#19] + +(28) Scan parquet default.store_sales +Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 9] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] + +(30) Filter [codegen id : 9] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) + +(31) ReusedExchange [Reuses operator id: 8] +Output [1]: [hd_demo_sk#4] + +(32) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#4] +Join condition: None + +(33) Project [codegen id : 9] +Output [2]: [ss_sold_time_sk#1, ss_store_sk#3] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#4] + +(34) Scan parquet default.time_dim +Output [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,9), LessThan(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 7] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] + +(36) Filter [codegen id : 7] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Condition : ((((isnotnull(t_hour#9) AND isnotnull(t_minute#10)) AND (t_hour#9 = 9)) AND (t_minute#10 < 30)) AND 
isnotnull(t_time_sk#8)) + +(37) Project [codegen id : 7] +Output [1]: [t_time_sk#8] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] + +(38) BroadcastExchange +Input [1]: [t_time_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(39) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_sold_time_sk#1] +Right keys [1]: [t_time_sk#8] +Join condition: None + +(40) Project [codegen id : 9] +Output [1]: [ss_store_sk#3] +Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#8] + +(41) ReusedExchange [Reuses operator id: 22] +Output [1]: [s_store_sk#12] + +(42) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#12] +Join condition: None + +(43) Project [codegen id : 9] +Output: [] +Input [2]: [ss_store_sk#3, s_store_sk#12] + +(44) HashAggregate [codegen id : 9] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#21] +Results [1]: [count#22] + +(45) Exchange +Input [1]: [count#22] +Arguments: SinglePartition, true, [id=#23] + +(46) HashAggregate [codegen id : 10] +Input [1]: [count#22] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#24] +Results [1]: [count(1)#24 AS h9_to_9_30#25] + +(47) BroadcastExchange +Input [1]: [h9_to_9_30#25] +Arguments: IdentityBroadcastMode, [id=#26] + +(48) BroadcastNestedLoopJoin +Join condition: None + +(49) Scan parquet default.store_sales +Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(50) ColumnarToRow [codegen id : 14] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] + +(51) Filter [codegen id : 14] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] 
+Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) + +(52) ReusedExchange [Reuses operator id: 8] +Output [1]: [hd_demo_sk#4] + +(53) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#4] +Join condition: None + +(54) Project [codegen id : 14] +Output [2]: [ss_sold_time_sk#1, ss_store_sk#3] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#4] + +(55) Scan parquet default.time_dim +Output [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,9), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(56) ColumnarToRow [codegen id : 12] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] + +(57) Filter [codegen id : 12] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Condition : ((((isnotnull(t_hour#9) AND isnotnull(t_minute#10)) AND (t_hour#9 = 9)) AND (t_minute#10 >= 30)) AND isnotnull(t_time_sk#8)) + +(58) Project [codegen id : 12] +Output [1]: [t_time_sk#8] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] + +(59) BroadcastExchange +Input [1]: [t_time_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#27] + +(60) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ss_sold_time_sk#1] +Right keys [1]: [t_time_sk#8] +Join condition: None + +(61) Project [codegen id : 14] +Output [1]: [ss_store_sk#3] +Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#8] + +(62) ReusedExchange [Reuses operator id: 22] +Output [1]: [s_store_sk#12] + +(63) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#12] +Join condition: None + +(64) Project [codegen id : 14] +Output: [] +Input [2]: [ss_store_sk#3, s_store_sk#12] + 
+(65) HashAggregate [codegen id : 14] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#28] +Results [1]: [count#29] + +(66) Exchange +Input [1]: [count#29] +Arguments: SinglePartition, true, [id=#30] + +(67) HashAggregate [codegen id : 15] +Input [1]: [count#29] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#31] +Results [1]: [count(1)#31 AS h9_30_to_10#32] + +(68) BroadcastExchange +Input [1]: [h9_30_to_10#32] +Arguments: IdentityBroadcastMode, [id=#33] + +(69) BroadcastNestedLoopJoin +Join condition: None + +(70) Scan parquet default.store_sales +Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(71) ColumnarToRow [codegen id : 19] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] + +(72) Filter [codegen id : 19] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) + +(73) ReusedExchange [Reuses operator id: 8] +Output [1]: [hd_demo_sk#4] + +(74) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#4] +Join condition: None + +(75) Project [codegen id : 19] +Output [2]: [ss_sold_time_sk#1, ss_store_sk#3] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#4] + +(76) Scan parquet default.time_dim +Output [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,10), LessThan(t_minute,30), 
IsNotNull(t_time_sk)] +ReadSchema: struct + +(77) ColumnarToRow [codegen id : 17] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] + +(78) Filter [codegen id : 17] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Condition : ((((isnotnull(t_hour#9) AND isnotnull(t_minute#10)) AND (t_hour#9 = 10)) AND (t_minute#10 < 30)) AND isnotnull(t_time_sk#8)) + +(79) Project [codegen id : 17] +Output [1]: [t_time_sk#8] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] + +(80) BroadcastExchange +Input [1]: [t_time_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#34] + +(81) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [ss_sold_time_sk#1] +Right keys [1]: [t_time_sk#8] +Join condition: None + +(82) Project [codegen id : 19] +Output [1]: [ss_store_sk#3] +Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#8] + +(83) ReusedExchange [Reuses operator id: 22] +Output [1]: [s_store_sk#12] + +(84) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#12] +Join condition: None + +(85) Project [codegen id : 19] +Output: [] +Input [2]: [ss_store_sk#3, s_store_sk#12] + +(86) HashAggregate [codegen id : 19] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#35] +Results [1]: [count#36] + +(87) Exchange +Input [1]: [count#36] +Arguments: SinglePartition, true, [id=#37] + +(88) HashAggregate [codegen id : 20] +Input [1]: [count#36] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#38] +Results [1]: [count(1)#38 AS h10_to_10_30#39] + +(89) BroadcastExchange +Input [1]: [h10_to_10_30#39] +Arguments: IdentityBroadcastMode, [id=#40] + +(90) BroadcastNestedLoopJoin +Join condition: None + +(91) Scan parquet default.store_sales +Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(92) ColumnarToRow [codegen id : 24] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] + +(93) Filter [codegen id : 24] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) + +(94) ReusedExchange [Reuses operator id: 8] +Output [1]: [hd_demo_sk#4] + +(95) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#4] +Join condition: None + +(96) Project [codegen id : 24] +Output [2]: [ss_sold_time_sk#1, ss_store_sk#3] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#4] + +(97) Scan parquet default.time_dim +Output [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,10), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(98) ColumnarToRow [codegen id : 22] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] + +(99) Filter [codegen id : 22] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Condition : ((((isnotnull(t_hour#9) AND isnotnull(t_minute#10)) AND (t_hour#9 = 10)) AND (t_minute#10 >= 30)) AND isnotnull(t_time_sk#8)) + +(100) Project [codegen id : 22] +Output [1]: [t_time_sk#8] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] + +(101) BroadcastExchange +Input [1]: [t_time_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#41] + +(102) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [ss_sold_time_sk#1] +Right keys [1]: [t_time_sk#8] 
+Join condition: None + +(103) Project [codegen id : 24] +Output [1]: [ss_store_sk#3] +Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#8] + +(104) ReusedExchange [Reuses operator id: 22] +Output [1]: [s_store_sk#12] + +(105) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#12] +Join condition: None + +(106) Project [codegen id : 24] +Output: [] +Input [2]: [ss_store_sk#3, s_store_sk#12] + +(107) HashAggregate [codegen id : 24] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#42] +Results [1]: [count#43] + +(108) Exchange +Input [1]: [count#43] +Arguments: SinglePartition, true, [id=#44] + +(109) HashAggregate [codegen id : 25] +Input [1]: [count#43] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#45] +Results [1]: [count(1)#45 AS h10_30_to_11#46] + +(110) BroadcastExchange +Input [1]: [h10_30_to_11#46] +Arguments: IdentityBroadcastMode, [id=#47] + +(111) BroadcastNestedLoopJoin +Join condition: None + +(112) Scan parquet default.store_sales +Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(113) ColumnarToRow [codegen id : 29] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] + +(114) Filter [codegen id : 29] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) + +(115) ReusedExchange [Reuses operator id: 8] +Output [1]: [hd_demo_sk#4] + +(116) BroadcastHashJoin [codegen id : 29] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#4] +Join condition: None + +(117) Project [codegen id : 29] +Output [2]: 
[ss_sold_time_sk#1, ss_store_sk#3] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#4] + +(118) Scan parquet default.time_dim +Output [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,11), LessThan(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(119) ColumnarToRow [codegen id : 27] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] + +(120) Filter [codegen id : 27] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Condition : ((((isnotnull(t_hour#9) AND isnotnull(t_minute#10)) AND (t_hour#9 = 11)) AND (t_minute#10 < 30)) AND isnotnull(t_time_sk#8)) + +(121) Project [codegen id : 27] +Output [1]: [t_time_sk#8] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] + +(122) BroadcastExchange +Input [1]: [t_time_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#48] + +(123) BroadcastHashJoin [codegen id : 29] +Left keys [1]: [ss_sold_time_sk#1] +Right keys [1]: [t_time_sk#8] +Join condition: None + +(124) Project [codegen id : 29] +Output [1]: [ss_store_sk#3] +Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#8] + +(125) ReusedExchange [Reuses operator id: 22] +Output [1]: [s_store_sk#12] + +(126) BroadcastHashJoin [codegen id : 29] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#12] +Join condition: None + +(127) Project [codegen id : 29] +Output: [] +Input [2]: [ss_store_sk#3, s_store_sk#12] + +(128) HashAggregate [codegen id : 29] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#49] +Results [1]: [count#50] + +(129) Exchange +Input [1]: [count#50] +Arguments: SinglePartition, true, [id=#51] + +(130) HashAggregate [codegen id : 30] +Input [1]: [count#50] +Keys: [] +Functions [1]: [count(1)] +Aggregate 
Attributes [1]: [count(1)#52] +Results [1]: [count(1)#52 AS h11_to_11_30#53] + +(131) BroadcastExchange +Input [1]: [h11_to_11_30#53] +Arguments: IdentityBroadcastMode, [id=#54] + +(132) BroadcastNestedLoopJoin +Join condition: None + +(133) Scan parquet default.store_sales +Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(134) ColumnarToRow [codegen id : 34] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] + +(135) Filter [codegen id : 34] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) + +(136) ReusedExchange [Reuses operator id: 8] +Output [1]: [hd_demo_sk#4] + +(137) BroadcastHashJoin [codegen id : 34] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#4] +Join condition: None + +(138) Project [codegen id : 34] +Output [2]: [ss_sold_time_sk#1, ss_store_sk#3] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#4] + +(139) Scan parquet default.time_dim +Output [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,11), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(140) ColumnarToRow [codegen id : 32] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] + +(141) Filter [codegen id : 32] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Condition : ((((isnotnull(t_hour#9) AND isnotnull(t_minute#10)) AND (t_hour#9 = 11)) AND (t_minute#10 >= 30)) AND 
isnotnull(t_time_sk#8)) + +(142) Project [codegen id : 32] +Output [1]: [t_time_sk#8] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] + +(143) BroadcastExchange +Input [1]: [t_time_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#55] + +(144) BroadcastHashJoin [codegen id : 34] +Left keys [1]: [ss_sold_time_sk#1] +Right keys [1]: [t_time_sk#8] +Join condition: None + +(145) Project [codegen id : 34] +Output [1]: [ss_store_sk#3] +Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#8] + +(146) ReusedExchange [Reuses operator id: 22] +Output [1]: [s_store_sk#12] + +(147) BroadcastHashJoin [codegen id : 34] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#12] +Join condition: None + +(148) Project [codegen id : 34] +Output: [] +Input [2]: [ss_store_sk#3, s_store_sk#12] + +(149) HashAggregate [codegen id : 34] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#56] +Results [1]: [count#57] + +(150) Exchange +Input [1]: [count#57] +Arguments: SinglePartition, true, [id=#58] + +(151) HashAggregate [codegen id : 35] +Input [1]: [count#57] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#59] +Results [1]: [count(1)#59 AS h11_30_to_12#60] + +(152) BroadcastExchange +Input [1]: [h11_30_to_12#60] +Arguments: IdentityBroadcastMode, [id=#61] + +(153) BroadcastNestedLoopJoin +Join condition: None + +(154) Scan parquet default.store_sales +Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(155) ColumnarToRow [codegen id : 39] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] + +(156) Filter [codegen id : 39] +Input [3]: [ss_sold_time_sk#1, 
ss_hdemo_sk#2, ss_store_sk#3] +Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) + +(157) ReusedExchange [Reuses operator id: 8] +Output [1]: [hd_demo_sk#4] + +(158) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#4] +Join condition: None + +(159) Project [codegen id : 39] +Output [2]: [ss_sold_time_sk#1, ss_store_sk#3] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#4] + +(160) Scan parquet default.time_dim +Output [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,12), LessThan(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(161) ColumnarToRow [codegen id : 37] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] + +(162) Filter [codegen id : 37] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Condition : ((((isnotnull(t_hour#9) AND isnotnull(t_minute#10)) AND (t_hour#9 = 12)) AND (t_minute#10 < 30)) AND isnotnull(t_time_sk#8)) + +(163) Project [codegen id : 37] +Output [1]: [t_time_sk#8] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] + +(164) BroadcastExchange +Input [1]: [t_time_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#62] + +(165) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [ss_sold_time_sk#1] +Right keys [1]: [t_time_sk#8] +Join condition: None + +(166) Project [codegen id : 39] +Output [1]: [ss_store_sk#3] +Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#8] + +(167) ReusedExchange [Reuses operator id: 22] +Output [1]: [s_store_sk#12] + +(168) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#12] +Join condition: None + +(169) Project [codegen id : 39] +Output: [] +Input [2]: 
[ss_store_sk#3, s_store_sk#12] + +(170) HashAggregate [codegen id : 39] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#63] +Results [1]: [count#64] + +(171) Exchange +Input [1]: [count#64] +Arguments: SinglePartition, true, [id=#65] + +(172) HashAggregate [codegen id : 40] +Input [1]: [count#64] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#66] +Results [1]: [count(1)#66 AS h12_to_12_30#67] + +(173) BroadcastExchange +Input [1]: [h12_to_12_30#67] +Arguments: IdentityBroadcastMode, [id=#68] + +(174) BroadcastNestedLoopJoin +Join condition: None + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88/simplified.txt new file mode 100644 index 0000000000000..261822d0f8ce1 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88/simplified.txt @@ -0,0 +1,250 @@ +BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + WholeStageCodegen (5) + HashAggregate [count] [count,count(1),h8_30_to_9] + InputAdapter + Exchange #1 + WholeStageCodegen (4) + HashAggregate [count,count] + Project + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [hd_demo_sk] + Filter [hd_demo_sk,hd_dep_count,hd_vehicle_count] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + BroadcastExchange 
#3 + WholeStageCodegen (2) + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [s_store_sk] + Filter [s_store_name,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_name,s_store_sk] + BroadcastExchange #5 + WholeStageCodegen (10) + HashAggregate [count] [count,count(1),h9_to_9_30] + InputAdapter + Exchange #6 + WholeStageCodegen (9) + HashAggregate [count,count] + Project + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + InputAdapter + ReusedExchange [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (7) + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] + InputAdapter + ReusedExchange [s_store_sk] #4 + BroadcastExchange #8 + WholeStageCodegen (15) + HashAggregate [count] [count,count(1),h9_30_to_10] + InputAdapter + Exchange #9 + WholeStageCodegen (14) + HashAggregate [count,count] + Project + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + InputAdapter + ReusedExchange [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (12) + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + ColumnarToRow + 
InputAdapter + Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] + InputAdapter + ReusedExchange [s_store_sk] #4 + BroadcastExchange #11 + WholeStageCodegen (20) + HashAggregate [count] [count,count(1),h10_to_10_30] + InputAdapter + Exchange #12 + WholeStageCodegen (19) + HashAggregate [count,count] + Project + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + InputAdapter + ReusedExchange [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (17) + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] + InputAdapter + ReusedExchange [s_store_sk] #4 + BroadcastExchange #14 + WholeStageCodegen (25) + HashAggregate [count] [count,count(1),h10_30_to_11] + InputAdapter + Exchange #15 + WholeStageCodegen (24) + HashAggregate [count,count] + Project + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + InputAdapter + ReusedExchange [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #16 + WholeStageCodegen (22) + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] + InputAdapter + ReusedExchange [s_store_sk] #4 + BroadcastExchange #17 + WholeStageCodegen (30) + HashAggregate [count] [count,count(1),h11_to_11_30] + InputAdapter + Exchange #18 + 
WholeStageCodegen (29) + HashAggregate [count,count] + Project + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + InputAdapter + ReusedExchange [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #19 + WholeStageCodegen (27) + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] + InputAdapter + ReusedExchange [s_store_sk] #4 + BroadcastExchange #20 + WholeStageCodegen (35) + HashAggregate [count] [count,count(1),h11_30_to_12] + InputAdapter + Exchange #21 + WholeStageCodegen (34) + HashAggregate [count,count] + Project + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + InputAdapter + ReusedExchange [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #22 + WholeStageCodegen (32) + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] + InputAdapter + ReusedExchange [s_store_sk] #4 + BroadcastExchange #23 + WholeStageCodegen (40) + HashAggregate [count] [count,count(1),h12_to_12_30] + InputAdapter + Exchange #24 + WholeStageCodegen (39) + HashAggregate [count,count] + Project + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin 
[hd_demo_sk,ss_hdemo_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + InputAdapter + ReusedExchange [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #25 + WholeStageCodegen (37) + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] + InputAdapter + ReusedExchange [s_store_sk] #4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89.sf100/explain.txt new file mode 100644 index 0000000000000..1bc6a409b1b84 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89.sf100/explain.txt @@ -0,0 +1,175 @@ +== Physical Plan == +TakeOrderedAndProject (31) ++- * Project (30) + +- * Filter (29) + +- Window (28) + +- * Sort (27) + +- Exchange (26) + +- * HashAggregate (25) + +- Exchange (24) + +- * HashAggregate (23) + +- * Project (22) + +- * BroadcastHashJoin Inner BuildRight (21) + :- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (9) + : : +- * BroadcastHashJoin Inner BuildLeft (8) + : : :- BroadcastExchange (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.item (1) + : : +- * Filter (7) + : : +- * ColumnarToRow (6) + : : +- Scan parquet default.store_sales (5) + : +- BroadcastExchange (14) + : +- * Project (13) + : +- * Filter (12) + : +- * ColumnarToRow (11) + : +- Scan parquet default.date_dim (10) + +- BroadcastExchange (20) + +- * Filter (19) + +- * ColumnarToRow (18) + +- Scan parquet default.store (17) + + +(1) Scan parquet default.item +Output [4]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [Or(And(In(i_category, [Books,Electronics,Sports]),In(i_class, [computers,stereo,football])),And(In(i_category, [Men,Jewelry,Women]),In(i_class, [shirts,birdal,dresses]))), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [4]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4] + +(3) Filter [codegen id : 1] +Input [4]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4] +Condition : (((i_category#4 IN (Books,Electronics,Sports) AND i_class#3 IN (computers,stereo,football)) OR (i_category#4 IN (Men,Jewelry,Women) AND i_class#3 IN (shirts,birdal,dresses))) AND isnotnull(i_item_sk#1)) + +(4) BroadcastExchange +Input [4]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#5] + +(5) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#6, ss_item_sk#7, ss_store_sk#8, ss_sales_price#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(6) ColumnarToRow +Input [4]: [ss_sold_date_sk#6, ss_item_sk#7, ss_store_sk#8, ss_sales_price#9] + +(7) Filter +Input [4]: [ss_sold_date_sk#6, ss_item_sk#7, ss_store_sk#8, ss_sales_price#9] +Condition : ((isnotnull(ss_item_sk#7) AND isnotnull(ss_sold_date_sk#6)) AND isnotnull(ss_store_sk#8)) + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [ss_item_sk#7] +Join condition: None + +(9) Project [codegen id : 4] +Output [6]: [i_brand#2, i_class#3, i_category#4, ss_sold_date_sk#6, ss_store_sk#8, ss_sales_price#9] +Input [8]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, 
ss_sold_date_sk#6, ss_item_sk#7, ss_store_sk#8, ss_sales_price#9] + +(10) Scan parquet default.date_dim +Output [3]: [d_date_sk#10, d_year#11, d_moy#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] + +(12) Filter [codegen id : 2] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] +Condition : ((isnotnull(d_year#11) AND (d_year#11 = 1999)) AND isnotnull(d_date_sk#10)) + +(13) Project [codegen id : 2] +Output [2]: [d_date_sk#10, d_moy#12] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] + +(14) BroadcastExchange +Input [2]: [d_date_sk#10, d_moy#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#6] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(16) Project [codegen id : 4] +Output [6]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#8, ss_sales_price#9, d_moy#12] +Input [8]: [i_brand#2, i_class#3, i_category#4, ss_sold_date_sk#6, ss_store_sk#8, ss_sales_price#9, d_date_sk#10, d_moy#12] + +(17) Scan parquet default.store +Output [3]: [s_store_sk#14, s_store_name#15, s_company_name#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [3]: [s_store_sk#14, s_store_name#15, s_company_name#16] + +(19) Filter [codegen id : 3] +Input [3]: [s_store_sk#14, s_store_name#15, s_company_name#16] +Condition : isnotnull(s_store_sk#14) + +(20) BroadcastExchange +Input [3]: [s_store_sk#14, s_store_name#15, 
s_company_name#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#8] +Right keys [1]: [s_store_sk#14] +Join condition: None + +(22) Project [codegen id : 4] +Output [7]: [i_brand#2, i_class#3, i_category#4, ss_sales_price#9, d_moy#12, s_store_name#15, s_company_name#16] +Input [9]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#8, ss_sales_price#9, d_moy#12, s_store_sk#14, s_store_name#15, s_company_name#16] + +(23) HashAggregate [codegen id : 4] +Input [7]: [i_brand#2, i_class#3, i_category#4, ss_sales_price#9, d_moy#12, s_store_name#15, s_company_name#16] +Keys [6]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#9))] +Aggregate Attributes [1]: [sum#18] +Results [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum#19] + +(24) Exchange +Input [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum#19] +Arguments: hashpartitioning(i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, 5), true, [id=#20] + +(25) HashAggregate [codegen id : 5] +Input [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum#19] +Keys [6]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12] +Functions [1]: [sum(UnscaledValue(ss_sales_price#9))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#9))#21] +Results [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, MakeDecimal(sum(UnscaledValue(ss_sales_price#9))#21,17,2) AS sum_sales#22, MakeDecimal(sum(UnscaledValue(ss_sales_price#9))#21,17,2) AS _w0#23] + +(26) Exchange +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, _w0#23] +Arguments: 
hashpartitioning(i_category#4, i_brand#2, s_store_name#15, s_company_name#16, 5), true, [id=#24] + +(27) Sort [codegen id : 6] +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, _w0#23] +Arguments: [i_category#4 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, s_store_name#15 ASC NULLS FIRST, s_company_name#16 ASC NULLS FIRST], false, 0 + +(28) Window +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, _w0#23] +Arguments: [avg(_w0#23) windowspecdefinition(i_category#4, i_brand#2, s_store_name#15, s_company_name#16, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#25], [i_category#4, i_brand#2, s_store_name#15, s_company_name#16] + +(29) Filter [codegen id : 7] +Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, _w0#23, avg_monthly_sales#25] +Condition : (CASE WHEN NOT (avg_monthly_sales#25 = 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#22 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6), true))) / promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(38,16), true) ELSE null END > 0.1000000000000000) + +(30) Project [codegen id : 7] +Output [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, avg_monthly_sales#25] +Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, _w0#23, avg_monthly_sales#25] + +(31) TakeOrderedAndProject +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, avg_monthly_sales#25] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#22 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), 
DecimalType(22,6), true) ASC NULLS FIRST, s_store_name#15 ASC NULLS FIRST], [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, avg_monthly_sales#25] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89.sf100/simplified.txt new file mode 100644 index 0000000000000..c2eb87a05109c --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89.sf100/simplified.txt @@ -0,0 +1,48 @@ +TakeOrderedAndProject [avg_monthly_sales,d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,sum_sales] + WholeStageCodegen (7) + Project [avg_monthly_sales,d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,sum_sales] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_brand,i_category,s_company_name,s_store_name] + WholeStageCodegen (6) + Sort [i_brand,i_category,s_company_name,s_store_name] + InputAdapter + Exchange [i_brand,i_category,s_company_name,s_store_name] #1 + WholeStageCodegen (5) + HashAggregate [d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,sum] [_w0,sum,sum(UnscaledValue(ss_sales_price)),sum_sales] + InputAdapter + Exchange [d_moy,i_brand,i_category,i_class,s_company_name,s_store_name] #2 + WholeStageCodegen (4) + HashAggregate [d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,ss_sales_price] [sum,sum] + Project [d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_moy,i_brand,i_category,i_class,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand,i_category,i_class,ss_sales_price,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [i_category,i_class,i_item_sk] + ColumnarToRow + InputAdapter + Scan 
parquet default.item [i_brand,i_category,i_class,i_item_sk] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk,d_moy] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_company_name,s_store_name,s_store_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/explain.txt new file mode 100644 index 0000000000000..29c357117d279 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/explain.txt @@ -0,0 +1,175 @@ +== Physical Plan == +TakeOrderedAndProject (31) ++- * Project (30) + +- * Filter (29) + +- Window (28) + +- * Sort (27) + +- Exchange (26) + +- * HashAggregate (25) + +- Exchange (24) + +- * HashAggregate (23) + +- * Project (22) + +- * BroadcastHashJoin Inner BuildRight (21) + :- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (9) + : : +- * BroadcastHashJoin Inner BuildRight (8) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.item (1) + : : +- BroadcastExchange (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.store_sales (4) + : +- BroadcastExchange (14) + : +- * Project (13) + : +- * Filter (12) + : +- * ColumnarToRow (11) + : +- Scan parquet default.date_dim (10) + +- BroadcastExchange (20) + +- * Filter (19) + +- * ColumnarToRow (18) + +- Scan parquet default.store (17) + + +(1) Scan parquet default.item +Output [4]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4] +Batched: true 
+Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [Or(And(In(i_category, [Books,Electronics,Sports]),In(i_class, [computers,stereo,football])),And(In(i_category, [Men,Jewelry,Women]),In(i_class, [shirts,birdal,dresses]))), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [4]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4] + +(3) Filter [codegen id : 4] +Input [4]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4] +Condition : (((i_category#4 IN (Books,Electronics,Sports) AND i_class#3 IN (computers,stereo,football)) OR (i_category#4 IN (Men,Jewelry,Women) AND i_class#3 IN (shirts,birdal,dresses))) AND isnotnull(i_item_sk#1)) + +(4) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#5, ss_item_sk#6, ss_store_sk#7, ss_sales_price#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [ss_sold_date_sk#5, ss_item_sk#6, ss_store_sk#7, ss_sales_price#8] + +(6) Filter [codegen id : 1] +Input [4]: [ss_sold_date_sk#5, ss_item_sk#6, ss_store_sk#7, ss_sales_price#8] +Condition : ((isnotnull(ss_item_sk#6) AND isnotnull(ss_sold_date_sk#5)) AND isnotnull(ss_store_sk#7)) + +(7) BroadcastExchange +Input [4]: [ss_sold_date_sk#5, ss_item_sk#6, ss_store_sk#7, ss_sales_price#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#9] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [ss_item_sk#6] +Join condition: None + +(9) Project [codegen id : 4] +Output [6]: [i_brand#2, i_class#3, i_category#4, ss_sold_date_sk#5, ss_store_sk#7, ss_sales_price#8] +Input [8]: 
[i_item_sk#1, i_brand#2, i_class#3, i_category#4, ss_sold_date_sk#5, ss_item_sk#6, ss_store_sk#7, ss_sales_price#8] + +(10) Scan parquet default.date_dim +Output [3]: [d_date_sk#10, d_year#11, d_moy#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] + +(12) Filter [codegen id : 2] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] +Condition : ((isnotnull(d_year#11) AND (d_year#11 = 1999)) AND isnotnull(d_date_sk#10)) + +(13) Project [codegen id : 2] +Output [2]: [d_date_sk#10, d_moy#12] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] + +(14) BroadcastExchange +Input [2]: [d_date_sk#10, d_moy#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(16) Project [codegen id : 4] +Output [6]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#7, ss_sales_price#8, d_moy#12] +Input [8]: [i_brand#2, i_class#3, i_category#4, ss_sold_date_sk#5, ss_store_sk#7, ss_sales_price#8, d_date_sk#10, d_moy#12] + +(17) Scan parquet default.store +Output [3]: [s_store_sk#14, s_store_name#15, s_company_name#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [3]: [s_store_sk#14, s_store_name#15, s_company_name#16] + +(19) Filter [codegen id : 3] +Input [3]: [s_store_sk#14, s_store_name#15, s_company_name#16] +Condition : isnotnull(s_store_sk#14) + +(20) BroadcastExchange +Input [3]: 
[s_store_sk#14, s_store_name#15, s_company_name#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#7] +Right keys [1]: [s_store_sk#14] +Join condition: None + +(22) Project [codegen id : 4] +Output [7]: [i_brand#2, i_class#3, i_category#4, ss_sales_price#8, d_moy#12, s_store_name#15, s_company_name#16] +Input [9]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#7, ss_sales_price#8, d_moy#12, s_store_sk#14, s_store_name#15, s_company_name#16] + +(23) HashAggregate [codegen id : 4] +Input [7]: [i_brand#2, i_class#3, i_category#4, ss_sales_price#8, d_moy#12, s_store_name#15, s_company_name#16] +Keys [6]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#8))] +Aggregate Attributes [1]: [sum#18] +Results [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum#19] + +(24) Exchange +Input [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum#19] +Arguments: hashpartitioning(i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, 5), true, [id=#20] + +(25) HashAggregate [codegen id : 5] +Input [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum#19] +Keys [6]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12] +Functions [1]: [sum(UnscaledValue(ss_sales_price#8))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#8))#21] +Results [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, MakeDecimal(sum(UnscaledValue(ss_sales_price#8))#21,17,2) AS sum_sales#22, MakeDecimal(sum(UnscaledValue(ss_sales_price#8))#21,17,2) AS _w0#23] + +(26) Exchange +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, 
_w0#23] +Arguments: hashpartitioning(i_category#4, i_brand#2, s_store_name#15, s_company_name#16, 5), true, [id=#24] + +(27) Sort [codegen id : 6] +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, _w0#23] +Arguments: [i_category#4 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, s_store_name#15 ASC NULLS FIRST, s_company_name#16 ASC NULLS FIRST], false, 0 + +(28) Window +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, _w0#23] +Arguments: [avg(_w0#23) windowspecdefinition(i_category#4, i_brand#2, s_store_name#15, s_company_name#16, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#25], [i_category#4, i_brand#2, s_store_name#15, s_company_name#16] + +(29) Filter [codegen id : 7] +Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, _w0#23, avg_monthly_sales#25] +Condition : (CASE WHEN NOT (avg_monthly_sales#25 = 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#22 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6), true))) / promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(38,16), true) ELSE null END > 0.1000000000000000) + +(30) Project [codegen id : 7] +Output [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, avg_monthly_sales#25] +Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, _w0#23, avg_monthly_sales#25] + +(31) TakeOrderedAndProject +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, avg_monthly_sales#25] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#22 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as 
decimal(22,6)))), DecimalType(22,6), true) ASC NULLS FIRST, s_store_name#15 ASC NULLS FIRST], [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, avg_monthly_sales#25] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/simplified.txt new file mode 100644 index 0000000000000..3b3ad9cf61293 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/simplified.txt @@ -0,0 +1,48 @@ +TakeOrderedAndProject [avg_monthly_sales,d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,sum_sales] + WholeStageCodegen (7) + Project [avg_monthly_sales,d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,sum_sales] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_brand,i_category,s_company_name,s_store_name] + WholeStageCodegen (6) + Sort [i_brand,i_category,s_company_name,s_store_name] + InputAdapter + Exchange [i_brand,i_category,s_company_name,s_store_name] #1 + WholeStageCodegen (5) + HashAggregate [d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,sum] [_w0,sum,sum(UnscaledValue(ss_sales_price)),sum_sales] + InputAdapter + Exchange [d_moy,i_brand,i_category,i_class,s_company_name,s_store_name] #2 + WholeStageCodegen (4) + HashAggregate [d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,ss_sales_price] [sum,sum] + Project [d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_moy,i_brand,i_category,i_class,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand,i_category,i_class,ss_sales_price,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [i_category,i_class,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_category,i_class,i_item_sk] + 
InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk,d_moy] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_company_name,s_store_name,s_store_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9.sf100/explain.txt new file mode 100644 index 0000000000000..d5d360c15d657 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9.sf100/explain.txt @@ -0,0 +1,718 @@ +== Physical Plan == +* Project (4) ++- * Filter (3) + +- * ColumnarToRow (2) + +- Scan parquet default.reason (1) + + +(1) Scan parquet default.reason +Output [1]: [r_reason_sk#1] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/reason] +PushedFilters: [IsNotNull(r_reason_sk), EqualTo(r_reason_sk,1)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [1]: [r_reason_sk#1] + +(3) Filter [codegen id : 1] +Input [1]: [r_reason_sk#1] +Condition : (isnotnull(r_reason_sk#1) AND (r_reason_sk#1 = 1)) + +(4) Project [codegen id : 1] +Output [5]: [CASE WHEN (Subquery scalar-subquery#2, [id=#3] > 62316685) THEN Subquery scalar-subquery#4, [id=#5] ELSE Subquery scalar-subquery#6, [id=#7] END AS bucket1#8, CASE WHEN (Subquery scalar-subquery#9, [id=#10] > 19045798) THEN Subquery scalar-subquery#11, [id=#12] ELSE Subquery scalar-subquery#13, [id=#14] END AS 
bucket2#15, CASE WHEN (Subquery scalar-subquery#16, [id=#17] > 365541424) THEN Subquery scalar-subquery#18, [id=#19] ELSE Subquery scalar-subquery#20, [id=#21] END AS bucket3#22, CASE WHEN (Subquery scalar-subquery#23, [id=#24] > 216357808) THEN Subquery scalar-subquery#25, [id=#26] ELSE Subquery scalar-subquery#27, [id=#28] END AS bucket4#29, CASE WHEN (Subquery scalar-subquery#30, [id=#31] > 184483884) THEN Subquery scalar-subquery#32, [id=#33] ELSE Subquery scalar-subquery#34, [id=#35] END AS bucket5#36] +Input [1]: [r_reason_sk#1] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#2, [id=#3] +* HashAggregate (11) ++- Exchange (10) + +- * HashAggregate (9) + +- * Project (8) + +- * Filter (7) + +- * ColumnarToRow (6) + +- Scan parquet default.store_sales (5) + + +(5) Scan parquet default.store_sales +Output [1]: [ss_quantity#37] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [1]: [ss_quantity#37] + +(7) Filter [codegen id : 1] +Input [1]: [ss_quantity#37] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 1)) AND (ss_quantity#37 <= 20)) + +(8) Project [codegen id : 1] +Output: [] +Input [1]: [ss_quantity#37] + +(9) HashAggregate [codegen id : 1] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#38] +Results [1]: [count#39] + +(10) Exchange +Input [1]: [count#39] +Arguments: SinglePartition, true, [id=#40] + +(11) HashAggregate [codegen id : 2] +Input [1]: [count#39] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#41] +Results [1]: [count(1)#41 AS count(1)#42] + +Subquery:2 Hosting operator id = 4 Hosting Expression = Subquery 
scalar-subquery#4, [id=#5] +* HashAggregate (18) ++- Exchange (17) + +- * HashAggregate (16) + +- * Project (15) + +- * Filter (14) + +- * ColumnarToRow (13) + +- Scan parquet default.store_sales (12) + + +(12) Scan parquet default.store_sales +Output [2]: [ss_quantity#37, ss_ext_discount_amt#43] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)] +ReadSchema: struct + +(13) ColumnarToRow [codegen id : 1] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] + +(14) Filter [codegen id : 1] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 1)) AND (ss_quantity#37 <= 20)) + +(15) Project [codegen id : 1] +Output [1]: [ss_ext_discount_amt#43] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] + +(16) HashAggregate [codegen id : 1] +Input [1]: [ss_ext_discount_amt#43] +Keys: [] +Functions [1]: [partial_avg(UnscaledValue(ss_ext_discount_amt#43))] +Aggregate Attributes [2]: [sum#44, count#45] +Results [2]: [sum#46, count#47] + +(17) Exchange +Input [2]: [sum#46, count#47] +Arguments: SinglePartition, true, [id=#48] + +(18) HashAggregate [codegen id : 2] +Input [2]: [sum#46, count#47] +Keys: [] +Functions [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))#49] +Results [1]: [cast((avg(UnscaledValue(ss_ext_discount_amt#43))#49 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#50] + +Subquery:3 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#6, [id=#7] +* HashAggregate (25) ++- Exchange (24) + +- * HashAggregate (23) + +- * Project (22) + +- * Filter (21) + +- * ColumnarToRow (20) + +- Scan parquet default.store_sales (19) + + +(19) Scan parquet default.store_sales +Output [2]: 
[ss_quantity#37, ss_net_paid#51] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 1] +Input [2]: [ss_quantity#37, ss_net_paid#51] + +(21) Filter [codegen id : 1] +Input [2]: [ss_quantity#37, ss_net_paid#51] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 1)) AND (ss_quantity#37 <= 20)) + +(22) Project [codegen id : 1] +Output [1]: [ss_net_paid#51] +Input [2]: [ss_quantity#37, ss_net_paid#51] + +(23) HashAggregate [codegen id : 1] +Input [1]: [ss_net_paid#51] +Keys: [] +Functions [1]: [partial_avg(UnscaledValue(ss_net_paid#51))] +Aggregate Attributes [2]: [sum#52, count#53] +Results [2]: [sum#54, count#55] + +(24) Exchange +Input [2]: [sum#54, count#55] +Arguments: SinglePartition, true, [id=#56] + +(25) HashAggregate [codegen id : 2] +Input [2]: [sum#54, count#55] +Keys: [] +Functions [1]: [avg(UnscaledValue(ss_net_paid#51))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_paid#51))#57] +Results [1]: [cast((avg(UnscaledValue(ss_net_paid#51))#57 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#58] + +Subquery:4 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#9, [id=#10] +* HashAggregate (32) ++- Exchange (31) + +- * HashAggregate (30) + +- * Project (29) + +- * Filter (28) + +- * ColumnarToRow (27) + +- Scan parquet default.store_sales (26) + + +(26) Scan parquet default.store_sales +Output [1]: [ss_quantity#37] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)] +ReadSchema: struct + +(27) ColumnarToRow 
[codegen id : 1] +Input [1]: [ss_quantity#37] + +(28) Filter [codegen id : 1] +Input [1]: [ss_quantity#37] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 21)) AND (ss_quantity#37 <= 40)) + +(29) Project [codegen id : 1] +Output: [] +Input [1]: [ss_quantity#37] + +(30) HashAggregate [codegen id : 1] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#59] +Results [1]: [count#60] + +(31) Exchange +Input [1]: [count#60] +Arguments: SinglePartition, true, [id=#61] + +(32) HashAggregate [codegen id : 2] +Input [1]: [count#60] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#62] +Results [1]: [count(1)#62 AS count(1)#63] + +Subquery:5 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#11, [id=#12] +* HashAggregate (39) ++- Exchange (38) + +- * HashAggregate (37) + +- * Project (36) + +- * Filter (35) + +- * ColumnarToRow (34) + +- Scan parquet default.store_sales (33) + + +(33) Scan parquet default.store_sales +Output [2]: [ss_quantity#37, ss_ext_discount_amt#43] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 1] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] + +(35) Filter [codegen id : 1] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 21)) AND (ss_quantity#37 <= 40)) + +(36) Project [codegen id : 1] +Output [1]: [ss_ext_discount_amt#43] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] + +(37) HashAggregate [codegen id : 1] +Input [1]: [ss_ext_discount_amt#43] +Keys: [] +Functions [1]: [partial_avg(UnscaledValue(ss_ext_discount_amt#43))] +Aggregate Attributes [2]: [sum#64, count#65] +Results [2]: 
[sum#66, count#67] + +(38) Exchange +Input [2]: [sum#66, count#67] +Arguments: SinglePartition, true, [id=#68] + +(39) HashAggregate [codegen id : 2] +Input [2]: [sum#66, count#67] +Keys: [] +Functions [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))#69] +Results [1]: [cast((avg(UnscaledValue(ss_ext_discount_amt#43))#69 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#70] + +Subquery:6 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#13, [id=#14] +* HashAggregate (46) ++- Exchange (45) + +- * HashAggregate (44) + +- * Project (43) + +- * Filter (42) + +- * ColumnarToRow (41) + +- Scan parquet default.store_sales (40) + + +(40) Scan parquet default.store_sales +Output [2]: [ss_quantity#37, ss_net_paid#51] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 1] +Input [2]: [ss_quantity#37, ss_net_paid#51] + +(42) Filter [codegen id : 1] +Input [2]: [ss_quantity#37, ss_net_paid#51] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 21)) AND (ss_quantity#37 <= 40)) + +(43) Project [codegen id : 1] +Output [1]: [ss_net_paid#51] +Input [2]: [ss_quantity#37, ss_net_paid#51] + +(44) HashAggregate [codegen id : 1] +Input [1]: [ss_net_paid#51] +Keys: [] +Functions [1]: [partial_avg(UnscaledValue(ss_net_paid#51))] +Aggregate Attributes [2]: [sum#71, count#72] +Results [2]: [sum#73, count#74] + +(45) Exchange +Input [2]: [sum#73, count#74] +Arguments: SinglePartition, true, [id=#75] + +(46) HashAggregate [codegen id : 2] +Input [2]: [sum#73, count#74] +Keys: [] +Functions [1]: [avg(UnscaledValue(ss_net_paid#51))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_paid#51))#76] 
+Results [1]: [cast((avg(UnscaledValue(ss_net_paid#51))#76 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#77] + +Subquery:7 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#16, [id=#17] +* HashAggregate (53) ++- Exchange (52) + +- * HashAggregate (51) + +- * Project (50) + +- * Filter (49) + +- * ColumnarToRow (48) + +- Scan parquet default.store_sales (47) + + +(47) Scan parquet default.store_sales +Output [1]: [ss_quantity#37] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 1] +Input [1]: [ss_quantity#37] + +(49) Filter [codegen id : 1] +Input [1]: [ss_quantity#37] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 41)) AND (ss_quantity#37 <= 60)) + +(50) Project [codegen id : 1] +Output: [] +Input [1]: [ss_quantity#37] + +(51) HashAggregate [codegen id : 1] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#78] +Results [1]: [count#79] + +(52) Exchange +Input [1]: [count#79] +Arguments: SinglePartition, true, [id=#80] + +(53) HashAggregate [codegen id : 2] +Input [1]: [count#79] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#81] +Results [1]: [count(1)#81 AS count(1)#82] + +Subquery:8 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#18, [id=#19] +* HashAggregate (60) ++- Exchange (59) + +- * HashAggregate (58) + +- * Project (57) + +- * Filter (56) + +- * ColumnarToRow (55) + +- Scan parquet default.store_sales (54) + + +(54) Scan parquet default.store_sales +Output [2]: [ss_quantity#37, ss_ext_discount_amt#43] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)] +ReadSchema: struct + +(55) ColumnarToRow [codegen id : 1] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] + +(56) Filter [codegen id : 1] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 41)) AND (ss_quantity#37 <= 60)) + +(57) Project [codegen id : 1] +Output [1]: [ss_ext_discount_amt#43] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] + +(58) HashAggregate [codegen id : 1] +Input [1]: [ss_ext_discount_amt#43] +Keys: [] +Functions [1]: [partial_avg(UnscaledValue(ss_ext_discount_amt#43))] +Aggregate Attributes [2]: [sum#83, count#84] +Results [2]: [sum#85, count#86] + +(59) Exchange +Input [2]: [sum#85, count#86] +Arguments: SinglePartition, true, [id=#87] + +(60) HashAggregate [codegen id : 2] +Input [2]: [sum#85, count#86] +Keys: [] +Functions [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))#88] +Results [1]: [cast((avg(UnscaledValue(ss_ext_discount_amt#43))#88 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#89] + +Subquery:9 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#20, [id=#21] +* HashAggregate (67) ++- Exchange (66) + +- * HashAggregate (65) + +- * Project (64) + +- * Filter (63) + +- * ColumnarToRow (62) + +- Scan parquet default.store_sales (61) + + +(61) Scan parquet default.store_sales +Output [2]: [ss_quantity#37, ss_net_paid#51] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)] +ReadSchema: struct 
+ +(62) ColumnarToRow [codegen id : 1] +Input [2]: [ss_quantity#37, ss_net_paid#51] + +(63) Filter [codegen id : 1] +Input [2]: [ss_quantity#37, ss_net_paid#51] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 41)) AND (ss_quantity#37 <= 60)) + +(64) Project [codegen id : 1] +Output [1]: [ss_net_paid#51] +Input [2]: [ss_quantity#37, ss_net_paid#51] + +(65) HashAggregate [codegen id : 1] +Input [1]: [ss_net_paid#51] +Keys: [] +Functions [1]: [partial_avg(UnscaledValue(ss_net_paid#51))] +Aggregate Attributes [2]: [sum#90, count#91] +Results [2]: [sum#92, count#93] + +(66) Exchange +Input [2]: [sum#92, count#93] +Arguments: SinglePartition, true, [id=#94] + +(67) HashAggregate [codegen id : 2] +Input [2]: [sum#92, count#93] +Keys: [] +Functions [1]: [avg(UnscaledValue(ss_net_paid#51))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_paid#51))#95] +Results [1]: [cast((avg(UnscaledValue(ss_net_paid#51))#95 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#96] + +Subquery:10 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#23, [id=#24] +* HashAggregate (74) ++- Exchange (73) + +- * HashAggregate (72) + +- * Project (71) + +- * Filter (70) + +- * ColumnarToRow (69) + +- Scan parquet default.store_sales (68) + + +(68) Scan parquet default.store_sales +Output [1]: [ss_quantity#37] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)] +ReadSchema: struct + +(69) ColumnarToRow [codegen id : 1] +Input [1]: [ss_quantity#37] + +(70) Filter [codegen id : 1] +Input [1]: [ss_quantity#37] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 61)) AND (ss_quantity#37 <= 80)) + +(71) Project [codegen id : 1] +Output: [] +Input [1]: [ss_quantity#37] + +(72) HashAggregate [codegen id : 1] +Input: [] +Keys: [] 
+Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#97] +Results [1]: [count#98] + +(73) Exchange +Input [1]: [count#98] +Arguments: SinglePartition, true, [id=#99] + +(74) HashAggregate [codegen id : 2] +Input [1]: [count#98] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#100] +Results [1]: [count(1)#100 AS count(1)#101] + +Subquery:11 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#25, [id=#26] +* HashAggregate (81) ++- Exchange (80) + +- * HashAggregate (79) + +- * Project (78) + +- * Filter (77) + +- * ColumnarToRow (76) + +- Scan parquet default.store_sales (75) + + +(75) Scan parquet default.store_sales +Output [2]: [ss_quantity#37, ss_ext_discount_amt#43] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)] +ReadSchema: struct + +(76) ColumnarToRow [codegen id : 1] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] + +(77) Filter [codegen id : 1] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 61)) AND (ss_quantity#37 <= 80)) + +(78) Project [codegen id : 1] +Output [1]: [ss_ext_discount_amt#43] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] + +(79) HashAggregate [codegen id : 1] +Input [1]: [ss_ext_discount_amt#43] +Keys: [] +Functions [1]: [partial_avg(UnscaledValue(ss_ext_discount_amt#43))] +Aggregate Attributes [2]: [sum#102, count#103] +Results [2]: [sum#104, count#105] + +(80) Exchange +Input [2]: [sum#104, count#105] +Arguments: SinglePartition, true, [id=#106] + +(81) HashAggregate [codegen id : 2] +Input [2]: [sum#104, count#105] +Keys: [] +Functions [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))#107] 
+Results [1]: [cast((avg(UnscaledValue(ss_ext_discount_amt#43))#107 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#108] + +Subquery:12 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#27, [id=#28] +* HashAggregate (88) ++- Exchange (87) + +- * HashAggregate (86) + +- * Project (85) + +- * Filter (84) + +- * ColumnarToRow (83) + +- Scan parquet default.store_sales (82) + + +(82) Scan parquet default.store_sales +Output [2]: [ss_quantity#37, ss_net_paid#51] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)] +ReadSchema: struct + +(83) ColumnarToRow [codegen id : 1] +Input [2]: [ss_quantity#37, ss_net_paid#51] + +(84) Filter [codegen id : 1] +Input [2]: [ss_quantity#37, ss_net_paid#51] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 61)) AND (ss_quantity#37 <= 80)) + +(85) Project [codegen id : 1] +Output [1]: [ss_net_paid#51] +Input [2]: [ss_quantity#37, ss_net_paid#51] + +(86) HashAggregate [codegen id : 1] +Input [1]: [ss_net_paid#51] +Keys: [] +Functions [1]: [partial_avg(UnscaledValue(ss_net_paid#51))] +Aggregate Attributes [2]: [sum#109, count#110] +Results [2]: [sum#111, count#112] + +(87) Exchange +Input [2]: [sum#111, count#112] +Arguments: SinglePartition, true, [id=#113] + +(88) HashAggregate [codegen id : 2] +Input [2]: [sum#111, count#112] +Keys: [] +Functions [1]: [avg(UnscaledValue(ss_net_paid#51))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_paid#51))#114] +Results [1]: [cast((avg(UnscaledValue(ss_net_paid#51))#114 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#115] + +Subquery:13 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#30, [id=#31] +* HashAggregate (95) ++- Exchange (94) + +- * HashAggregate (93) + +- * Project (92) + +- * Filter (91) 
+ +- * ColumnarToRow (90) + +- Scan parquet default.store_sales (89) + + +(89) Scan parquet default.store_sales +Output [1]: [ss_quantity#37] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)] +ReadSchema: struct + +(90) ColumnarToRow [codegen id : 1] +Input [1]: [ss_quantity#37] + +(91) Filter [codegen id : 1] +Input [1]: [ss_quantity#37] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 81)) AND (ss_quantity#37 <= 100)) + +(92) Project [codegen id : 1] +Output: [] +Input [1]: [ss_quantity#37] + +(93) HashAggregate [codegen id : 1] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#116] +Results [1]: [count#117] + +(94) Exchange +Input [1]: [count#117] +Arguments: SinglePartition, true, [id=#118] + +(95) HashAggregate [codegen id : 2] +Input [1]: [count#117] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#119] +Results [1]: [count(1)#119 AS count(1)#120] + +Subquery:14 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#32, [id=#33] +* HashAggregate (102) ++- Exchange (101) + +- * HashAggregate (100) + +- * Project (99) + +- * Filter (98) + +- * ColumnarToRow (97) + +- Scan parquet default.store_sales (96) + + +(96) Scan parquet default.store_sales +Output [2]: [ss_quantity#37, ss_ext_discount_amt#43] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)] +ReadSchema: struct + +(97) ColumnarToRow [codegen id : 1] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] + +(98) Filter [codegen id : 1] +Input 
[2]: [ss_quantity#37, ss_ext_discount_amt#43] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 81)) AND (ss_quantity#37 <= 100)) + +(99) Project [codegen id : 1] +Output [1]: [ss_ext_discount_amt#43] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] + +(100) HashAggregate [codegen id : 1] +Input [1]: [ss_ext_discount_amt#43] +Keys: [] +Functions [1]: [partial_avg(UnscaledValue(ss_ext_discount_amt#43))] +Aggregate Attributes [2]: [sum#121, count#122] +Results [2]: [sum#123, count#124] + +(101) Exchange +Input [2]: [sum#123, count#124] +Arguments: SinglePartition, true, [id=#125] + +(102) HashAggregate [codegen id : 2] +Input [2]: [sum#123, count#124] +Keys: [] +Functions [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))#126] +Results [1]: [cast((avg(UnscaledValue(ss_ext_discount_amt#43))#126 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#127] + +Subquery:15 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#34, [id=#35] +* HashAggregate (109) ++- Exchange (108) + +- * HashAggregate (107) + +- * Project (106) + +- * Filter (105) + +- * ColumnarToRow (104) + +- Scan parquet default.store_sales (103) + + +(103) Scan parquet default.store_sales +Output [2]: [ss_quantity#37, ss_net_paid#51] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)] +ReadSchema: struct + +(104) ColumnarToRow [codegen id : 1] +Input [2]: [ss_quantity#37, ss_net_paid#51] + +(105) Filter [codegen id : 1] +Input [2]: [ss_quantity#37, ss_net_paid#51] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 81)) AND (ss_quantity#37 <= 100)) + +(106) Project [codegen id : 1] +Output [1]: [ss_net_paid#51] +Input [2]: [ss_quantity#37, ss_net_paid#51] 
+ +(107) HashAggregate [codegen id : 1] +Input [1]: [ss_net_paid#51] +Keys: [] +Functions [1]: [partial_avg(UnscaledValue(ss_net_paid#51))] +Aggregate Attributes [2]: [sum#128, count#129] +Results [2]: [sum#130, count#131] + +(108) Exchange +Input [2]: [sum#130, count#131] +Arguments: SinglePartition, true, [id=#132] + +(109) HashAggregate [codegen id : 2] +Input [2]: [sum#130, count#131] +Keys: [] +Functions [1]: [avg(UnscaledValue(ss_net_paid#51))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_paid#51))#133] +Results [1]: [cast((avg(UnscaledValue(ss_net_paid#51))#133 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#134] + + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9.sf100/simplified.txt new file mode 100644 index 0000000000000..151ec8b405f2f --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9.sf100/simplified.txt @@ -0,0 +1,186 @@ +WholeStageCodegen (1) + Project + Subquery #1 + WholeStageCodegen (2) + HashAggregate [count] [count,count(1),count(1)] + InputAdapter + Exchange #1 + WholeStageCodegen (1) + HashAggregate [count,count] + Project + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity] + Subquery #2 + WholeStageCodegen (2) + HashAggregate [count,sum] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),count,sum] + InputAdapter + Exchange #2 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt] [count,count,sum,sum] + Project [ss_ext_discount_amt] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_discount_amt,ss_quantity] + Subquery #3 + WholeStageCodegen (2) + HashAggregate [count,sum] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),count,sum] + InputAdapter + Exchange #3 + WholeStageCodegen (1) + HashAggregate [ss_net_paid] [count,count,sum,sum] + Project 
[ss_net_paid] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_net_paid,ss_quantity] + Subquery #4 + WholeStageCodegen (2) + HashAggregate [count] [count,count(1),count(1)] + InputAdapter + Exchange #4 + WholeStageCodegen (1) + HashAggregate [count,count] + Project + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity] + Subquery #5 + WholeStageCodegen (2) + HashAggregate [count,sum] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),count,sum] + InputAdapter + Exchange #5 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt] [count,count,sum,sum] + Project [ss_ext_discount_amt] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_discount_amt,ss_quantity] + Subquery #6 + WholeStageCodegen (2) + HashAggregate [count,sum] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),count,sum] + InputAdapter + Exchange #6 + WholeStageCodegen (1) + HashAggregate [ss_net_paid] [count,count,sum,sum] + Project [ss_net_paid] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_net_paid,ss_quantity] + Subquery #7 + WholeStageCodegen (2) + HashAggregate [count] [count,count(1),count(1)] + InputAdapter + Exchange #7 + WholeStageCodegen (1) + HashAggregate [count,count] + Project + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity] + Subquery #8 + WholeStageCodegen (2) + HashAggregate [count,sum] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),count,sum] + InputAdapter + Exchange #8 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt] [count,count,sum,sum] + Project [ss_ext_discount_amt] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_discount_amt,ss_quantity] + Subquery #9 + WholeStageCodegen (2) + HashAggregate [count,sum] 
[avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),count,sum] + InputAdapter + Exchange #9 + WholeStageCodegen (1) + HashAggregate [ss_net_paid] [count,count,sum,sum] + Project [ss_net_paid] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_net_paid,ss_quantity] + Subquery #10 + WholeStageCodegen (2) + HashAggregate [count] [count,count(1),count(1)] + InputAdapter + Exchange #10 + WholeStageCodegen (1) + HashAggregate [count,count] + Project + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity] + Subquery #11 + WholeStageCodegen (2) + HashAggregate [count,sum] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),count,sum] + InputAdapter + Exchange #11 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt] [count,count,sum,sum] + Project [ss_ext_discount_amt] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_discount_amt,ss_quantity] + Subquery #12 + WholeStageCodegen (2) + HashAggregate [count,sum] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),count,sum] + InputAdapter + Exchange #12 + WholeStageCodegen (1) + HashAggregate [ss_net_paid] [count,count,sum,sum] + Project [ss_net_paid] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_net_paid,ss_quantity] + Subquery #13 + WholeStageCodegen (2) + HashAggregate [count] [count,count(1),count(1)] + InputAdapter + Exchange #13 + WholeStageCodegen (1) + HashAggregate [count,count] + Project + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity] + Subquery #14 + WholeStageCodegen (2) + HashAggregate [count,sum] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),count,sum] + InputAdapter + Exchange #14 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt] [count,count,sum,sum] + Project [ss_ext_discount_amt] + Filter [ss_quantity] + ColumnarToRow + 
InputAdapter + Scan parquet default.store_sales [ss_ext_discount_amt,ss_quantity] + Subquery #15 + WholeStageCodegen (2) + HashAggregate [count,sum] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),count,sum] + InputAdapter + Exchange #15 + WholeStageCodegen (1) + HashAggregate [ss_net_paid] [count,count,sum,sum] + Project [ss_net_paid] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_net_paid,ss_quantity] + Filter [r_reason_sk] + ColumnarToRow + InputAdapter + Scan parquet default.reason [r_reason_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/explain.txt new file mode 100644 index 0000000000000..2448680c053c1 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/explain.txt @@ -0,0 +1,718 @@ +== Physical Plan == +* Project (4) ++- * Filter (3) + +- * ColumnarToRow (2) + +- Scan parquet default.reason (1) + + +(1) Scan parquet default.reason +Output [1]: [r_reason_sk#1] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/reason] +PushedFilters: [IsNotNull(r_reason_sk), EqualTo(r_reason_sk,1)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [1]: [r_reason_sk#1] + +(3) Filter [codegen id : 1] +Input [1]: [r_reason_sk#1] +Condition : (isnotnull(r_reason_sk#1) AND (r_reason_sk#1 = 1)) + +(4) Project [codegen id : 1] +Output [5]: [CASE WHEN (Subquery scalar-subquery#2, [id=#3] > 62316685) THEN Subquery scalar-subquery#4, [id=#5] ELSE Subquery scalar-subquery#6, [id=#7] END AS bucket1#8, CASE WHEN (Subquery scalar-subquery#9, [id=#10] > 19045798) THEN Subquery scalar-subquery#11, [id=#12] ELSE Subquery scalar-subquery#13, [id=#14] END AS bucket2#15, CASE WHEN (Subquery scalar-subquery#16, [id=#17] > 365541424) THEN Subquery scalar-subquery#18, 
[id=#19] ELSE Subquery scalar-subquery#20, [id=#21] END AS bucket3#22, CASE WHEN (Subquery scalar-subquery#23, [id=#24] > 216357808) THEN Subquery scalar-subquery#25, [id=#26] ELSE Subquery scalar-subquery#27, [id=#28] END AS bucket4#29, CASE WHEN (Subquery scalar-subquery#30, [id=#31] > 184483884) THEN Subquery scalar-subquery#32, [id=#33] ELSE Subquery scalar-subquery#34, [id=#35] END AS bucket5#36] +Input [1]: [r_reason_sk#1] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#2, [id=#3] +* HashAggregate (11) ++- Exchange (10) + +- * HashAggregate (9) + +- * Project (8) + +- * Filter (7) + +- * ColumnarToRow (6) + +- Scan parquet default.store_sales (5) + + +(5) Scan parquet default.store_sales +Output [1]: [ss_quantity#37] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [1]: [ss_quantity#37] + +(7) Filter [codegen id : 1] +Input [1]: [ss_quantity#37] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 1)) AND (ss_quantity#37 <= 20)) + +(8) Project [codegen id : 1] +Output: [] +Input [1]: [ss_quantity#37] + +(9) HashAggregate [codegen id : 1] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#38] +Results [1]: [count#39] + +(10) Exchange +Input [1]: [count#39] +Arguments: SinglePartition, true, [id=#40] + +(11) HashAggregate [codegen id : 2] +Input [1]: [count#39] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#41] +Results [1]: [count(1)#41 AS count(1)#42] + +Subquery:2 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#4, [id=#5] +* HashAggregate (18) ++- Exchange (17) + +- * HashAggregate (16) + +- * Project (15) + +- 
* Filter (14) + +- * ColumnarToRow (13) + +- Scan parquet default.store_sales (12) + + +(12) Scan parquet default.store_sales +Output [2]: [ss_quantity#37, ss_ext_discount_amt#43] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)] +ReadSchema: struct + +(13) ColumnarToRow [codegen id : 1] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] + +(14) Filter [codegen id : 1] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 1)) AND (ss_quantity#37 <= 20)) + +(15) Project [codegen id : 1] +Output [1]: [ss_ext_discount_amt#43] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] + +(16) HashAggregate [codegen id : 1] +Input [1]: [ss_ext_discount_amt#43] +Keys: [] +Functions [1]: [partial_avg(UnscaledValue(ss_ext_discount_amt#43))] +Aggregate Attributes [2]: [sum#44, count#45] +Results [2]: [sum#46, count#47] + +(17) Exchange +Input [2]: [sum#46, count#47] +Arguments: SinglePartition, true, [id=#48] + +(18) HashAggregate [codegen id : 2] +Input [2]: [sum#46, count#47] +Keys: [] +Functions [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))#49] +Results [1]: [cast((avg(UnscaledValue(ss_ext_discount_amt#43))#49 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#50] + +Subquery:3 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#6, [id=#7] +* HashAggregate (25) ++- Exchange (24) + +- * HashAggregate (23) + +- * Project (22) + +- * Filter (21) + +- * ColumnarToRow (20) + +- Scan parquet default.store_sales (19) + + +(19) Scan parquet default.store_sales +Output [2]: [ss_quantity#37, ss_net_paid#51] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 1] +Input [2]: [ss_quantity#37, ss_net_paid#51] + +(21) Filter [codegen id : 1] +Input [2]: [ss_quantity#37, ss_net_paid#51] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 1)) AND (ss_quantity#37 <= 20)) + +(22) Project [codegen id : 1] +Output [1]: [ss_net_paid#51] +Input [2]: [ss_quantity#37, ss_net_paid#51] + +(23) HashAggregate [codegen id : 1] +Input [1]: [ss_net_paid#51] +Keys: [] +Functions [1]: [partial_avg(UnscaledValue(ss_net_paid#51))] +Aggregate Attributes [2]: [sum#52, count#53] +Results [2]: [sum#54, count#55] + +(24) Exchange +Input [2]: [sum#54, count#55] +Arguments: SinglePartition, true, [id=#56] + +(25) HashAggregate [codegen id : 2] +Input [2]: [sum#54, count#55] +Keys: [] +Functions [1]: [avg(UnscaledValue(ss_net_paid#51))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_paid#51))#57] +Results [1]: [cast((avg(UnscaledValue(ss_net_paid#51))#57 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#58] + +Subquery:4 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#9, [id=#10] +* HashAggregate (32) ++- Exchange (31) + +- * HashAggregate (30) + +- * Project (29) + +- * Filter (28) + +- * ColumnarToRow (27) + +- Scan parquet default.store_sales (26) + + +(26) Scan parquet default.store_sales +Output [1]: [ss_quantity#37] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 1] +Input [1]: [ss_quantity#37] + +(28) Filter [codegen id : 1] +Input [1]: 
[ss_quantity#37] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 21)) AND (ss_quantity#37 <= 40)) + +(29) Project [codegen id : 1] +Output: [] +Input [1]: [ss_quantity#37] + +(30) HashAggregate [codegen id : 1] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#59] +Results [1]: [count#60] + +(31) Exchange +Input [1]: [count#60] +Arguments: SinglePartition, true, [id=#61] + +(32) HashAggregate [codegen id : 2] +Input [1]: [count#60] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#62] +Results [1]: [count(1)#62 AS count(1)#63] + +Subquery:5 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#11, [id=#12] +* HashAggregate (39) ++- Exchange (38) + +- * HashAggregate (37) + +- * Project (36) + +- * Filter (35) + +- * ColumnarToRow (34) + +- Scan parquet default.store_sales (33) + + +(33) Scan parquet default.store_sales +Output [2]: [ss_quantity#37, ss_ext_discount_amt#43] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 1] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] + +(35) Filter [codegen id : 1] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 21)) AND (ss_quantity#37 <= 40)) + +(36) Project [codegen id : 1] +Output [1]: [ss_ext_discount_amt#43] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] + +(37) HashAggregate [codegen id : 1] +Input [1]: [ss_ext_discount_amt#43] +Keys: [] +Functions [1]: [partial_avg(UnscaledValue(ss_ext_discount_amt#43))] +Aggregate Attributes [2]: [sum#64, count#65] +Results [2]: [sum#66, count#67] + +(38) Exchange +Input [2]: [sum#66, count#67] +Arguments: SinglePartition, true, 
[id=#68] + +(39) HashAggregate [codegen id : 2] +Input [2]: [sum#66, count#67] +Keys: [] +Functions [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))#69] +Results [1]: [cast((avg(UnscaledValue(ss_ext_discount_amt#43))#69 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#70] + +Subquery:6 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#13, [id=#14] +* HashAggregate (46) ++- Exchange (45) + +- * HashAggregate (44) + +- * Project (43) + +- * Filter (42) + +- * ColumnarToRow (41) + +- Scan parquet default.store_sales (40) + + +(40) Scan parquet default.store_sales +Output [2]: [ss_quantity#37, ss_net_paid#51] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 1] +Input [2]: [ss_quantity#37, ss_net_paid#51] + +(42) Filter [codegen id : 1] +Input [2]: [ss_quantity#37, ss_net_paid#51] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 21)) AND (ss_quantity#37 <= 40)) + +(43) Project [codegen id : 1] +Output [1]: [ss_net_paid#51] +Input [2]: [ss_quantity#37, ss_net_paid#51] + +(44) HashAggregate [codegen id : 1] +Input [1]: [ss_net_paid#51] +Keys: [] +Functions [1]: [partial_avg(UnscaledValue(ss_net_paid#51))] +Aggregate Attributes [2]: [sum#71, count#72] +Results [2]: [sum#73, count#74] + +(45) Exchange +Input [2]: [sum#73, count#74] +Arguments: SinglePartition, true, [id=#75] + +(46) HashAggregate [codegen id : 2] +Input [2]: [sum#73, count#74] +Keys: [] +Functions [1]: [avg(UnscaledValue(ss_net_paid#51))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_paid#51))#76] +Results [1]: [cast((avg(UnscaledValue(ss_net_paid#51))#76 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#77] + 
+Subquery:7 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#16, [id=#17] +* HashAggregate (53) ++- Exchange (52) + +- * HashAggregate (51) + +- * Project (50) + +- * Filter (49) + +- * ColumnarToRow (48) + +- Scan parquet default.store_sales (47) + + +(47) Scan parquet default.store_sales +Output [1]: [ss_quantity#37] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 1] +Input [1]: [ss_quantity#37] + +(49) Filter [codegen id : 1] +Input [1]: [ss_quantity#37] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 41)) AND (ss_quantity#37 <= 60)) + +(50) Project [codegen id : 1] +Output: [] +Input [1]: [ss_quantity#37] + +(51) HashAggregate [codegen id : 1] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#78] +Results [1]: [count#79] + +(52) Exchange +Input [1]: [count#79] +Arguments: SinglePartition, true, [id=#80] + +(53) HashAggregate [codegen id : 2] +Input [1]: [count#79] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#81] +Results [1]: [count(1)#81 AS count(1)#82] + +Subquery:8 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#18, [id=#19] +* HashAggregate (60) ++- Exchange (59) + +- * HashAggregate (58) + +- * Project (57) + +- * Filter (56) + +- * ColumnarToRow (55) + +- Scan parquet default.store_sales (54) + + +(54) Scan parquet default.store_sales +Output [2]: [ss_quantity#37, ss_ext_discount_amt#43] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), 
LessThanOrEqual(ss_quantity,60)] +ReadSchema: struct + +(55) ColumnarToRow [codegen id : 1] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] + +(56) Filter [codegen id : 1] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 41)) AND (ss_quantity#37 <= 60)) + +(57) Project [codegen id : 1] +Output [1]: [ss_ext_discount_amt#43] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] + +(58) HashAggregate [codegen id : 1] +Input [1]: [ss_ext_discount_amt#43] +Keys: [] +Functions [1]: [partial_avg(UnscaledValue(ss_ext_discount_amt#43))] +Aggregate Attributes [2]: [sum#83, count#84] +Results [2]: [sum#85, count#86] + +(59) Exchange +Input [2]: [sum#85, count#86] +Arguments: SinglePartition, true, [id=#87] + +(60) HashAggregate [codegen id : 2] +Input [2]: [sum#85, count#86] +Keys: [] +Functions [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))#88] +Results [1]: [cast((avg(UnscaledValue(ss_ext_discount_amt#43))#88 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#89] + +Subquery:9 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#20, [id=#21] +* HashAggregate (67) ++- Exchange (66) + +- * HashAggregate (65) + +- * Project (64) + +- * Filter (63) + +- * ColumnarToRow (62) + +- Scan parquet default.store_sales (61) + + +(61) Scan parquet default.store_sales +Output [2]: [ss_quantity#37, ss_net_paid#51] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)] +ReadSchema: struct + +(62) ColumnarToRow [codegen id : 1] +Input [2]: [ss_quantity#37, ss_net_paid#51] + +(63) Filter [codegen id : 1] +Input [2]: [ss_quantity#37, ss_net_paid#51] +Condition : ((isnotnull(ss_quantity#37) AND 
(ss_quantity#37 >= 41)) AND (ss_quantity#37 <= 60)) + +(64) Project [codegen id : 1] +Output [1]: [ss_net_paid#51] +Input [2]: [ss_quantity#37, ss_net_paid#51] + +(65) HashAggregate [codegen id : 1] +Input [1]: [ss_net_paid#51] +Keys: [] +Functions [1]: [partial_avg(UnscaledValue(ss_net_paid#51))] +Aggregate Attributes [2]: [sum#90, count#91] +Results [2]: [sum#92, count#93] + +(66) Exchange +Input [2]: [sum#92, count#93] +Arguments: SinglePartition, true, [id=#94] + +(67) HashAggregate [codegen id : 2] +Input [2]: [sum#92, count#93] +Keys: [] +Functions [1]: [avg(UnscaledValue(ss_net_paid#51))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_paid#51))#95] +Results [1]: [cast((avg(UnscaledValue(ss_net_paid#51))#95 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#96] + +Subquery:10 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#23, [id=#24] +* HashAggregate (74) ++- Exchange (73) + +- * HashAggregate (72) + +- * Project (71) + +- * Filter (70) + +- * ColumnarToRow (69) + +- Scan parquet default.store_sales (68) + + +(68) Scan parquet default.store_sales +Output [1]: [ss_quantity#37] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)] +ReadSchema: struct + +(69) ColumnarToRow [codegen id : 1] +Input [1]: [ss_quantity#37] + +(70) Filter [codegen id : 1] +Input [1]: [ss_quantity#37] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 61)) AND (ss_quantity#37 <= 80)) + +(71) Project [codegen id : 1] +Output: [] +Input [1]: [ss_quantity#37] + +(72) HashAggregate [codegen id : 1] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#97] +Results [1]: [count#98] + +(73) Exchange +Input [1]: [count#98] +Arguments: SinglePartition, true, [id=#99] + +(74) HashAggregate [codegen 
id : 2] +Input [1]: [count#98] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#100] +Results [1]: [count(1)#100 AS count(1)#101] + +Subquery:11 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#25, [id=#26] +* HashAggregate (81) ++- Exchange (80) + +- * HashAggregate (79) + +- * Project (78) + +- * Filter (77) + +- * ColumnarToRow (76) + +- Scan parquet default.store_sales (75) + + +(75) Scan parquet default.store_sales +Output [2]: [ss_quantity#37, ss_ext_discount_amt#43] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)] +ReadSchema: struct + +(76) ColumnarToRow [codegen id : 1] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] + +(77) Filter [codegen id : 1] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 61)) AND (ss_quantity#37 <= 80)) + +(78) Project [codegen id : 1] +Output [1]: [ss_ext_discount_amt#43] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] + +(79) HashAggregate [codegen id : 1] +Input [1]: [ss_ext_discount_amt#43] +Keys: [] +Functions [1]: [partial_avg(UnscaledValue(ss_ext_discount_amt#43))] +Aggregate Attributes [2]: [sum#102, count#103] +Results [2]: [sum#104, count#105] + +(80) Exchange +Input [2]: [sum#104, count#105] +Arguments: SinglePartition, true, [id=#106] + +(81) HashAggregate [codegen id : 2] +Input [2]: [sum#104, count#105] +Keys: [] +Functions [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))#107] +Results [1]: [cast((avg(UnscaledValue(ss_ext_discount_amt#43))#107 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#108] + +Subquery:12 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#27, [id=#28] 
+* HashAggregate (88) ++- Exchange (87) + +- * HashAggregate (86) + +- * Project (85) + +- * Filter (84) + +- * ColumnarToRow (83) + +- Scan parquet default.store_sales (82) + + +(82) Scan parquet default.store_sales +Output [2]: [ss_quantity#37, ss_net_paid#51] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)] +ReadSchema: struct + +(83) ColumnarToRow [codegen id : 1] +Input [2]: [ss_quantity#37, ss_net_paid#51] + +(84) Filter [codegen id : 1] +Input [2]: [ss_quantity#37, ss_net_paid#51] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 61)) AND (ss_quantity#37 <= 80)) + +(85) Project [codegen id : 1] +Output [1]: [ss_net_paid#51] +Input [2]: [ss_quantity#37, ss_net_paid#51] + +(86) HashAggregate [codegen id : 1] +Input [1]: [ss_net_paid#51] +Keys: [] +Functions [1]: [partial_avg(UnscaledValue(ss_net_paid#51))] +Aggregate Attributes [2]: [sum#109, count#110] +Results [2]: [sum#111, count#112] + +(87) Exchange +Input [2]: [sum#111, count#112] +Arguments: SinglePartition, true, [id=#113] + +(88) HashAggregate [codegen id : 2] +Input [2]: [sum#111, count#112] +Keys: [] +Functions [1]: [avg(UnscaledValue(ss_net_paid#51))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_paid#51))#114] +Results [1]: [cast((avg(UnscaledValue(ss_net_paid#51))#114 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#115] + +Subquery:13 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#30, [id=#31] +* HashAggregate (95) ++- Exchange (94) + +- * HashAggregate (93) + +- * Project (92) + +- * Filter (91) + +- * ColumnarToRow (90) + +- Scan parquet default.store_sales (89) + + +(89) Scan parquet default.store_sales +Output [1]: [ss_quantity#37] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)] +ReadSchema: struct + +(90) ColumnarToRow [codegen id : 1] +Input [1]: [ss_quantity#37] + +(91) Filter [codegen id : 1] +Input [1]: [ss_quantity#37] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 81)) AND (ss_quantity#37 <= 100)) + +(92) Project [codegen id : 1] +Output: [] +Input [1]: [ss_quantity#37] + +(93) HashAggregate [codegen id : 1] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#116] +Results [1]: [count#117] + +(94) Exchange +Input [1]: [count#117] +Arguments: SinglePartition, true, [id=#118] + +(95) HashAggregate [codegen id : 2] +Input [1]: [count#117] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#119] +Results [1]: [count(1)#119 AS count(1)#120] + +Subquery:14 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#32, [id=#33] +* HashAggregate (102) ++- Exchange (101) + +- * HashAggregate (100) + +- * Project (99) + +- * Filter (98) + +- * ColumnarToRow (97) + +- Scan parquet default.store_sales (96) + + +(96) Scan parquet default.store_sales +Output [2]: [ss_quantity#37, ss_ext_discount_amt#43] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)] +ReadSchema: struct + +(97) ColumnarToRow [codegen id : 1] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] + +(98) Filter [codegen id : 1] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 81)) AND (ss_quantity#37 <= 100)) + +(99) Project [codegen id : 1] +Output [1]: 
[ss_ext_discount_amt#43] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] + +(100) HashAggregate [codegen id : 1] +Input [1]: [ss_ext_discount_amt#43] +Keys: [] +Functions [1]: [partial_avg(UnscaledValue(ss_ext_discount_amt#43))] +Aggregate Attributes [2]: [sum#121, count#122] +Results [2]: [sum#123, count#124] + +(101) Exchange +Input [2]: [sum#123, count#124] +Arguments: SinglePartition, true, [id=#125] + +(102) HashAggregate [codegen id : 2] +Input [2]: [sum#123, count#124] +Keys: [] +Functions [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))#126] +Results [1]: [cast((avg(UnscaledValue(ss_ext_discount_amt#43))#126 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#127] + +Subquery:15 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#34, [id=#35] +* HashAggregate (109) ++- Exchange (108) + +- * HashAggregate (107) + +- * Project (106) + +- * Filter (105) + +- * ColumnarToRow (104) + +- Scan parquet default.store_sales (103) + + +(103) Scan parquet default.store_sales +Output [2]: [ss_quantity#37, ss_net_paid#51] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)] +ReadSchema: struct + +(104) ColumnarToRow [codegen id : 1] +Input [2]: [ss_quantity#37, ss_net_paid#51] + +(105) Filter [codegen id : 1] +Input [2]: [ss_quantity#37, ss_net_paid#51] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 81)) AND (ss_quantity#37 <= 100)) + +(106) Project [codegen id : 1] +Output [1]: [ss_net_paid#51] +Input [2]: [ss_quantity#37, ss_net_paid#51] + +(107) HashAggregate [codegen id : 1] +Input [1]: [ss_net_paid#51] +Keys: [] +Functions [1]: [partial_avg(UnscaledValue(ss_net_paid#51))] +Aggregate Attributes [2]: [sum#128, count#129] +Results 
[2]: [sum#130, count#131] + +(108) Exchange +Input [2]: [sum#130, count#131] +Arguments: SinglePartition, true, [id=#132] + +(109) HashAggregate [codegen id : 2] +Input [2]: [sum#130, count#131] +Keys: [] +Functions [1]: [avg(UnscaledValue(ss_net_paid#51))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_paid#51))#133] +Results [1]: [cast((avg(UnscaledValue(ss_net_paid#51))#133 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#134] + + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/simplified.txt new file mode 100644 index 0000000000000..151ec8b405f2f --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/simplified.txt @@ -0,0 +1,186 @@ +WholeStageCodegen (1) + Project + Subquery #1 + WholeStageCodegen (2) + HashAggregate [count] [count,count(1),count(1)] + InputAdapter + Exchange #1 + WholeStageCodegen (1) + HashAggregate [count,count] + Project + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity] + Subquery #2 + WholeStageCodegen (2) + HashAggregate [count,sum] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),count,sum] + InputAdapter + Exchange #2 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt] [count,count,sum,sum] + Project [ss_ext_discount_amt] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_discount_amt,ss_quantity] + Subquery #3 + WholeStageCodegen (2) + HashAggregate [count,sum] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),count,sum] + InputAdapter + Exchange #3 + WholeStageCodegen (1) + HashAggregate [ss_net_paid] [count,count,sum,sum] + Project [ss_net_paid] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_net_paid,ss_quantity] + Subquery #4 + WholeStageCodegen (2) + HashAggregate [count] [count,count(1),count(1)] 
+ InputAdapter + Exchange #4 + WholeStageCodegen (1) + HashAggregate [count,count] + Project + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity] + Subquery #5 + WholeStageCodegen (2) + HashAggregate [count,sum] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),count,sum] + InputAdapter + Exchange #5 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt] [count,count,sum,sum] + Project [ss_ext_discount_amt] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_discount_amt,ss_quantity] + Subquery #6 + WholeStageCodegen (2) + HashAggregate [count,sum] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),count,sum] + InputAdapter + Exchange #6 + WholeStageCodegen (1) + HashAggregate [ss_net_paid] [count,count,sum,sum] + Project [ss_net_paid] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_net_paid,ss_quantity] + Subquery #7 + WholeStageCodegen (2) + HashAggregate [count] [count,count(1),count(1)] + InputAdapter + Exchange #7 + WholeStageCodegen (1) + HashAggregate [count,count] + Project + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity] + Subquery #8 + WholeStageCodegen (2) + HashAggregate [count,sum] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),count,sum] + InputAdapter + Exchange #8 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt] [count,count,sum,sum] + Project [ss_ext_discount_amt] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_discount_amt,ss_quantity] + Subquery #9 + WholeStageCodegen (2) + HashAggregate [count,sum] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),count,sum] + InputAdapter + Exchange #9 + WholeStageCodegen (1) + HashAggregate [ss_net_paid] [count,count,sum,sum] + Project [ss_net_paid] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet 
default.store_sales [ss_net_paid,ss_quantity] + Subquery #10 + WholeStageCodegen (2) + HashAggregate [count] [count,count(1),count(1)] + InputAdapter + Exchange #10 + WholeStageCodegen (1) + HashAggregate [count,count] + Project + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity] + Subquery #11 + WholeStageCodegen (2) + HashAggregate [count,sum] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),count,sum] + InputAdapter + Exchange #11 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt] [count,count,sum,sum] + Project [ss_ext_discount_amt] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_discount_amt,ss_quantity] + Subquery #12 + WholeStageCodegen (2) + HashAggregate [count,sum] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),count,sum] + InputAdapter + Exchange #12 + WholeStageCodegen (1) + HashAggregate [ss_net_paid] [count,count,sum,sum] + Project [ss_net_paid] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_net_paid,ss_quantity] + Subquery #13 + WholeStageCodegen (2) + HashAggregate [count] [count,count(1),count(1)] + InputAdapter + Exchange #13 + WholeStageCodegen (1) + HashAggregate [count,count] + Project + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity] + Subquery #14 + WholeStageCodegen (2) + HashAggregate [count,sum] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),count,sum] + InputAdapter + Exchange #14 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt] [count,count,sum,sum] + Project [ss_ext_discount_amt] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_discount_amt,ss_quantity] + Subquery #15 + WholeStageCodegen (2) + HashAggregate [count,sum] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),count,sum] + InputAdapter + Exchange #15 + WholeStageCodegen (1) 
+ HashAggregate [ss_net_paid] [count,count,sum,sum] + Project [ss_net_paid] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_net_paid,ss_quantity] + Filter [r_reason_sk] + ColumnarToRow + InputAdapter + Scan parquet default.reason [r_reason_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.sf100/explain.txt new file mode 100644 index 0000000000000..9d1c956ebc271 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.sf100/explain.txt @@ -0,0 +1,280 @@ +== Physical Plan == +TakeOrderedAndProject (50) ++- * Project (49) + +- BroadcastNestedLoopJoin Inner BuildRight (48) + :- * HashAggregate (27) + : +- Exchange (26) + : +- * HashAggregate (25) + : +- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.web_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.web_page (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.household_demographics (11) + : +- BroadcastExchange (22) + : +- * Project (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.time_dim (18) + +- BroadcastExchange (47) + +- * HashAggregate (46) + +- Exchange (45) + +- * HashAggregate (44) + +- * Project (43) + +- * BroadcastHashJoin Inner BuildRight (42) + :- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Project (33) + : : +- * BroadcastHashJoin Inner BuildRight (32) + : : :- * Filter (30) + : : : +- 
* ColumnarToRow (29) + : : : +- Scan parquet default.web_sales (28) + : : +- ReusedExchange (31) + : +- ReusedExchange (34) + +- BroadcastExchange (41) + +- * Project (40) + +- * Filter (39) + +- * ColumnarToRow (38) + +- Scan parquet default.time_dim (37) + + +(1) Scan parquet default.web_sales +Output [3]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_ship_hdemo_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_web_page_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [3]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3] + +(3) Filter [codegen id : 4] +Input [3]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3] +Condition : ((isnotnull(ws_ship_hdemo_sk#2) AND isnotnull(ws_sold_time_sk#1)) AND isnotnull(ws_web_page_sk#3)) + +(4) Scan parquet default.web_page +Output [2]: [wp_web_page_sk#4, wp_char_count#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_page] +PushedFilters: [IsNotNull(wp_char_count), GreaterThanOrEqual(wp_char_count,5000), LessThanOrEqual(wp_char_count,5200), IsNotNull(wp_web_page_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [wp_web_page_sk#4, wp_char_count#5] + +(6) Filter [codegen id : 1] +Input [2]: [wp_web_page_sk#4, wp_char_count#5] +Condition : (((isnotnull(wp_char_count#5) AND (wp_char_count#5 >= 5000)) AND (wp_char_count#5 <= 5200)) AND isnotnull(wp_web_page_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [wp_web_page_sk#4] +Input [2]: [wp_web_page_sk#4, wp_char_count#5] + +(8) BroadcastExchange +Input [1]: [wp_web_page_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(9) 
BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_web_page_sk#3] +Right keys [1]: [wp_web_page_sk#4] +Join condition: None + +(10) Project [codegen id : 4] +Output [2]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2] +Input [4]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3, wp_web_page_sk#4] + +(11) Scan parquet default.household_demographics +Output [2]: [hd_demo_sk#7, hd_dep_count#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/household_demographics] +PushedFilters: [IsNotNull(hd_dep_count), EqualTo(hd_dep_count,6), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [hd_demo_sk#7, hd_dep_count#8] + +(13) Filter [codegen id : 2] +Input [2]: [hd_demo_sk#7, hd_dep_count#8] +Condition : ((isnotnull(hd_dep_count#8) AND (hd_dep_count#8 = 6)) AND isnotnull(hd_demo_sk#7)) + +(14) Project [codegen id : 2] +Output [1]: [hd_demo_sk#7] +Input [2]: [hd_demo_sk#7, hd_dep_count#8] + +(15) BroadcastExchange +Input [1]: [hd_demo_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_ship_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#7] +Join condition: None + +(17) Project [codegen id : 4] +Output [1]: [ws_sold_time_sk#1] +Input [3]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, hd_demo_sk#7] + +(18) Scan parquet default.time_dim +Output [2]: [t_time_sk#10, t_hour#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/time_dim] +PushedFilters: [IsNotNull(t_hour), GreaterThanOrEqual(t_hour,8), LessThanOrEqual(t_hour,9), IsNotNull(t_time_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [t_time_sk#10, t_hour#11] + +(20) Filter [codegen id : 3] +Input [2]: [t_time_sk#10, 
t_hour#11] +Condition : (((isnotnull(t_hour#11) AND (t_hour#11 >= 8)) AND (t_hour#11 <= 9)) AND isnotnull(t_time_sk#10)) + +(21) Project [codegen id : 3] +Output [1]: [t_time_sk#10] +Input [2]: [t_time_sk#10, t_hour#11] + +(22) BroadcastExchange +Input [1]: [t_time_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_sold_time_sk#1] +Right keys [1]: [t_time_sk#10] +Join condition: None + +(24) Project [codegen id : 4] +Output: [] +Input [2]: [ws_sold_time_sk#1, t_time_sk#10] + +(25) HashAggregate [codegen id : 4] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#13] +Results [1]: [count#14] + +(26) Exchange +Input [1]: [count#14] +Arguments: SinglePartition, true, [id=#15] + +(27) HashAggregate [codegen id : 5] +Input [1]: [count#14] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#16] +Results [1]: [count(1)#16 AS amc#17] + +(28) Scan parquet default.web_sales +Output [3]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_ship_hdemo_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_web_page_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 9] +Input [3]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3] + +(30) Filter [codegen id : 9] +Input [3]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3] +Condition : ((isnotnull(ws_ship_hdemo_sk#2) AND isnotnull(ws_sold_time_sk#1)) AND isnotnull(ws_web_page_sk#3)) + +(31) ReusedExchange [Reuses operator id: 8] +Output [1]: [wp_web_page_sk#4] + +(32) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_web_page_sk#3] +Right keys [1]: [wp_web_page_sk#4] +Join condition: None + +(33) Project [codegen id : 9] 
+Output [2]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2] +Input [4]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3, wp_web_page_sk#4] + +(34) ReusedExchange [Reuses operator id: 15] +Output [1]: [hd_demo_sk#7] + +(35) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_ship_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#7] +Join condition: None + +(36) Project [codegen id : 9] +Output [1]: [ws_sold_time_sk#1] +Input [3]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, hd_demo_sk#7] + +(37) Scan parquet default.time_dim +Output [2]: [t_time_sk#10, t_hour#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/time_dim] +PushedFilters: [IsNotNull(t_hour), GreaterThanOrEqual(t_hour,19), LessThanOrEqual(t_hour,20), IsNotNull(t_time_sk)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 8] +Input [2]: [t_time_sk#10, t_hour#11] + +(39) Filter [codegen id : 8] +Input [2]: [t_time_sk#10, t_hour#11] +Condition : (((isnotnull(t_hour#11) AND (t_hour#11 >= 19)) AND (t_hour#11 <= 20)) AND isnotnull(t_time_sk#10)) + +(40) Project [codegen id : 8] +Output [1]: [t_time_sk#10] +Input [2]: [t_time_sk#10, t_hour#11] + +(41) BroadcastExchange +Input [1]: [t_time_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] + +(42) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_sold_time_sk#1] +Right keys [1]: [t_time_sk#10] +Join condition: None + +(43) Project [codegen id : 9] +Output: [] +Input [2]: [ws_sold_time_sk#1, t_time_sk#10] + +(44) HashAggregate [codegen id : 9] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#19] +Results [1]: [count#20] + +(45) Exchange +Input [1]: [count#20] +Arguments: SinglePartition, true, [id=#21] + +(46) HashAggregate [codegen id : 10] +Input [1]: [count#20] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#22] +Results 
[1]: [count(1)#22 AS pmc#23] + +(47) BroadcastExchange +Input [1]: [pmc#23] +Arguments: IdentityBroadcastMode, [id=#24] + +(48) BroadcastNestedLoopJoin +Join condition: None + +(49) Project [codegen id : 11] +Output [1]: [CheckOverflow((promote_precision(cast(amc#17 as decimal(15,4))) / promote_precision(cast(pmc#23 as decimal(15,4)))), DecimalType(35,20), true) AS am_pm_ratio#25] +Input [2]: [amc#17, pmc#23] + +(50) TakeOrderedAndProject +Input [1]: [am_pm_ratio#25] +Arguments: 100, [am_pm_ratio#25 ASC NULLS FIRST], [am_pm_ratio#25] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.sf100/simplified.txt new file mode 100644 index 0000000000000..564b98cb6e6b1 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.sf100/simplified.txt @@ -0,0 +1,74 @@ +TakeOrderedAndProject [am_pm_ratio] + WholeStageCodegen (11) + Project [amc,pmc] + InputAdapter + BroadcastNestedLoopJoin + WholeStageCodegen (5) + HashAggregate [count] [amc,count,count(1)] + InputAdapter + Exchange #1 + WholeStageCodegen (4) + HashAggregate [count,count] + Project + BroadcastHashJoin [t_time_sk,ws_sold_time_sk] + Project [ws_sold_time_sk] + BroadcastHashJoin [hd_demo_sk,ws_ship_hdemo_sk] + Project [ws_ship_hdemo_sk,ws_sold_time_sk] + BroadcastHashJoin [wp_web_page_sk,ws_web_page_sk] + Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [wp_web_page_sk] + Filter [wp_char_count,wp_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_page [wp_char_count,wp_web_page_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [hd_demo_sk] + Filter [hd_demo_sk,hd_dep_count] + ColumnarToRow + InputAdapter + Scan 
parquet default.household_demographics [hd_demo_sk,hd_dep_count] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [t_time_sk] + Filter [t_hour,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_hour,t_time_sk] + BroadcastExchange #5 + WholeStageCodegen (10) + HashAggregate [count] [count,count(1),pmc] + InputAdapter + Exchange #6 + WholeStageCodegen (9) + HashAggregate [count,count] + Project + BroadcastHashJoin [t_time_sk,ws_sold_time_sk] + Project [ws_sold_time_sk] + BroadcastHashJoin [hd_demo_sk,ws_ship_hdemo_sk] + Project [ws_ship_hdemo_sk,ws_sold_time_sk] + BroadcastHashJoin [wp_web_page_sk,ws_web_page_sk] + Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] + InputAdapter + ReusedExchange [wp_web_page_sk] #2 + InputAdapter + ReusedExchange [hd_demo_sk] #3 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (8) + Project [t_time_sk] + Filter [t_hour,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_hour,t_time_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/explain.txt new file mode 100644 index 0000000000000..332aab8796bd1 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/explain.txt @@ -0,0 +1,280 @@ +== Physical Plan == +TakeOrderedAndProject (50) ++- * Project (49) + +- BroadcastNestedLoopJoin Inner BuildRight (48) + :- * HashAggregate (27) + : +- Exchange (26) + : +- * HashAggregate (25) + : +- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet 
default.web_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.household_demographics (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.time_dim (11) + : +- BroadcastExchange (22) + : +- * Project (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.web_page (18) + +- BroadcastExchange (47) + +- * HashAggregate (46) + +- Exchange (45) + +- * HashAggregate (44) + +- * Project (43) + +- * BroadcastHashJoin Inner BuildRight (42) + :- * Project (40) + : +- * BroadcastHashJoin Inner BuildRight (39) + : :- * Project (33) + : : +- * BroadcastHashJoin Inner BuildRight (32) + : : :- * Filter (30) + : : : +- * ColumnarToRow (29) + : : : +- Scan parquet default.web_sales (28) + : : +- ReusedExchange (31) + : +- BroadcastExchange (38) + : +- * Project (37) + : +- * Filter (36) + : +- * ColumnarToRow (35) + : +- Scan parquet default.time_dim (34) + +- ReusedExchange (41) + + +(1) Scan parquet default.web_sales +Output [3]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_ship_hdemo_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_web_page_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [3]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3] + +(3) Filter [codegen id : 4] +Input [3]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3] +Condition : ((isnotnull(ws_ship_hdemo_sk#2) AND isnotnull(ws_sold_time_sk#1)) AND isnotnull(ws_web_page_sk#3)) + +(4) Scan parquet default.household_demographics +Output [2]: [hd_demo_sk#4, hd_dep_count#5] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/household_demographics] +PushedFilters: [IsNotNull(hd_dep_count), EqualTo(hd_dep_count,6), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [hd_demo_sk#4, hd_dep_count#5] + +(6) Filter [codegen id : 1] +Input [2]: [hd_demo_sk#4, hd_dep_count#5] +Condition : ((isnotnull(hd_dep_count#5) AND (hd_dep_count#5 = 6)) AND isnotnull(hd_demo_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [hd_demo_sk#4] +Input [2]: [hd_demo_sk#4, hd_dep_count#5] + +(8) BroadcastExchange +Input [1]: [hd_demo_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_ship_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#4] +Join condition: None + +(10) Project [codegen id : 4] +Output [2]: [ws_sold_time_sk#1, ws_web_page_sk#3] +Input [4]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3, hd_demo_sk#4] + +(11) Scan parquet default.time_dim +Output [2]: [t_time_sk#7, t_hour#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/time_dim] +PushedFilters: [IsNotNull(t_hour), GreaterThanOrEqual(t_hour,8), LessThanOrEqual(t_hour,9), IsNotNull(t_time_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [t_time_sk#7, t_hour#8] + +(13) Filter [codegen id : 2] +Input [2]: [t_time_sk#7, t_hour#8] +Condition : (((isnotnull(t_hour#8) AND (t_hour#8 >= 8)) AND (t_hour#8 <= 9)) AND isnotnull(t_time_sk#7)) + +(14) Project [codegen id : 2] +Output [1]: [t_time_sk#7] +Input [2]: [t_time_sk#7, t_hour#8] + +(15) BroadcastExchange +Input [1]: [t_time_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: 
[ws_sold_time_sk#1] +Right keys [1]: [t_time_sk#7] +Join condition: None + +(17) Project [codegen id : 4] +Output [1]: [ws_web_page_sk#3] +Input [3]: [ws_sold_time_sk#1, ws_web_page_sk#3, t_time_sk#7] + +(18) Scan parquet default.web_page +Output [2]: [wp_web_page_sk#10, wp_char_count#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_page] +PushedFilters: [IsNotNull(wp_char_count), GreaterThanOrEqual(wp_char_count,5000), LessThanOrEqual(wp_char_count,5200), IsNotNull(wp_web_page_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [wp_web_page_sk#10, wp_char_count#11] + +(20) Filter [codegen id : 3] +Input [2]: [wp_web_page_sk#10, wp_char_count#11] +Condition : (((isnotnull(wp_char_count#11) AND (wp_char_count#11 >= 5000)) AND (wp_char_count#11 <= 5200)) AND isnotnull(wp_web_page_sk#10)) + +(21) Project [codegen id : 3] +Output [1]: [wp_web_page_sk#10] +Input [2]: [wp_web_page_sk#10, wp_char_count#11] + +(22) BroadcastExchange +Input [1]: [wp_web_page_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_web_page_sk#3] +Right keys [1]: [wp_web_page_sk#10] +Join condition: None + +(24) Project [codegen id : 4] +Output: [] +Input [2]: [ws_web_page_sk#3, wp_web_page_sk#10] + +(25) HashAggregate [codegen id : 4] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#13] +Results [1]: [count#14] + +(26) Exchange +Input [1]: [count#14] +Arguments: SinglePartition, true, [id=#15] + +(27) HashAggregate [codegen id : 5] +Input [1]: [count#14] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#16] +Results [1]: [count(1)#16 AS amc#17] + +(28) Scan parquet default.web_sales +Output [3]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3] +Batched: true +Location: 
InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_ship_hdemo_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_web_page_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 9] +Input [3]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3] + +(30) Filter [codegen id : 9] +Input [3]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3] +Condition : ((isnotnull(ws_ship_hdemo_sk#2) AND isnotnull(ws_sold_time_sk#1)) AND isnotnull(ws_web_page_sk#3)) + +(31) ReusedExchange [Reuses operator id: 8] +Output [1]: [hd_demo_sk#4] + +(32) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_ship_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#4] +Join condition: None + +(33) Project [codegen id : 9] +Output [2]: [ws_sold_time_sk#1, ws_web_page_sk#3] +Input [4]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3, hd_demo_sk#4] + +(34) Scan parquet default.time_dim +Output [2]: [t_time_sk#7, t_hour#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/time_dim] +PushedFilters: [IsNotNull(t_hour), GreaterThanOrEqual(t_hour,19), LessThanOrEqual(t_hour,20), IsNotNull(t_time_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 7] +Input [2]: [t_time_sk#7, t_hour#8] + +(36) Filter [codegen id : 7] +Input [2]: [t_time_sk#7, t_hour#8] +Condition : (((isnotnull(t_hour#8) AND (t_hour#8 >= 19)) AND (t_hour#8 <= 20)) AND isnotnull(t_time_sk#7)) + +(37) Project [codegen id : 7] +Output [1]: [t_time_sk#7] +Input [2]: [t_time_sk#7, t_hour#8] + +(38) BroadcastExchange +Input [1]: [t_time_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] + +(39) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_sold_time_sk#1] +Right keys [1]: [t_time_sk#7] +Join condition: None + +(40) Project [codegen id 
: 9] +Output [1]: [ws_web_page_sk#3] +Input [3]: [ws_sold_time_sk#1, ws_web_page_sk#3, t_time_sk#7] + +(41) ReusedExchange [Reuses operator id: 22] +Output [1]: [wp_web_page_sk#10] + +(42) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_web_page_sk#3] +Right keys [1]: [wp_web_page_sk#10] +Join condition: None + +(43) Project [codegen id : 9] +Output: [] +Input [2]: [ws_web_page_sk#3, wp_web_page_sk#10] + +(44) HashAggregate [codegen id : 9] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#19] +Results [1]: [count#20] + +(45) Exchange +Input [1]: [count#20] +Arguments: SinglePartition, true, [id=#21] + +(46) HashAggregate [codegen id : 10] +Input [1]: [count#20] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#22] +Results [1]: [count(1)#22 AS pmc#23] + +(47) BroadcastExchange +Input [1]: [pmc#23] +Arguments: IdentityBroadcastMode, [id=#24] + +(48) BroadcastNestedLoopJoin +Join condition: None + +(49) Project [codegen id : 11] +Output [1]: [CheckOverflow((promote_precision(cast(amc#17 as decimal(15,4))) / promote_precision(cast(pmc#23 as decimal(15,4)))), DecimalType(35,20), true) AS am_pm_ratio#25] +Input [2]: [amc#17, pmc#23] + +(50) TakeOrderedAndProject +Input [1]: [am_pm_ratio#25] +Arguments: 100, [am_pm_ratio#25 ASC NULLS FIRST], [am_pm_ratio#25] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/simplified.txt new file mode 100644 index 0000000000000..5c0047a2f2ae0 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/simplified.txt @@ -0,0 +1,74 @@ +TakeOrderedAndProject [am_pm_ratio] + WholeStageCodegen (11) + Project [amc,pmc] + InputAdapter + BroadcastNestedLoopJoin + WholeStageCodegen (5) + HashAggregate [count] [amc,count,count(1)] + InputAdapter + Exchange #1 + WholeStageCodegen (4) + HashAggregate [count,count] + 
Project + BroadcastHashJoin [wp_web_page_sk,ws_web_page_sk] + Project [ws_web_page_sk] + BroadcastHashJoin [t_time_sk,ws_sold_time_sk] + Project [ws_sold_time_sk,ws_web_page_sk] + BroadcastHashJoin [hd_demo_sk,ws_ship_hdemo_sk] + Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [hd_demo_sk] + Filter [hd_demo_sk,hd_dep_count] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [t_time_sk] + Filter [t_hour,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_hour,t_time_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [wp_web_page_sk] + Filter [wp_char_count,wp_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_page [wp_char_count,wp_web_page_sk] + BroadcastExchange #5 + WholeStageCodegen (10) + HashAggregate [count] [count,count(1),pmc] + InputAdapter + Exchange #6 + WholeStageCodegen (9) + HashAggregate [count,count] + Project + BroadcastHashJoin [wp_web_page_sk,ws_web_page_sk] + Project [ws_web_page_sk] + BroadcastHashJoin [t_time_sk,ws_sold_time_sk] + Project [ws_sold_time_sk,ws_web_page_sk] + BroadcastHashJoin [hd_demo_sk,ws_ship_hdemo_sk] + Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] + InputAdapter + ReusedExchange [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (7) + Project [t_time_sk] + Filter [t_hour,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_hour,t_time_sk] + InputAdapter + ReusedExchange [wp_web_page_sk] #4 diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/explain.txt new file mode 100644 index 0000000000000..fc53c0218645f --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/explain.txt @@ -0,0 +1,264 @@ +== Physical Plan == +* Sort (47) ++- Exchange (46) + +- * HashAggregate (45) + +- Exchange (44) + +- * HashAggregate (43) + +- * Project (42) + +- * BroadcastHashJoin Inner BuildRight (41) + :- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Project (23) + : : +- * BroadcastHashJoin Inner BuildRight (22) + : : :- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildLeft (8) + : : : : :- BroadcastExchange (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.customer_demographics (1) + : : : : +- * Filter (7) + : : : : +- * ColumnarToRow (6) + : : : : +- Scan parquet default.customer (5) + : : : +- BroadcastExchange (14) + : : : +- * Project (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.household_demographics (10) + : : +- BroadcastExchange (21) + : : +- * Project (20) + : : +- * Filter (19) + : : +- * ColumnarToRow (18) + : : +- Scan parquet default.customer_address (17) + : +- BroadcastExchange (34) + : +- * Project (33) + : +- * BroadcastHashJoin Inner BuildLeft (32) + : :- BroadcastExchange (28) + : : +- * Project (27) + : : +- * Filter (26) + : : +- * ColumnarToRow (25) + : : +- Scan parquet default.date_dim (24) + : +- * Filter (31) + : +- * ColumnarToRow (30) + : +- Scan parquet default.catalog_returns (29) + +- BroadcastExchange (40) + +- * Filter (39) + +- * ColumnarToRow (38) + +- Scan parquet default.call_center (37) + + +(1) Scan parquet default.customer_demographics +Output [3]: 
[cd_demo_sk#1, cd_marital_status#2, cd_education_status#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_demographics] +PushedFilters: [Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Unknown)),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,Advanced Degree))), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [3]: [cd_demo_sk#1, cd_marital_status#2, cd_education_status#3] + +(3) Filter [codegen id : 1] +Input [3]: [cd_demo_sk#1, cd_marital_status#2, cd_education_status#3] +Condition : ((((cd_marital_status#2 = M) AND (cd_education_status#3 = Unknown)) OR ((cd_marital_status#2 = W) AND (cd_education_status#3 = Advanced Degree))) AND isnotnull(cd_demo_sk#1)) + +(4) BroadcastExchange +Input [3]: [cd_demo_sk#1, cd_marital_status#2, cd_education_status#3] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#4] + +(5) Scan parquet default.customer +Output [4]: [c_customer_sk#5, c_current_cdemo_sk#6, c_current_hdemo_sk#7, c_current_addr_sk#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk)] +ReadSchema: struct + +(6) ColumnarToRow +Input [4]: [c_customer_sk#5, c_current_cdemo_sk#6, c_current_hdemo_sk#7, c_current_addr_sk#8] + +(7) Filter +Input [4]: [c_customer_sk#5, c_current_cdemo_sk#6, c_current_hdemo_sk#7, c_current_addr_sk#8] +Condition : (((isnotnull(c_customer_sk#5) AND isnotnull(c_current_addr_sk#8)) AND isnotnull(c_current_cdemo_sk#6)) AND isnotnull(c_current_hdemo_sk#7)) + +(8) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cd_demo_sk#1] +Right keys [1]: 
[c_current_cdemo_sk#6] +Join condition: None + +(9) Project [codegen id : 7] +Output [5]: [cd_marital_status#2, cd_education_status#3, c_customer_sk#5, c_current_hdemo_sk#7, c_current_addr_sk#8] +Input [7]: [cd_demo_sk#1, cd_marital_status#2, cd_education_status#3, c_customer_sk#5, c_current_cdemo_sk#6, c_current_hdemo_sk#7, c_current_addr_sk#8] + +(10) Scan parquet default.household_demographics +Output [2]: [hd_demo_sk#9, hd_buy_potential#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/household_demographics] +PushedFilters: [IsNotNull(hd_buy_potential), StringStartsWith(hd_buy_potential,Unknown), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [hd_demo_sk#9, hd_buy_potential#10] + +(12) Filter [codegen id : 2] +Input [2]: [hd_demo_sk#9, hd_buy_potential#10] +Condition : ((isnotnull(hd_buy_potential#10) AND StartsWith(hd_buy_potential#10, Unknown)) AND isnotnull(hd_demo_sk#9)) + +(13) Project [codegen id : 2] +Output [1]: [hd_demo_sk#9] +Input [2]: [hd_demo_sk#9, hd_buy_potential#10] + +(14) BroadcastExchange +Input [1]: [hd_demo_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] + +(15) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_current_hdemo_sk#7] +Right keys [1]: [hd_demo_sk#9] +Join condition: None + +(16) Project [codegen id : 7] +Output [4]: [cd_marital_status#2, cd_education_status#3, c_customer_sk#5, c_current_addr_sk#8] +Input [6]: [cd_marital_status#2, cd_education_status#3, c_customer_sk#5, c_current_hdemo_sk#7, c_current_addr_sk#8, hd_demo_sk#9] + +(17) Scan parquet default.customer_address +Output [2]: [ca_address_sk#12, ca_gmt_offset#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_address] 
+PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-7.00), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [2]: [ca_address_sk#12, ca_gmt_offset#13] + +(19) Filter [codegen id : 3] +Input [2]: [ca_address_sk#12, ca_gmt_offset#13] +Condition : ((isnotnull(ca_gmt_offset#13) AND (ca_gmt_offset#13 = -7.00)) AND isnotnull(ca_address_sk#12)) + +(20) Project [codegen id : 3] +Output [1]: [ca_address_sk#12] +Input [2]: [ca_address_sk#12, ca_gmt_offset#13] + +(21) BroadcastExchange +Input [1]: [ca_address_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] + +(22) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_current_addr_sk#8] +Right keys [1]: [ca_address_sk#12] +Join condition: None + +(23) Project [codegen id : 7] +Output [3]: [cd_marital_status#2, cd_education_status#3, c_customer_sk#5] +Input [5]: [cd_marital_status#2, cd_education_status#3, c_customer_sk#5, c_current_addr_sk#8, ca_address_sk#12] + +(24) Scan parquet default.date_dim +Output [3]: [d_date_sk#15, d_year#16, d_moy#17] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,11), IsNotNull(d_date_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 4] +Input [3]: [d_date_sk#15, d_year#16, d_moy#17] + +(26) Filter [codegen id : 4] +Input [3]: [d_date_sk#15, d_year#16, d_moy#17] +Condition : ((((isnotnull(d_year#16) AND isnotnull(d_moy#17)) AND (d_year#16 = 1998)) AND (d_moy#17 = 11)) AND isnotnull(d_date_sk#15)) + +(27) Project [codegen id : 4] +Output [1]: [d_date_sk#15] +Input [3]: [d_date_sk#15, d_year#16, d_moy#17] + +(28) BroadcastExchange +Input [1]: [d_date_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] + +(29) 
Scan parquet default.catalog_returns +Output [4]: [cr_returned_date_sk#19, cr_returning_customer_sk#20, cr_call_center_sk#21, cr_net_loss#22] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_returns] +PushedFilters: [IsNotNull(cr_call_center_sk), IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_customer_sk)] +ReadSchema: struct + +(30) ColumnarToRow +Input [4]: [cr_returned_date_sk#19, cr_returning_customer_sk#20, cr_call_center_sk#21, cr_net_loss#22] + +(31) Filter +Input [4]: [cr_returned_date_sk#19, cr_returning_customer_sk#20, cr_call_center_sk#21, cr_net_loss#22] +Condition : ((isnotnull(cr_call_center_sk#21) AND isnotnull(cr_returned_date_sk#19)) AND isnotnull(cr_returning_customer_sk#20)) + +(32) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [d_date_sk#15] +Right keys [1]: [cr_returned_date_sk#19] +Join condition: None + +(33) Project [codegen id : 5] +Output [3]: [cr_returning_customer_sk#20, cr_call_center_sk#21, cr_net_loss#22] +Input [5]: [d_date_sk#15, cr_returned_date_sk#19, cr_returning_customer_sk#20, cr_call_center_sk#21, cr_net_loss#22] + +(34) BroadcastExchange +Input [3]: [cr_returning_customer_sk#20, cr_call_center_sk#21, cr_net_loss#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] + +(35) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_customer_sk#5] +Right keys [1]: [cr_returning_customer_sk#20] +Join condition: None + +(36) Project [codegen id : 7] +Output [4]: [cd_marital_status#2, cd_education_status#3, cr_call_center_sk#21, cr_net_loss#22] +Input [6]: [cd_marital_status#2, cd_education_status#3, c_customer_sk#5, cr_returning_customer_sk#20, cr_call_center_sk#21, cr_net_loss#22] + +(37) Scan parquet default.call_center +Output [4]: [cc_call_center_sk#24, cc_call_center_id#25, cc_name#26, cc_manager#27] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/call_center] +PushedFilters: [IsNotNull(cc_call_center_sk)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 6] +Input [4]: [cc_call_center_sk#24, cc_call_center_id#25, cc_name#26, cc_manager#27] + +(39) Filter [codegen id : 6] +Input [4]: [cc_call_center_sk#24, cc_call_center_id#25, cc_name#26, cc_manager#27] +Condition : isnotnull(cc_call_center_sk#24) + +(40) BroadcastExchange +Input [4]: [cc_call_center_sk#24, cc_call_center_id#25, cc_name#26, cc_manager#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#28] + +(41) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cr_call_center_sk#21] +Right keys [1]: [cc_call_center_sk#24] +Join condition: None + +(42) Project [codegen id : 7] +Output [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cr_net_loss#22, cd_marital_status#2, cd_education_status#3] +Input [8]: [cd_marital_status#2, cd_education_status#3, cr_call_center_sk#21, cr_net_loss#22, cc_call_center_sk#24, cc_call_center_id#25, cc_name#26, cc_manager#27] + +(43) HashAggregate [codegen id : 7] +Input [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cr_net_loss#22, cd_marital_status#2, cd_education_status#3] +Keys [5]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#2, cd_education_status#3] +Functions [1]: [partial_sum(UnscaledValue(cr_net_loss#22))] +Aggregate Attributes [1]: [sum#29] +Results [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#2, cd_education_status#3, sum#30] + +(44) Exchange +Input [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#2, cd_education_status#3, sum#30] +Arguments: hashpartitioning(cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#2, cd_education_status#3, 5), true, [id=#31] + +(45) HashAggregate [codegen id : 8] +Input [6]: [cc_call_center_id#25, cc_name#26, 
cc_manager#27, cd_marital_status#2, cd_education_status#3, sum#30] +Keys [5]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#2, cd_education_status#3] +Functions [1]: [sum(UnscaledValue(cr_net_loss#22))] +Aggregate Attributes [1]: [sum(UnscaledValue(cr_net_loss#22))#32] +Results [4]: [cc_call_center_id#25 AS Call_Center#33, cc_name#26 AS Call_Center_Name#34, cc_manager#27 AS Manager#35, MakeDecimal(sum(UnscaledValue(cr_net_loss#22))#32,17,2) AS Returns_Loss#36] + +(46) Exchange +Input [4]: [Call_Center#33, Call_Center_Name#34, Manager#35, Returns_Loss#36] +Arguments: rangepartitioning(Returns_Loss#36 DESC NULLS LAST, 5), true, [id=#37] + +(47) Sort [codegen id : 9] +Input [4]: [Call_Center#33, Call_Center_Name#34, Manager#35, Returns_Loss#36] +Arguments: [Returns_Loss#36 DESC NULLS LAST], true, 0 + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/simplified.txt new file mode 100644 index 0000000000000..7ab928267ae7b --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/simplified.txt @@ -0,0 +1,71 @@ +WholeStageCodegen (9) + Sort [Returns_Loss] + InputAdapter + Exchange [Returns_Loss] #1 + WholeStageCodegen (8) + HashAggregate [cc_call_center_id,cc_manager,cc_name,cd_education_status,cd_marital_status,sum] [Call_Center,Call_Center_Name,Manager,Returns_Loss,sum,sum(UnscaledValue(cr_net_loss))] + InputAdapter + Exchange [cc_call_center_id,cc_manager,cc_name,cd_education_status,cd_marital_status] #2 + WholeStageCodegen (7) + HashAggregate [cc_call_center_id,cc_manager,cc_name,cd_education_status,cd_marital_status,cr_net_loss] [sum,sum] + Project [cc_call_center_id,cc_manager,cc_name,cd_education_status,cd_marital_status,cr_net_loss] + BroadcastHashJoin [cc_call_center_sk,cr_call_center_sk] + Project 
[cd_education_status,cd_marital_status,cr_call_center_sk,cr_net_loss] + BroadcastHashJoin [c_customer_sk,cr_returning_customer_sk] + Project [c_customer_sk,cd_education_status,cd_marital_status] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_addr_sk,c_customer_sk,cd_education_status,cd_marital_status] + BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] + Project [c_current_addr_sk,c_current_hdemo_sk,c_customer_sk,cd_education_status,cd_marital_status] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [cd_demo_sk,cd_education_status,cd_marital_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_marital_status] + Filter [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [hd_demo_sk] + Filter [hd_buy_potential,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_buy_potential,hd_demo_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [ca_address_sk] + Filter [ca_address_sk,ca_gmt_offset] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Project [cr_call_center_sk,cr_net_loss,cr_returning_customer_sk] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (4) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + Filter [cr_call_center_sk,cr_returned_date_sk,cr_returning_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns 
[cr_call_center_sk,cr_net_loss,cr_returned_date_sk,cr_returning_customer_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (6) + Filter [cc_call_center_sk] + ColumnarToRow + InputAdapter + Scan parquet default.call_center [cc_call_center_id,cc_call_center_sk,cc_manager,cc_name] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/explain.txt new file mode 100644 index 0000000000000..003c7bd5a9835 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/explain.txt @@ -0,0 +1,264 @@ +== Physical Plan == +* Sort (47) ++- Exchange (46) + +- * HashAggregate (45) + +- Exchange (44) + +- * HashAggregate (43) + +- * Project (42) + +- * BroadcastHashJoin Inner BuildRight (41) + :- * Project (35) + : +- * BroadcastHashJoin Inner BuildRight (34) + : :- * Project (29) + : : +- * BroadcastHashJoin Inner BuildRight (28) + : : :- * Project (22) + : : : +- * BroadcastHashJoin Inner BuildRight (21) + : : : :- * Project (16) + : : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : : :- * Project (9) + : : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : : :- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet default.call_center (1) + : : : : : +- BroadcastExchange (7) + : : : : : +- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.catalog_returns (4) + : : : : +- BroadcastExchange (14) + : : : : +- * Project (13) + : : : : +- * Filter (12) + : : : : +- * ColumnarToRow (11) + : : : : +- Scan parquet default.date_dim (10) + : : : +- BroadcastExchange (20) + : : : +- * Filter (19) + : : : +- * ColumnarToRow (18) + : : : +- Scan parquet default.customer (17) + : : +- BroadcastExchange (27) + : : +- * Project (26) + : : +- * Filter (25) + : : +- * ColumnarToRow (24) + : : +- Scan parquet default.customer_address (23) + : +- BroadcastExchange 
(33) + : +- * Filter (32) + : +- * ColumnarToRow (31) + : +- Scan parquet default.customer_demographics (30) + +- BroadcastExchange (40) + +- * Project (39) + +- * Filter (38) + +- * ColumnarToRow (37) + +- Scan parquet default.household_demographics (36) + + +(1) Scan parquet default.call_center +Output [4]: [cc_call_center_sk#1, cc_call_center_id#2, cc_name#3, cc_manager#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/call_center] +PushedFilters: [IsNotNull(cc_call_center_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 7] +Input [4]: [cc_call_center_sk#1, cc_call_center_id#2, cc_name#3, cc_manager#4] + +(3) Filter [codegen id : 7] +Input [4]: [cc_call_center_sk#1, cc_call_center_id#2, cc_name#3, cc_manager#4] +Condition : isnotnull(cc_call_center_sk#1) + +(4) Scan parquet default.catalog_returns +Output [4]: [cr_returned_date_sk#5, cr_returning_customer_sk#6, cr_call_center_sk#7, cr_net_loss#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_returns] +PushedFilters: [IsNotNull(cr_call_center_sk), IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_customer_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cr_returned_date_sk#5, cr_returning_customer_sk#6, cr_call_center_sk#7, cr_net_loss#8] + +(6) Filter [codegen id : 1] +Input [4]: [cr_returned_date_sk#5, cr_returning_customer_sk#6, cr_call_center_sk#7, cr_net_loss#8] +Condition : ((isnotnull(cr_call_center_sk#7) AND isnotnull(cr_returned_date_sk#5)) AND isnotnull(cr_returning_customer_sk#6)) + +(7) BroadcastExchange +Input [4]: [cr_returned_date_sk#5, cr_returning_customer_sk#6, cr_call_center_sk#7, cr_net_loss#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, false] as bigint)),false), [id=#9] + +(8) BroadcastHashJoin [codegen 
id : 7] +Left keys [1]: [cc_call_center_sk#1] +Right keys [1]: [cr_call_center_sk#7] +Join condition: None + +(9) Project [codegen id : 7] +Output [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returned_date_sk#5, cr_returning_customer_sk#6, cr_net_loss#8] +Input [8]: [cc_call_center_sk#1, cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returned_date_sk#5, cr_returning_customer_sk#6, cr_call_center_sk#7, cr_net_loss#8] + +(10) Scan parquet default.date_dim +Output [3]: [d_date_sk#10, d_year#11, d_moy#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,11), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] + +(12) Filter [codegen id : 2] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] +Condition : ((((isnotnull(d_year#11) AND isnotnull(d_moy#12)) AND (d_year#11 = 1998)) AND (d_moy#12 = 11)) AND isnotnull(d_date_sk#10)) + +(13) Project [codegen id : 2] +Output [1]: [d_date_sk#10] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] + +(14) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(15) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cr_returned_date_sk#5] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(16) Project [codegen id : 7] +Output [5]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#6, cr_net_loss#8] +Input [7]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returned_date_sk#5, cr_returning_customer_sk#6, cr_net_loss#8, d_date_sk#10] + +(17) Scan parquet default.customer +Output [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_hdemo_sk#16, c_current_addr_sk#17] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_hdemo_sk#16, c_current_addr_sk#17] + +(19) Filter [codegen id : 3] +Input [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_hdemo_sk#16, c_current_addr_sk#17] +Condition : (((isnotnull(c_customer_sk#14) AND isnotnull(c_current_addr_sk#17)) AND isnotnull(c_current_cdemo_sk#15)) AND isnotnull(c_current_hdemo_sk#16)) + +(20) BroadcastExchange +Input [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_hdemo_sk#16, c_current_addr_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#18] + +(21) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cr_returning_customer_sk#6] +Right keys [1]: [c_customer_sk#14] +Join condition: None + +(22) Project [codegen id : 7] +Output [7]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#8, c_current_cdemo_sk#15, c_current_hdemo_sk#16, c_current_addr_sk#17] +Input [9]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#6, cr_net_loss#8, c_customer_sk#14, c_current_cdemo_sk#15, c_current_hdemo_sk#16, c_current_addr_sk#17] + +(23) Scan parquet default.customer_address +Output [2]: [ca_address_sk#19, ca_gmt_offset#20] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-7.00), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [2]: [ca_address_sk#19, ca_gmt_offset#20] + +(25) Filter [codegen id : 4] +Input [2]: [ca_address_sk#19, ca_gmt_offset#20] 
+Condition : ((isnotnull(ca_gmt_offset#20) AND (ca_gmt_offset#20 = -7.00)) AND isnotnull(ca_address_sk#19)) + +(26) Project [codegen id : 4] +Output [1]: [ca_address_sk#19] +Input [2]: [ca_address_sk#19, ca_gmt_offset#20] + +(27) BroadcastExchange +Input [1]: [ca_address_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] + +(28) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_current_addr_sk#17] +Right keys [1]: [ca_address_sk#19] +Join condition: None + +(29) Project [codegen id : 7] +Output [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#8, c_current_cdemo_sk#15, c_current_hdemo_sk#16] +Input [8]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#8, c_current_cdemo_sk#15, c_current_hdemo_sk#16, c_current_addr_sk#17, ca_address_sk#19] + +(30) Scan parquet default.customer_demographics +Output [3]: [cd_demo_sk#22, cd_marital_status#23, cd_education_status#24] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_demographics] +PushedFilters: [Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Unknown)),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,Advanced Degree))), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 5] +Input [3]: [cd_demo_sk#22, cd_marital_status#23, cd_education_status#24] + +(32) Filter [codegen id : 5] +Input [3]: [cd_demo_sk#22, cd_marital_status#23, cd_education_status#24] +Condition : ((((cd_marital_status#23 = M) AND (cd_education_status#24 = Unknown)) OR ((cd_marital_status#23 = W) AND (cd_education_status#24 = Advanced Degree))) AND isnotnull(cd_demo_sk#22)) + +(33) BroadcastExchange +Input [3]: [cd_demo_sk#22, cd_marital_status#23, cd_education_status#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#25] + +(34) BroadcastHashJoin [codegen 
id : 7] +Left keys [1]: [c_current_cdemo_sk#15] +Right keys [1]: [cd_demo_sk#22] +Join condition: None + +(35) Project [codegen id : 7] +Output [7]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#8, c_current_hdemo_sk#16, cd_marital_status#23, cd_education_status#24] +Input [9]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#8, c_current_cdemo_sk#15, c_current_hdemo_sk#16, cd_demo_sk#22, cd_marital_status#23, cd_education_status#24] + +(36) Scan parquet default.household_demographics +Output [2]: [hd_demo_sk#26, hd_buy_potential#27] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/household_demographics] +PushedFilters: [IsNotNull(hd_buy_potential), StringStartsWith(hd_buy_potential,Unknown), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(37) ColumnarToRow [codegen id : 6] +Input [2]: [hd_demo_sk#26, hd_buy_potential#27] + +(38) Filter [codegen id : 6] +Input [2]: [hd_demo_sk#26, hd_buy_potential#27] +Condition : ((isnotnull(hd_buy_potential#27) AND StartsWith(hd_buy_potential#27, Unknown)) AND isnotnull(hd_demo_sk#26)) + +(39) Project [codegen id : 6] +Output [1]: [hd_demo_sk#26] +Input [2]: [hd_demo_sk#26, hd_buy_potential#27] + +(40) BroadcastExchange +Input [1]: [hd_demo_sk#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#28] + +(41) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_current_hdemo_sk#16] +Right keys [1]: [hd_demo_sk#26] +Join condition: None + +(42) Project [codegen id : 7] +Output [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#8, cd_marital_status#23, cd_education_status#24] +Input [8]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#8, c_current_hdemo_sk#16, cd_marital_status#23, cd_education_status#24, hd_demo_sk#26] + +(43) HashAggregate [codegen id : 7] +Input [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#8, 
cd_marital_status#23, cd_education_status#24] +Keys [5]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#23, cd_education_status#24] +Functions [1]: [partial_sum(UnscaledValue(cr_net_loss#8))] +Aggregate Attributes [1]: [sum#29] +Results [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#23, cd_education_status#24, sum#30] + +(44) Exchange +Input [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#23, cd_education_status#24, sum#30] +Arguments: hashpartitioning(cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#23, cd_education_status#24, 5), true, [id=#31] + +(45) HashAggregate [codegen id : 8] +Input [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#23, cd_education_status#24, sum#30] +Keys [5]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#23, cd_education_status#24] +Functions [1]: [sum(UnscaledValue(cr_net_loss#8))] +Aggregate Attributes [1]: [sum(UnscaledValue(cr_net_loss#8))#32] +Results [4]: [cc_call_center_id#2 AS Call_Center#33, cc_name#3 AS Call_Center_Name#34, cc_manager#4 AS Manager#35, MakeDecimal(sum(UnscaledValue(cr_net_loss#8))#32,17,2) AS Returns_Loss#36] + +(46) Exchange +Input [4]: [Call_Center#33, Call_Center_Name#34, Manager#35, Returns_Loss#36] +Arguments: rangepartitioning(Returns_Loss#36 DESC NULLS LAST, 5), true, [id=#37] + +(47) Sort [codegen id : 9] +Input [4]: [Call_Center#33, Call_Center_Name#34, Manager#35, Returns_Loss#36] +Arguments: [Returns_Loss#36 DESC NULLS LAST], true, 0 + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/simplified.txt new file mode 100644 index 0000000000000..2d55e53e02ccc --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/simplified.txt @@ -0,0 +1,71 @@ +WholeStageCodegen (9) + Sort [Returns_Loss] + InputAdapter + Exchange [Returns_Loss] 
#1 + WholeStageCodegen (8) + HashAggregate [cc_call_center_id,cc_manager,cc_name,cd_education_status,cd_marital_status,sum] [Call_Center,Call_Center_Name,Manager,Returns_Loss,sum,sum(UnscaledValue(cr_net_loss))] + InputAdapter + Exchange [cc_call_center_id,cc_manager,cc_name,cd_education_status,cd_marital_status] #2 + WholeStageCodegen (7) + HashAggregate [cc_call_center_id,cc_manager,cc_name,cd_education_status,cd_marital_status,cr_net_loss] [sum,sum] + Project [cc_call_center_id,cc_manager,cc_name,cd_education_status,cd_marital_status,cr_net_loss] + BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] + Project [c_current_hdemo_sk,cc_call_center_id,cc_manager,cc_name,cd_education_status,cd_marital_status,cr_net_loss] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk,c_current_hdemo_sk,cc_call_center_id,cc_manager,cc_name,cr_net_loss] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,cc_call_center_id,cc_manager,cc_name,cr_net_loss] + BroadcastHashJoin [c_customer_sk,cr_returning_customer_sk] + Project [cc_call_center_id,cc_manager,cc_name,cr_net_loss,cr_returning_customer_sk] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Project [cc_call_center_id,cc_manager,cc_name,cr_net_loss,cr_returned_date_sk,cr_returning_customer_sk] + BroadcastHashJoin [cc_call_center_sk,cr_call_center_sk] + Filter [cc_call_center_sk] + ColumnarToRow + InputAdapter + Scan parquet default.call_center [cc_call_center_id,cc_call_center_sk,cc_manager,cc_name] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [cr_call_center_sk,cr_returned_date_sk,cr_returning_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_call_center_sk,cr_net_loss,cr_returned_date_sk,cr_returning_customer_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + 
Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + Project [ca_address_sk] + Filter [ca_address_sk,ca_gmt_offset] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (5) + Filter [cd_demo_sk,cd_education_status,cd_marital_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_marital_status] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (6) + Project [hd_demo_sk] + Filter [hd_buy_potential,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_buy_potential,hd_demo_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt new file mode 100644 index 0000000000000..3a8f6f316837b --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt @@ -0,0 +1,196 @@ +== Physical Plan == +TakeOrderedAndProject (34) ++- * HashAggregate (33) + +- Exchange (32) + +- * HashAggregate (31) + +- * Project (30) + +- * BroadcastHashJoin Inner BuildRight (29) + :- * Project (27) + : +- * BroadcastHashJoin Inner BuildLeft (26) + : :- BroadcastExchange (22) + : : +- * Project (21) + : : +- * BroadcastHashJoin Inner BuildLeft (20) + : : :- BroadcastExchange (5) + : : : +- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.item (1) + : : +- * Filter (19) + : : +- * HashAggregate (18) + : : +- 
Exchange (17) + : : +- * HashAggregate (16) + : : +- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Filter (8) + : : : +- * ColumnarToRow (7) + : : : +- Scan parquet default.web_sales (6) + : : +- BroadcastExchange (13) + : : +- * Project (12) + : : +- * Filter (11) + : : +- * ColumnarToRow (10) + : : +- Scan parquet default.date_dim (9) + : +- * Filter (25) + : +- * ColumnarToRow (24) + : +- Scan parquet default.web_sales (23) + +- ReusedExchange (28) + + +(1) Scan parquet default.item +Output [2]: [i_item_sk#1, i_manufact_id#2] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,350), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [2]: [i_item_sk#1, i_manufact_id#2] + +(3) Filter [codegen id : 1] +Input [2]: [i_item_sk#1, i_manufact_id#2] +Condition : ((isnotnull(i_manufact_id#2) AND (i_manufact_id#2 = 350)) AND isnotnull(i_item_sk#1)) + +(4) Project [codegen id : 1] +Output [1]: [i_item_sk#1] +Input [2]: [i_item_sk#1, i_manufact_id#2] + +(5) BroadcastExchange +Input [1]: [i_item_sk#1] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#3] + +(6) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#4, ws_item_sk#5, ws_ext_discount_amt#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 3] +Input [3]: [ws_sold_date_sk#4, ws_item_sk#5, ws_ext_discount_amt#6] + +(8) Filter [codegen id : 3] +Input [3]: [ws_sold_date_sk#4, ws_item_sk#5, ws_ext_discount_amt#6] +Condition : (isnotnull(ws_sold_date_sk#4) AND 
isnotnull(ws_item_sk#5)) + +(9) Scan parquet default.date_dim +Output [2]: [d_date_sk#7, d_date#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-01-27), LessThanOrEqual(d_date,2000-04-26), IsNotNull(d_date_sk)] +ReadSchema: struct + +(10) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#7, d_date#8] + +(11) Filter [codegen id : 2] +Input [2]: [d_date_sk#7, d_date#8] +Condition : (((isnotnull(d_date#8) AND (d_date#8 >= 10983)) AND (d_date#8 <= 11073)) AND isnotnull(d_date_sk#7)) + +(12) Project [codegen id : 2] +Output [1]: [d_date_sk#7] +Input [2]: [d_date_sk#7, d_date#8] + +(13) BroadcastExchange +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_sold_date_sk#4] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(15) Project [codegen id : 3] +Output [2]: [ws_item_sk#5, ws_ext_discount_amt#6] +Input [4]: [ws_sold_date_sk#4, ws_item_sk#5, ws_ext_discount_amt#6, d_date_sk#7] + +(16) HashAggregate [codegen id : 3] +Input [2]: [ws_item_sk#5, ws_ext_discount_amt#6] +Keys [1]: [ws_item_sk#5] +Functions [1]: [partial_avg(UnscaledValue(ws_ext_discount_amt#6))] +Aggregate Attributes [2]: [sum#10, count#11] +Results [3]: [ws_item_sk#5, sum#12, count#13] + +(17) Exchange +Input [3]: [ws_item_sk#5, sum#12, count#13] +Arguments: hashpartitioning(ws_item_sk#5, 5), true, [id=#14] + +(18) HashAggregate +Input [3]: [ws_item_sk#5, sum#12, count#13] +Keys [1]: [ws_item_sk#5] +Functions [1]: [avg(UnscaledValue(ws_ext_discount_amt#6))] +Aggregate Attributes [1]: [avg(UnscaledValue(ws_ext_discount_amt#6))#15] +Results [2]: [CheckOverflow((1.300000 * promote_precision(cast((avg(UnscaledValue(ws_ext_discount_amt#6))#15 / 100.0) as decimal(11,6)))), 
DecimalType(14,7), true) AS (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#16, ws_item_sk#5 AS ws_item_sk#5#17] + +(19) Filter +Input [2]: [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#16, ws_item_sk#5#17] +Condition : isnotnull((CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#16) + +(20) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [ws_item_sk#5#17] +Join condition: None + +(21) Project [codegen id : 4] +Output [2]: [i_item_sk#1, (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#16] +Input [3]: [i_item_sk#1, (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#16, ws_item_sk#5#17] + +(22) BroadcastExchange +Input [2]: [i_item_sk#1, (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] + +(23) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#4, ws_item_sk#5, ws_ext_discount_amt#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_ext_discount_amt), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(24) ColumnarToRow +Input [3]: [ws_sold_date_sk#4, ws_item_sk#5, ws_ext_discount_amt#6] + +(25) Filter +Input [3]: [ws_sold_date_sk#4, ws_item_sk#5, ws_ext_discount_amt#6] +Condition : ((isnotnull(ws_item_sk#5) AND isnotnull(ws_ext_discount_amt#6)) AND isnotnull(ws_sold_date_sk#4)) + +(26) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [ws_item_sk#5] +Join condition: (cast(ws_ext_discount_amt#6 as decimal(14,7)) > (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#16) + +(27) Project [codegen id : 6] +Output 
[2]: [ws_sold_date_sk#4, ws_ext_discount_amt#6] +Input [5]: [i_item_sk#1, (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#16, ws_sold_date_sk#4, ws_item_sk#5, ws_ext_discount_amt#6] + +(28) ReusedExchange [Reuses operator id: 13] +Output [1]: [d_date_sk#7] + +(29) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#4] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(30) Project [codegen id : 6] +Output [1]: [ws_ext_discount_amt#6] +Input [3]: [ws_sold_date_sk#4, ws_ext_discount_amt#6, d_date_sk#7] + +(31) HashAggregate [codegen id : 6] +Input [1]: [ws_ext_discount_amt#6] +Keys: [] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_discount_amt#6))] +Aggregate Attributes [1]: [sum#19] +Results [1]: [sum#20] + +(32) Exchange +Input [1]: [sum#20] +Arguments: SinglePartition, true, [id=#21] + +(33) HashAggregate [codegen id : 7] +Input [1]: [sum#20] +Keys: [] +Functions [1]: [sum(UnscaledValue(ws_ext_discount_amt#6))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_discount_amt#6))#22] +Results [1]: [MakeDecimal(sum(UnscaledValue(ws_ext_discount_amt#6))#22,17,2) AS Excess Discount Amount #23] + +(34) TakeOrderedAndProject +Input [1]: [Excess Discount Amount #23] +Arguments: 100, [Excess Discount Amount #23 ASC NULLS FIRST], [Excess Discount Amount #23] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/simplified.txt new file mode 100644 index 0000000000000..d1713689175aa --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/simplified.txt @@ -0,0 +1,50 @@ +TakeOrderedAndProject [Excess Discount Amount ] + WholeStageCodegen (7) + HashAggregate [sum] [Excess Discount Amount ,sum,sum(UnscaledValue(ws_ext_discount_amt))] + InputAdapter + Exchange #1 + WholeStageCodegen (6) + HashAggregate [ws_ext_discount_amt] [sum,sum] + Project 
[ws_ext_discount_amt] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_ext_discount_amt,ws_sold_date_sk] + BroadcastHashJoin [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6))),i_item_sk,ws_ext_discount_amt,ws_item_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (4) + Project [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6))),i_item_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [i_item_sk] + Filter [i_item_sk,i_manufact_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_manufact_id] + Filter [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))] + HashAggregate [count,sum,ws_item_sk] [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6))),avg(UnscaledValue(ws_ext_discount_amt)),count,sum,ws_item_sk] + InputAdapter + Exchange [ws_item_sk] #4 + WholeStageCodegen (3) + HashAggregate [ws_ext_discount_amt,ws_item_sk] [count,count,sum,sum] + Project [ws_ext_discount_amt,ws_item_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_discount_amt,ws_item_sk,ws_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + Filter [ws_ext_discount_amt,ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_discount_amt,ws_item_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt new file mode 100644 index 0000000000000..bcda3d7ad72d1 --- /dev/null +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt @@ -0,0 +1,196 @@ +== Physical Plan == +TakeOrderedAndProject (34) ++- * HashAggregate (33) + +- Exchange (32) + +- * HashAggregate (31) + +- * Project (30) + +- * BroadcastHashJoin Inner BuildRight (29) + :- * Project (27) + : +- * BroadcastHashJoin Inner BuildRight (26) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.web_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.item (4) + : +- BroadcastExchange (25) + : +- * Filter (24) + : +- * HashAggregate (23) + : +- Exchange (22) + : +- * HashAggregate (21) + : +- * Project (20) + : +- * BroadcastHashJoin Inner BuildRight (19) + : :- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.web_sales (11) + : +- BroadcastExchange (18) + : +- * Project (17) + : +- * Filter (16) + : +- * ColumnarToRow (15) + : +- Scan parquet default.date_dim (14) + +- ReusedExchange (28) + + +(1) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_discount_amt#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_ext_discount_amt), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 6] +Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_discount_amt#3] + +(3) Filter [codegen id : 6] +Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_discount_amt#3] +Condition : ((isnotnull(ws_item_sk#2) AND isnotnull(ws_ext_discount_amt#3)) AND isnotnull(ws_sold_date_sk#1)) + +(4) Scan parquet default.item +Output [2]: [i_item_sk#4, i_manufact_id#5] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,350), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [i_item_sk#4, i_manufact_id#5] + +(6) Filter [codegen id : 1] +Input [2]: [i_item_sk#4, i_manufact_id#5] +Condition : ((isnotnull(i_manufact_id#5) AND (i_manufact_id#5 = 350)) AND isnotnull(i_item_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [i_item_sk#4] +Input [2]: [i_item_sk#4, i_manufact_id#5] + +(8) BroadcastExchange +Input [1]: [i_item_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(9) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_item_sk#2] +Right keys [1]: [i_item_sk#4] +Join condition: None + +(10) Project [codegen id : 6] +Output [3]: [ws_sold_date_sk#1, ws_ext_discount_amt#3, i_item_sk#4] +Input [4]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_discount_amt#3, i_item_sk#4] + +(11) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_discount_amt#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 3] +Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_discount_amt#3] + +(13) Filter [codegen id : 3] +Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_discount_amt#3] +Condition : (isnotnull(ws_sold_date_sk#1) AND isnotnull(ws_item_sk#2)) + +(14) Scan parquet default.date_dim +Output [2]: [d_date_sk#7, d_date#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_date), 
GreaterThanOrEqual(d_date,2000-01-27), LessThanOrEqual(d_date,2000-04-26), IsNotNull(d_date_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#7, d_date#8] + +(16) Filter [codegen id : 2] +Input [2]: [d_date_sk#7, d_date#8] +Condition : (((isnotnull(d_date#8) AND (d_date#8 >= 10983)) AND (d_date#8 <= 11073)) AND isnotnull(d_date_sk#7)) + +(17) Project [codegen id : 2] +Output [1]: [d_date_sk#7] +Input [2]: [d_date_sk#7, d_date#8] + +(18) BroadcastExchange +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + +(19) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_sold_date_sk#1] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(20) Project [codegen id : 3] +Output [2]: [ws_item_sk#2, ws_ext_discount_amt#3] +Input [4]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_discount_amt#3, d_date_sk#7] + +(21) HashAggregate [codegen id : 3] +Input [2]: [ws_item_sk#2, ws_ext_discount_amt#3] +Keys [1]: [ws_item_sk#2] +Functions [1]: [partial_avg(UnscaledValue(ws_ext_discount_amt#3))] +Aggregate Attributes [2]: [sum#10, count#11] +Results [3]: [ws_item_sk#2, sum#12, count#13] + +(22) Exchange +Input [3]: [ws_item_sk#2, sum#12, count#13] +Arguments: hashpartitioning(ws_item_sk#2, 5), true, [id=#14] + +(23) HashAggregate [codegen id : 4] +Input [3]: [ws_item_sk#2, sum#12, count#13] +Keys [1]: [ws_item_sk#2] +Functions [1]: [avg(UnscaledValue(ws_ext_discount_amt#3))] +Aggregate Attributes [1]: [avg(UnscaledValue(ws_ext_discount_amt#3))#15] +Results [2]: [CheckOverflow((1.300000 * promote_precision(cast((avg(UnscaledValue(ws_ext_discount_amt#3))#15 / 100.0) as decimal(11,6)))), DecimalType(14,7), true) AS (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#16, ws_item_sk#2 AS ws_item_sk#2#17] + +(24) Filter [codegen id : 4] +Input [2]: [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#16, ws_item_sk#2#17] 
+Condition : isnotnull((CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#16) + +(25) BroadcastExchange +Input [2]: [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#16, ws_item_sk#2#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#18] + +(26) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [i_item_sk#4] +Right keys [1]: [ws_item_sk#2#17] +Join condition: (cast(ws_ext_discount_amt#3 as decimal(14,7)) > (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#16) + +(27) Project [codegen id : 6] +Output [2]: [ws_sold_date_sk#1, ws_ext_discount_amt#3] +Input [5]: [ws_sold_date_sk#1, ws_ext_discount_amt#3, i_item_sk#4, (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#16, ws_item_sk#2#17] + +(28) ReusedExchange [Reuses operator id: 18] +Output [1]: [d_date_sk#7] + +(29) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#1] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(30) Project [codegen id : 6] +Output [1]: [ws_ext_discount_amt#3] +Input [3]: [ws_sold_date_sk#1, ws_ext_discount_amt#3, d_date_sk#7] + +(31) HashAggregate [codegen id : 6] +Input [1]: [ws_ext_discount_amt#3] +Keys: [] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_discount_amt#3))] +Aggregate Attributes [1]: [sum#19] +Results [1]: [sum#20] + +(32) Exchange +Input [1]: [sum#20] +Arguments: SinglePartition, true, [id=#21] + +(33) HashAggregate [codegen id : 7] +Input [1]: [sum#20] +Keys: [] +Functions [1]: [sum(UnscaledValue(ws_ext_discount_amt#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_discount_amt#3))#22] +Results [1]: [MakeDecimal(sum(UnscaledValue(ws_ext_discount_amt#3))#22,17,2) AS Excess Discount Amount #23] + +(34) TakeOrderedAndProject +Input [1]: [Excess Discount Amount #23] +Arguments: 100, [Excess Discount Amount #23 ASC NULLS FIRST], [Excess Discount Amount #23] + diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt new file mode 100644 index 0000000000000..98daa0669d83a --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt @@ -0,0 +1,50 @@ +TakeOrderedAndProject [Excess Discount Amount ] + WholeStageCodegen (7) + HashAggregate [sum] [Excess Discount Amount ,sum,sum(UnscaledValue(ws_ext_discount_amt))] + InputAdapter + Exchange #1 + WholeStageCodegen (6) + HashAggregate [ws_ext_discount_amt] [sum,sum] + Project [ws_ext_discount_amt] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_ext_discount_amt,ws_sold_date_sk] + BroadcastHashJoin [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6))),i_item_sk,ws_ext_discount_amt,ws_item_sk] + Project [i_item_sk,ws_ext_discount_amt,ws_sold_date_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Filter [ws_ext_discount_amt,ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_discount_amt,ws_item_sk,ws_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [i_item_sk] + Filter [i_item_sk,i_manufact_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_manufact_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (4) + Filter [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))] + HashAggregate [count,sum,ws_item_sk] [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6))),avg(UnscaledValue(ws_ext_discount_amt)),count,sum,ws_item_sk] + InputAdapter + Exchange [ws_item_sk] #4 + WholeStageCodegen (3) + HashAggregate [ws_ext_discount_amt,ws_item_sk] [count,count,sum,sum] + Project [ws_ext_discount_amt,ws_item_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + 
InputAdapter + Scan parquet default.web_sales [ws_ext_discount_amt,ws_item_sk,ws_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93.sf100/explain.txt new file mode 100644 index 0000000000000..b71f2ab6ae3c1 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93.sf100/explain.txt @@ -0,0 +1,126 @@ +== Physical Plan == +TakeOrderedAndProject (22) ++- * HashAggregate (21) + +- Exchange (20) + +- * HashAggregate (19) + +- * Project (18) + +- * SortMergeJoin Inner (17) + :- * Sort (12) + : +- Exchange (11) + : +- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.store_returns (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.reason (4) + +- * Sort (16) + +- Exchange (15) + +- * ColumnarToRow (14) + +- Scan parquet default.store_sales (13) + + +(1) Scan parquet default.store_returns +Output [4]: [sr_item_sk#1, sr_reason_sk#2, sr_ticket_number#3, sr_return_quantity#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_returns] +PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number), IsNotNull(sr_reason_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [4]: [sr_item_sk#1, sr_reason_sk#2, sr_ticket_number#3, sr_return_quantity#4] + +(3) Filter [codegen id : 2] +Input [4]: [sr_item_sk#1, sr_reason_sk#2, sr_ticket_number#3, sr_return_quantity#4] 
+Condition : ((isnotnull(sr_item_sk#1) AND isnotnull(sr_ticket_number#3)) AND isnotnull(sr_reason_sk#2)) + +(4) Scan parquet default.reason +Output [2]: [r_reason_sk#5, r_reason_desc#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/reason] +PushedFilters: [IsNotNull(r_reason_desc), EqualTo(r_reason_desc,reason 28), IsNotNull(r_reason_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [r_reason_sk#5, r_reason_desc#6] + +(6) Filter [codegen id : 1] +Input [2]: [r_reason_sk#5, r_reason_desc#6] +Condition : ((isnotnull(r_reason_desc#6) AND (r_reason_desc#6 = reason 28)) AND isnotnull(r_reason_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [r_reason_sk#5] +Input [2]: [r_reason_sk#5, r_reason_desc#6] + +(8) BroadcastExchange +Input [1]: [r_reason_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [sr_reason_sk#2] +Right keys [1]: [cast(r_reason_sk#5 as bigint)] +Join condition: None + +(10) Project [codegen id : 2] +Output [3]: [sr_item_sk#1, sr_ticket_number#3, sr_return_quantity#4] +Input [5]: [sr_item_sk#1, sr_reason_sk#2, sr_ticket_number#3, sr_return_quantity#4, r_reason_sk#5] + +(11) Exchange +Input [3]: [sr_item_sk#1, sr_ticket_number#3, sr_return_quantity#4] +Arguments: hashpartitioning(sr_item_sk#1, sr_ticket_number#3, 5), true, [id=#8] + +(12) Sort [codegen id : 3] +Input [3]: [sr_item_sk#1, sr_ticket_number#3, sr_return_quantity#4] +Arguments: [sr_item_sk#1 ASC NULLS FIRST, sr_ticket_number#3 ASC NULLS FIRST], false, 0 + +(13) Scan parquet default.store_sales +Output [5]: [ss_item_sk#9, ss_customer_sk#10, ss_ticket_number#11, ss_quantity#12, ss_sales_price#13] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 4] +Input [5]: [ss_item_sk#9, ss_customer_sk#10, ss_ticket_number#11, ss_quantity#12, ss_sales_price#13] + +(15) Exchange +Input [5]: [ss_item_sk#9, ss_customer_sk#10, ss_ticket_number#11, ss_quantity#12, ss_sales_price#13] +Arguments: hashpartitioning(cast(ss_item_sk#9 as bigint), cast(ss_ticket_number#11 as bigint), 5), true, [id=#14] + +(16) Sort [codegen id : 5] +Input [5]: [ss_item_sk#9, ss_customer_sk#10, ss_ticket_number#11, ss_quantity#12, ss_sales_price#13] +Arguments: [cast(ss_item_sk#9 as bigint) ASC NULLS FIRST, cast(ss_ticket_number#11 as bigint) ASC NULLS FIRST], false, 0 + +(17) SortMergeJoin [codegen id : 6] +Left keys [2]: [sr_item_sk#1, sr_ticket_number#3] +Right keys [2]: [cast(ss_item_sk#9 as bigint), cast(ss_ticket_number#11 as bigint)] +Join condition: None + +(18) Project [codegen id : 6] +Output [2]: [ss_customer_sk#10, CASE WHEN isnotnull(sr_return_quantity#4) THEN CheckOverflow((promote_precision(cast(cast((ss_quantity#12 - sr_return_quantity#4) as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#13 as decimal(12,2)))), DecimalType(18,2), true) ELSE CheckOverflow((promote_precision(cast(cast(ss_quantity#12 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#13 as decimal(12,2)))), DecimalType(18,2), true) END AS act_sales#15] +Input [8]: [sr_item_sk#1, sr_ticket_number#3, sr_return_quantity#4, ss_item_sk#9, ss_customer_sk#10, ss_ticket_number#11, ss_quantity#12, ss_sales_price#13] + +(19) HashAggregate [codegen id : 6] +Input [2]: [ss_customer_sk#10, act_sales#15] +Keys [1]: [ss_customer_sk#10] +Functions [1]: [partial_sum(act_sales#15)] +Aggregate Attributes [2]: [sum#16, isEmpty#17] +Results [3]: [ss_customer_sk#10, sum#18, isEmpty#19] + +(20) Exchange +Input [3]: [ss_customer_sk#10, sum#18, 
isEmpty#19] +Arguments: hashpartitioning(ss_customer_sk#10, 5), true, [id=#20] + +(21) HashAggregate [codegen id : 7] +Input [3]: [ss_customer_sk#10, sum#18, isEmpty#19] +Keys [1]: [ss_customer_sk#10] +Functions [1]: [sum(act_sales#15)] +Aggregate Attributes [1]: [sum(act_sales#15)#21] +Results [2]: [ss_customer_sk#10, sum(act_sales#15)#21 AS sumsales#22] + +(22) TakeOrderedAndProject +Input [2]: [ss_customer_sk#10, sumsales#22] +Arguments: 100, [sumsales#22 ASC NULLS FIRST, ss_customer_sk#10 ASC NULLS FIRST], [ss_customer_sk#10, sumsales#22] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93.sf100/simplified.txt new file mode 100644 index 0000000000000..e1144076af8c6 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93.sf100/simplified.txt @@ -0,0 +1,38 @@ +TakeOrderedAndProject [ss_customer_sk,sumsales] + WholeStageCodegen (7) + HashAggregate [isEmpty,ss_customer_sk,sum] [isEmpty,sum,sum(act_sales),sumsales] + InputAdapter + Exchange [ss_customer_sk] #1 + WholeStageCodegen (6) + HashAggregate [act_sales,ss_customer_sk] [isEmpty,isEmpty,sum,sum] + Project [sr_return_quantity,ss_customer_sk,ss_quantity,ss_sales_price] + SortMergeJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + InputAdapter + WholeStageCodegen (3) + Sort [sr_item_sk,sr_ticket_number] + InputAdapter + Exchange [sr_item_sk,sr_ticket_number] #2 + WholeStageCodegen (2) + Project [sr_item_sk,sr_return_quantity,sr_ticket_number] + BroadcastHashJoin [r_reason_sk,sr_reason_sk] + Filter [sr_item_sk,sr_reason_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_reason_sk,sr_return_quantity,sr_ticket_number] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [r_reason_sk] + Filter [r_reason_desc,r_reason_sk] + ColumnarToRow + InputAdapter + Scan parquet 
default.reason [r_reason_desc,r_reason_sk] + InputAdapter + WholeStageCodegen (5) + Sort [ss_item_sk,ss_ticket_number] + InputAdapter + Exchange [ss_item_sk,ss_ticket_number] #4 + WholeStageCodegen (4) + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_ticket_number] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/explain.txt new file mode 100644 index 0000000000000..496c2ab591de4 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/explain.txt @@ -0,0 +1,111 @@ +== Physical Plan == +TakeOrderedAndProject (19) ++- * HashAggregate (18) + +- Exchange (17) + +- * HashAggregate (16) + +- * Project (15) + +- * BroadcastHashJoin Inner BuildRight (14) + :- * Project (8) + : +- * BroadcastHashJoin Inner BuildRight (7) + : :- * ColumnarToRow (2) + : : +- Scan parquet default.store_sales (1) + : +- BroadcastExchange (6) + : +- * Filter (5) + : +- * ColumnarToRow (4) + : +- Scan parquet default.store_returns (3) + +- BroadcastExchange (13) + +- * Project (12) + +- * Filter (11) + +- * ColumnarToRow (10) + +- Scan parquet default.reason (9) + + +(1) Scan parquet default.store_sales +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5] + +(3) Scan parquet default.store_returns +Output [4]: [sr_item_sk#6, sr_reason_sk#7, sr_ticket_number#8, sr_return_quantity#9] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_returns] +PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number), IsNotNull(sr_reason_sk)] +ReadSchema: struct + +(4) ColumnarToRow [codegen id : 1] +Input [4]: [sr_item_sk#6, sr_reason_sk#7, sr_ticket_number#8, sr_return_quantity#9] + +(5) Filter [codegen id : 1] +Input [4]: [sr_item_sk#6, sr_reason_sk#7, sr_ticket_number#8, sr_return_quantity#9] +Condition : ((isnotnull(sr_item_sk#6) AND isnotnull(sr_ticket_number#8)) AND isnotnull(sr_reason_sk#7)) + +(6) BroadcastExchange +Input [4]: [sr_item_sk#6, sr_reason_sk#7, sr_ticket_number#8, sr_return_quantity#9] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false], input[2, bigint, false]),false), [id=#10] + +(7) BroadcastHashJoin [codegen id : 3] +Left keys [2]: [cast(ss_item_sk#1 as bigint), cast(ss_ticket_number#3 as bigint)] +Right keys [2]: [sr_item_sk#6, sr_ticket_number#8] +Join condition: None + +(8) Project [codegen id : 3] +Output [5]: [ss_customer_sk#2, ss_quantity#4, ss_sales_price#5, sr_reason_sk#7, sr_return_quantity#9] +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5, sr_item_sk#6, sr_reason_sk#7, sr_ticket_number#8, sr_return_quantity#9] + +(9) Scan parquet default.reason +Output [2]: [r_reason_sk#11, r_reason_desc#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/reason] +PushedFilters: [IsNotNull(r_reason_desc), EqualTo(r_reason_desc,reason 28), IsNotNull(r_reason_sk)] +ReadSchema: struct + +(10) ColumnarToRow [codegen id : 2] +Input [2]: [r_reason_sk#11, r_reason_desc#12] + +(11) Filter [codegen id : 2] +Input [2]: [r_reason_sk#11, r_reason_desc#12] +Condition : ((isnotnull(r_reason_desc#12) AND (r_reason_desc#12 = reason 28)) AND isnotnull(r_reason_sk#11)) + +(12) Project [codegen id : 2] +Output 
[1]: [r_reason_sk#11] +Input [2]: [r_reason_sk#11, r_reason_desc#12] + +(13) BroadcastExchange +Input [1]: [r_reason_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [sr_reason_sk#7] +Right keys [1]: [cast(r_reason_sk#11 as bigint)] +Join condition: None + +(15) Project [codegen id : 3] +Output [2]: [ss_customer_sk#2, CASE WHEN isnotnull(sr_return_quantity#9) THEN CheckOverflow((promote_precision(cast(cast((ss_quantity#4 - sr_return_quantity#9) as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#5 as decimal(12,2)))), DecimalType(18,2), true) ELSE CheckOverflow((promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#5 as decimal(12,2)))), DecimalType(18,2), true) END AS act_sales#14] +Input [6]: [ss_customer_sk#2, ss_quantity#4, ss_sales_price#5, sr_reason_sk#7, sr_return_quantity#9, r_reason_sk#11] + +(16) HashAggregate [codegen id : 3] +Input [2]: [ss_customer_sk#2, act_sales#14] +Keys [1]: [ss_customer_sk#2] +Functions [1]: [partial_sum(act_sales#14)] +Aggregate Attributes [2]: [sum#15, isEmpty#16] +Results [3]: [ss_customer_sk#2, sum#17, isEmpty#18] + +(17) Exchange +Input [3]: [ss_customer_sk#2, sum#17, isEmpty#18] +Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#19] + +(18) HashAggregate [codegen id : 4] +Input [3]: [ss_customer_sk#2, sum#17, isEmpty#18] +Keys [1]: [ss_customer_sk#2] +Functions [1]: [sum(act_sales#14)] +Aggregate Attributes [1]: [sum(act_sales#14)#20] +Results [2]: [ss_customer_sk#2, sum(act_sales#14)#20 AS sumsales#21] + +(19) TakeOrderedAndProject +Input [2]: [ss_customer_sk#2, sumsales#21] +Arguments: 100, [sumsales#21 ASC NULLS FIRST, ss_customer_sk#2 ASC NULLS FIRST], [ss_customer_sk#2, sumsales#21] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/simplified.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/simplified.txt new file mode 100644 index 0000000000000..05e17cc4b3bd8 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/simplified.txt @@ -0,0 +1,29 @@ +TakeOrderedAndProject [ss_customer_sk,sumsales] + WholeStageCodegen (4) + HashAggregate [isEmpty,ss_customer_sk,sum] [isEmpty,sum,sum(act_sales),sumsales] + InputAdapter + Exchange [ss_customer_sk] #1 + WholeStageCodegen (3) + HashAggregate [act_sales,ss_customer_sk] [isEmpty,isEmpty,sum,sum] + Project [sr_return_quantity,ss_customer_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [r_reason_sk,sr_reason_sk] + Project [sr_reason_sk,sr_return_quantity,ss_customer_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_ticket_number] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [sr_item_sk,sr_reason_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_reason_sk,sr_return_quantity,sr_ticket_number] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [r_reason_sk] + Filter [r_reason_desc,r_reason_sk] + ColumnarToRow + InputAdapter + Scan parquet default.reason [r_reason_desc,r_reason_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt new file mode 100644 index 0000000000000..aed3635e09bfe --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt @@ -0,0 +1,265 @@ +== Physical Plan == +TakeOrderedAndProject (47) ++- * HashAggregate (46) + +- Exchange (45) + +- * HashAggregate (44) + +- * HashAggregate (43) + +- Exchange (42) + +- * 
HashAggregate (41) + +- * Project (40) + +- * BroadcastHashJoin Inner BuildRight (39) + :- * Project (33) + : +- * BroadcastHashJoin Inner BuildRight (32) + : :- * Project (26) + : : +- * BroadcastHashJoin Inner BuildRight (25) + : : :- SortMergeJoin LeftAnti (19) + : : : :- * Sort (14) + : : : : +- Exchange (13) + : : : : +- * Project (12) + : : : : +- SortMergeJoin LeftSemi (11) + : : : : :- * Sort (5) + : : : : : +- Exchange (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.web_sales (1) + : : : : +- * Sort (10) + : : : : +- Exchange (9) + : : : : +- * Project (8) + : : : : +- * ColumnarToRow (7) + : : : : +- Scan parquet default.web_sales (6) + : : : +- * Sort (18) + : : : +- Exchange (17) + : : : +- * ColumnarToRow (16) + : : : +- Scan parquet default.web_returns (15) + : : +- BroadcastExchange (24) + : : +- * Project (23) + : : +- * Filter (22) + : : +- * ColumnarToRow (21) + : : +- Scan parquet default.customer_address (20) + : +- BroadcastExchange (31) + : +- * Project (30) + : +- * Filter (29) + : +- * ColumnarToRow (28) + : +- Scan parquet default.web_site (27) + +- BroadcastExchange (38) + +- * Project (37) + +- * Filter (36) + +- * ColumnarToRow (35) + +- Scan parquet default.date_dim (34) + + +(1) Scan parquet default.web_sales +Output [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_ship_date_sk), IsNotNull(ws_ship_addr_sk), IsNotNull(ws_web_site_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] + +(3) Filter [codegen id : 1] +Input [7]: 
[ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Condition : ((isnotnull(ws_ship_date_sk#1) AND isnotnull(ws_ship_addr_sk#2)) AND isnotnull(ws_web_site_sk#3)) + +(4) Exchange +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Arguments: hashpartitioning(ws_order_number#5, 5), true, [id=#8] + +(5) Sort [codegen id : 2] +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Arguments: [ws_order_number#5 ASC NULLS FIRST], false, 0 + +(6) Scan parquet default.web_sales +Output [2]: [ws_warehouse_sk#4, ws_order_number#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 3] +Input [2]: [ws_warehouse_sk#4, ws_order_number#5] + +(8) Project [codegen id : 3] +Output [2]: [ws_warehouse_sk#4 AS ws_warehouse_sk#4#9, ws_order_number#5 AS ws_order_number#5#10] +Input [2]: [ws_warehouse_sk#4, ws_order_number#5] + +(9) Exchange +Input [2]: [ws_warehouse_sk#4#9, ws_order_number#5#10] +Arguments: hashpartitioning(ws_order_number#5#10, 5), true, [id=#11] + +(10) Sort [codegen id : 4] +Input [2]: [ws_warehouse_sk#4#9, ws_order_number#5#10] +Arguments: [ws_order_number#5#10 ASC NULLS FIRST], false, 0 + +(11) SortMergeJoin +Left keys [1]: [ws_order_number#5] +Right keys [1]: [ws_order_number#5#10] +Join condition: NOT (ws_warehouse_sk#4 = ws_warehouse_sk#4#9) + +(12) Project [codegen id : 5] +Output [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, 
ws_net_profit#7] + +(13) Exchange +Input [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Arguments: hashpartitioning(cast(ws_order_number#5 as bigint), 5), true, [id=#12] + +(14) Sort [codegen id : 6] +Input [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Arguments: [cast(ws_order_number#5 as bigint) ASC NULLS FIRST], false, 0 + +(15) Scan parquet default.web_returns +Output [1]: [wr_order_number#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_returns] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 7] +Input [1]: [wr_order_number#13] + +(17) Exchange +Input [1]: [wr_order_number#13] +Arguments: hashpartitioning(wr_order_number#13, 5), true, [id=#14] + +(18) Sort [codegen id : 8] +Input [1]: [wr_order_number#13] +Arguments: [wr_order_number#13 ASC NULLS FIRST], false, 0 + +(19) SortMergeJoin +Left keys [1]: [cast(ws_order_number#5 as bigint)] +Right keys [1]: [wr_order_number#13] +Join condition: None + +(20) Scan parquet default.customer_address +Output [2]: [ca_address_sk#15, ca_state#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,IL), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 9] +Input [2]: [ca_address_sk#15, ca_state#16] + +(22) Filter [codegen id : 9] +Input [2]: [ca_address_sk#15, ca_state#16] +Condition : ((isnotnull(ca_state#16) AND (ca_state#16 = IL)) AND isnotnull(ca_address_sk#15)) + +(23) Project [codegen id : 9] +Output [1]: [ca_address_sk#15] +Input [2]: [ca_address_sk#15, ca_state#16] + +(24) BroadcastExchange +Input [1]: [ca_address_sk#15] 
+Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] + +(25) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [ws_ship_addr_sk#2] +Right keys [1]: [ca_address_sk#15] +Join condition: None + +(26) Project [codegen id : 12] +Output [5]: [ws_ship_date_sk#1, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, ca_address_sk#15] + +(27) Scan parquet default.web_site +Output [2]: [web_site_sk#18, web_company_name#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_site] +PushedFilters: [IsNotNull(web_company_name), EqualTo(web_company_name,pri), IsNotNull(web_site_sk)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 10] +Input [2]: [web_site_sk#18, web_company_name#19] + +(29) Filter [codegen id : 10] +Input [2]: [web_site_sk#18, web_company_name#19] +Condition : ((isnotnull(web_company_name#19) AND (web_company_name#19 = pri)) AND isnotnull(web_site_sk#18)) + +(30) Project [codegen id : 10] +Output [1]: [web_site_sk#18] +Input [2]: [web_site_sk#18, web_company_name#19] + +(31) BroadcastExchange +Input [1]: [web_site_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(32) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [ws_web_site_sk#3] +Right keys [1]: [web_site_sk#18] +Join condition: None + +(33) Project [codegen id : 12] +Output [4]: [ws_ship_date_sk#1, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Input [6]: [ws_ship_date_sk#1, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, web_site_sk#18] + +(34) Scan parquet default.date_dim +Output [2]: [d_date_sk#21, d_date#22] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-01), LessThanOrEqual(d_date,1999-04-02), IsNotNull(d_date_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 11] +Input [2]: [d_date_sk#21, d_date#22] + +(36) Filter [codegen id : 11] +Input [2]: [d_date_sk#21, d_date#22] +Condition : (((isnotnull(d_date#22) AND (d_date#22 >= 10623)) AND (d_date#22 <= 10683)) AND isnotnull(d_date_sk#21)) + +(37) Project [codegen id : 11] +Output [1]: [d_date_sk#21] +Input [2]: [d_date_sk#21, d_date#22] + +(38) BroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] + +(39) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [ws_ship_date_sk#1] +Right keys [1]: [d_date_sk#21] +Join condition: None + +(40) Project [codegen id : 12] +Output [3]: [ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Input [5]: [ws_ship_date_sk#1, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, d_date_sk#21] + +(41) HashAggregate [codegen id : 12] +Input [3]: [ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Keys [1]: [ws_order_number#5] +Functions [2]: [partial_sum(UnscaledValue(ws_ext_ship_cost#6)), partial_sum(UnscaledValue(ws_net_profit#7))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#6))#24, sum(UnscaledValue(ws_net_profit#7))#25] +Results [3]: [ws_order_number#5, sum#26, sum#27] + +(42) Exchange +Input [3]: [ws_order_number#5, sum#26, sum#27] +Arguments: hashpartitioning(ws_order_number#5, 5), true, [id=#28] + +(43) HashAggregate [codegen id : 13] +Input [3]: [ws_order_number#5, sum#26, sum#27] +Keys [1]: [ws_order_number#5] +Functions [2]: [merge_sum(UnscaledValue(ws_ext_ship_cost#6)), merge_sum(UnscaledValue(ws_net_profit#7))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#6))#24, 
sum(UnscaledValue(ws_net_profit#7))#25] +Results [3]: [ws_order_number#5, sum#26, sum#27] + +(44) HashAggregate [codegen id : 13] +Input [3]: [ws_order_number#5, sum#26, sum#27] +Keys: [] +Functions [3]: [merge_sum(UnscaledValue(ws_ext_ship_cost#6)), merge_sum(UnscaledValue(ws_net_profit#7)), partial_count(distinct ws_order_number#5)] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#24, sum(UnscaledValue(ws_net_profit#7))#25, count(ws_order_number#5)#29] +Results [3]: [sum#26, sum#27, count#30] + +(45) Exchange +Input [3]: [sum#26, sum#27, count#30] +Arguments: SinglePartition, true, [id=#31] + +(46) HashAggregate [codegen id : 14] +Input [3]: [sum#26, sum#27, count#30] +Keys: [] +Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#6)), sum(UnscaledValue(ws_net_profit#7)), count(distinct ws_order_number#5)] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#24, sum(UnscaledValue(ws_net_profit#7))#25, count(ws_order_number#5)#29] +Results [3]: [count(ws_order_number#5)#29 AS order count #32, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#6))#24,17,2) AS total shipping cost #33, MakeDecimal(sum(UnscaledValue(ws_net_profit#7))#25,17,2) AS total net profit #34] + +(47) TakeOrderedAndProject +Input [3]: [order count #32, total shipping cost #33, total net profit #34] +Arguments: 100, [order count #32 ASC NULLS FIRST], [order count #32, total shipping cost #33, total net profit #34] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/simplified.txt new file mode 100644 index 0000000000000..95ff0dbb4ad50 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/simplified.txt @@ -0,0 +1,78 @@ +TakeOrderedAndProject [order count ,total net profit ,total shipping cost ] + WholeStageCodegen (14) + HashAggregate [count,sum,sum] [count,count(ws_order_number),order 
count ,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),total net profit ,total shipping cost ] + InputAdapter + Exchange #1 + WholeStageCodegen (13) + HashAggregate [ws_order_number] [count,count,count(ws_order_number),sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] + HashAggregate [ws_order_number] [sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] + InputAdapter + Exchange [ws_order_number] #2 + WholeStageCodegen (12) + HashAggregate [ws_ext_ship_cost,ws_net_profit,ws_order_number] [sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] + Project [ws_ext_ship_cost,ws_net_profit,ws_order_number] + BroadcastHashJoin [d_date_sk,ws_ship_date_sk] + Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_date_sk] + BroadcastHashJoin [web_site_sk,ws_web_site_sk] + Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_date_sk,ws_web_site_sk] + BroadcastHashJoin [ca_address_sk,ws_ship_addr_sk] + InputAdapter + SortMergeJoin [wr_order_number,ws_order_number] + WholeStageCodegen (6) + Sort [ws_order_number] + InputAdapter + Exchange [ws_order_number] #3 + WholeStageCodegen (5) + Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_ship_date_sk,ws_web_site_sk] + InputAdapter + SortMergeJoin [ws_order_number,ws_order_number,ws_warehouse_sk,ws_warehouse_sk] + WholeStageCodegen (2) + Sort [ws_order_number] + InputAdapter + Exchange [ws_order_number] #4 + WholeStageCodegen (1) + Filter [ws_ship_addr_sk,ws_ship_date_sk,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_ship_date_sk,ws_warehouse_sk,ws_web_site_sk] + WholeStageCodegen (4) + Sort [ws_order_number] + InputAdapter + Exchange [ws_order_number] #5 + WholeStageCodegen (3) + Project [ws_order_number,ws_warehouse_sk] + ColumnarToRow + InputAdapter + 
Scan parquet default.web_sales [ws_order_number,ws_warehouse_sk] + WholeStageCodegen (8) + Sort [wr_order_number] + InputAdapter + Exchange [wr_order_number] #6 + WholeStageCodegen (7) + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_order_number] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (9) + Project [ca_address_sk] + Filter [ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (10) + Project [web_site_sk] + Filter [web_company_name,web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_site [web_company_name,web_site_sk] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (11) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt new file mode 100644 index 0000000000000..3a7c91dc09301 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt @@ -0,0 +1,235 @@ +== Physical Plan == +TakeOrderedAndProject (41) ++- * HashAggregate (40) + +- Exchange (39) + +- * HashAggregate (38) + +- * HashAggregate (37) + +- Exchange (36) + +- * HashAggregate (35) + +- * Project (34) + +- * BroadcastHashJoin Inner BuildRight (33) + :- * Project (27) + : +- * BroadcastHashJoin Inner BuildRight (26) + : :- * Project (20) + : : +- * BroadcastHashJoin Inner BuildRight (19) + : : :- * BroadcastHashJoin LeftAnti BuildRight (13) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin LeftSemi BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.web_sales (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Project (6) + : : : : +- * 
ColumnarToRow (5) + : : : : +- Scan parquet default.web_sales (4) + : : : +- BroadcastExchange (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.web_returns (10) + : : +- BroadcastExchange (18) + : : +- * Project (17) + : : +- * Filter (16) + : : +- * ColumnarToRow (15) + : : +- Scan parquet default.date_dim (14) + : +- BroadcastExchange (25) + : +- * Project (24) + : +- * Filter (23) + : +- * ColumnarToRow (22) + : +- Scan parquet default.customer_address (21) + +- BroadcastExchange (32) + +- * Project (31) + +- * Filter (30) + +- * ColumnarToRow (29) + +- Scan parquet default.web_site (28) + + +(1) Scan parquet default.web_sales +Output [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_ship_date_sk), IsNotNull(ws_ship_addr_sk), IsNotNull(ws_web_site_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 6] +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] + +(3) Filter [codegen id : 6] +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Condition : ((isnotnull(ws_ship_date_sk#1) AND isnotnull(ws_ship_addr_sk#2)) AND isnotnull(ws_web_site_sk#3)) + +(4) Scan parquet default.web_sales +Output [2]: [ws_warehouse_sk#4, ws_order_number#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [ws_warehouse_sk#4, ws_order_number#5] + +(6) Project [codegen id : 1] +Output [2]: [ws_warehouse_sk#4 AS 
ws_warehouse_sk#4#8, ws_order_number#5 AS ws_order_number#5#9] +Input [2]: [ws_warehouse_sk#4, ws_order_number#5] + +(7) BroadcastExchange +Input [2]: [ws_warehouse_sk#4#8, ws_order_number#5#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#10] + +(8) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_order_number#5] +Right keys [1]: [ws_order_number#5#9] +Join condition: NOT (ws_warehouse_sk#4 = ws_warehouse_sk#4#8) + +(9) Project [codegen id : 6] +Output [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] + +(10) Scan parquet default.web_returns +Output [1]: [wr_order_number#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_returns] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [1]: [wr_order_number#11] + +(12) BroadcastExchange +Input [1]: [wr_order_number#11] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [id=#12] + +(13) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cast(ws_order_number#5 as bigint)] +Right keys [1]: [wr_order_number#11] +Join condition: None + +(14) Scan parquet default.date_dim +Output [2]: [d_date_sk#13, d_date#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-01), LessThanOrEqual(d_date,1999-04-02), IsNotNull(d_date_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#13, d_date#14] + +(16) Filter [codegen id : 3] +Input [2]: [d_date_sk#13, d_date#14] +Condition : (((isnotnull(d_date#14) AND 
(d_date#14 >= 10623)) AND (d_date#14 <= 10683)) AND isnotnull(d_date_sk#13)) + +(17) Project [codegen id : 3] +Output [1]: [d_date_sk#13] +Input [2]: [d_date_sk#13, d_date#14] + +(18) BroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] + +(19) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_ship_date_sk#1] +Right keys [1]: [d_date_sk#13] +Join condition: None + +(20) Project [codegen id : 6] +Output [5]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, d_date_sk#13] + +(21) Scan parquet default.customer_address +Output [2]: [ca_address_sk#16, ca_state#17] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,IL), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 4] +Input [2]: [ca_address_sk#16, ca_state#17] + +(23) Filter [codegen id : 4] +Input [2]: [ca_address_sk#16, ca_state#17] +Condition : ((isnotnull(ca_state#17) AND (ca_state#17 = IL)) AND isnotnull(ca_address_sk#16)) + +(24) Project [codegen id : 4] +Output [1]: [ca_address_sk#16] +Input [2]: [ca_address_sk#16, ca_state#17] + +(25) BroadcastExchange +Input [1]: [ca_address_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] + +(26) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_ship_addr_sk#2] +Right keys [1]: [ca_address_sk#16] +Join condition: None + +(27) Project [codegen id : 6] +Output [4]: [ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Input [6]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, 
ca_address_sk#16] + +(28) Scan parquet default.web_site +Output [2]: [web_site_sk#19, web_company_name#20] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_site] +PushedFilters: [IsNotNull(web_company_name), EqualTo(web_company_name,pri), IsNotNull(web_site_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 5] +Input [2]: [web_site_sk#19, web_company_name#20] + +(30) Filter [codegen id : 5] +Input [2]: [web_site_sk#19, web_company_name#20] +Condition : ((isnotnull(web_company_name#20) AND (web_company_name#20 = pri)) AND isnotnull(web_site_sk#19)) + +(31) Project [codegen id : 5] +Output [1]: [web_site_sk#19] +Input [2]: [web_site_sk#19, web_company_name#20] + +(32) BroadcastExchange +Input [1]: [web_site_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] + +(33) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_web_site_sk#3] +Right keys [1]: [web_site_sk#19] +Join condition: None + +(34) Project [codegen id : 6] +Output [3]: [ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Input [5]: [ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, web_site_sk#19] + +(35) HashAggregate [codegen id : 6] +Input [3]: [ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Keys [1]: [ws_order_number#5] +Functions [2]: [partial_sum(UnscaledValue(ws_ext_ship_cost#6)), partial_sum(UnscaledValue(ws_net_profit#7))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#6))#22, sum(UnscaledValue(ws_net_profit#7))#23] +Results [3]: [ws_order_number#5, sum#24, sum#25] + +(36) Exchange +Input [3]: [ws_order_number#5, sum#24, sum#25] +Arguments: hashpartitioning(ws_order_number#5, 5), true, [id=#26] + +(37) HashAggregate [codegen id : 7] +Input [3]: [ws_order_number#5, sum#24, sum#25] +Keys [1]: [ws_order_number#5] +Functions [2]: 
[merge_sum(UnscaledValue(ws_ext_ship_cost#6)), merge_sum(UnscaledValue(ws_net_profit#7))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#6))#22, sum(UnscaledValue(ws_net_profit#7))#23] +Results [3]: [ws_order_number#5, sum#24, sum#25] + +(38) HashAggregate [codegen id : 7] +Input [3]: [ws_order_number#5, sum#24, sum#25] +Keys: [] +Functions [3]: [merge_sum(UnscaledValue(ws_ext_ship_cost#6)), merge_sum(UnscaledValue(ws_net_profit#7)), partial_count(distinct ws_order_number#5)] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#22, sum(UnscaledValue(ws_net_profit#7))#23, count(ws_order_number#5)#27] +Results [3]: [sum#24, sum#25, count#28] + +(39) Exchange +Input [3]: [sum#24, sum#25, count#28] +Arguments: SinglePartition, true, [id=#29] + +(40) HashAggregate [codegen id : 8] +Input [3]: [sum#24, sum#25, count#28] +Keys: [] +Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#6)), sum(UnscaledValue(ws_net_profit#7)), count(distinct ws_order_number#5)] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#22, sum(UnscaledValue(ws_net_profit#7))#23, count(ws_order_number#5)#27] +Results [3]: [count(ws_order_number#5)#27 AS order count #30, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#6))#22,17,2) AS total shipping cost #31, MakeDecimal(sum(UnscaledValue(ws_net_profit#7))#23,17,2) AS total net profit #32] + +(41) TakeOrderedAndProject +Input [3]: [order count #30, total shipping cost #31, total net profit #32] +Arguments: 100, [order count #30 ASC NULLS FIRST], [order count #30, total shipping cost #31, total net profit #32] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt new file mode 100644 index 0000000000000..fda5a9f037ad4 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt @@ -0,0 +1,62 @@ +TakeOrderedAndProject [order 
count ,total net profit ,total shipping cost ] + WholeStageCodegen (8) + HashAggregate [count,sum,sum] [count,count(ws_order_number),order count ,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),total net profit ,total shipping cost ] + InputAdapter + Exchange #1 + WholeStageCodegen (7) + HashAggregate [ws_order_number] [count,count,count(ws_order_number),sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] + HashAggregate [ws_order_number] [sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] + InputAdapter + Exchange [ws_order_number] #2 + WholeStageCodegen (6) + HashAggregate [ws_ext_ship_cost,ws_net_profit,ws_order_number] [sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] + Project [ws_ext_ship_cost,ws_net_profit,ws_order_number] + BroadcastHashJoin [web_site_sk,ws_web_site_sk] + Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_web_site_sk] + BroadcastHashJoin [ca_address_sk,ws_ship_addr_sk] + Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_web_site_sk] + BroadcastHashJoin [d_date_sk,ws_ship_date_sk] + BroadcastHashJoin [wr_order_number,ws_order_number] + Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_ship_date_sk,ws_web_site_sk] + BroadcastHashJoin [ws_order_number,ws_order_number,ws_warehouse_sk,ws_warehouse_sk] + Filter [ws_ship_addr_sk,ws_ship_date_sk,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_ship_date_sk,ws_warehouse_sk,ws_web_site_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [ws_order_number,ws_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_order_number,ws_warehouse_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + ColumnarToRow + InputAdapter + Scan parquet 
default.web_returns [wr_order_number] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + Project [ca_address_sk] + Filter [ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (5) + Project [web_site_sk] + Filter [web_company_name,web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_site [web_company_name,web_site_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt new file mode 100644 index 0000000000000..c2baa5ba23ffc --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt @@ -0,0 +1,347 @@ +== Physical Plan == +TakeOrderedAndProject (63) ++- * HashAggregate (62) + +- Exchange (61) + +- * HashAggregate (60) + +- * HashAggregate (59) + +- Exchange (58) + +- * HashAggregate (57) + +- * Project (56) + +- * BroadcastHashJoin Inner BuildRight (55) + :- * Project (49) + : +- * BroadcastHashJoin Inner BuildRight (48) + : :- * Project (42) + : : +- * BroadcastHashJoin Inner BuildRight (41) + : : :- SortMergeJoin LeftSemi (35) + : : : :- * Sort (19) + : : : : +- Exchange (18) + : : : : +- SortMergeJoin LeftSemi (17) + : : : : :- * Sort (5) + : : : : : +- Exchange (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.web_sales (1) + : : : : +- * Sort (16) + : : : : +- Exchange (15) + : : : : +- * Project (14) + : : : : +- * SortMergeJoin Inner (13) + : : : : :- * Sort (10) + : : : : : +- Exchange (9) + : : : : : +- * Filter (8) + : : : : : +- * ColumnarToRow (7) + : : : : : +- Scan 
parquet default.web_sales (6) + : : : : +- * Sort (12) + : : : : +- ReusedExchange (11) + : : : +- * Project (34) + : : : +- * SortMergeJoin Inner (33) + : : : :- * Sort (27) + : : : : +- Exchange (26) + : : : : +- * Project (25) + : : : : +- * SortMergeJoin Inner (24) + : : : : :- * Sort (21) + : : : : : +- ReusedExchange (20) + : : : : +- * Sort (23) + : : : : +- ReusedExchange (22) + : : : +- * Sort (32) + : : : +- Exchange (31) + : : : +- * Filter (30) + : : : +- * ColumnarToRow (29) + : : : +- Scan parquet default.web_returns (28) + : : +- BroadcastExchange (40) + : : +- * Project (39) + : : +- * Filter (38) + : : +- * ColumnarToRow (37) + : : +- Scan parquet default.customer_address (36) + : +- BroadcastExchange (47) + : +- * Project (46) + : +- * Filter (45) + : +- * ColumnarToRow (44) + : +- Scan parquet default.web_site (43) + +- BroadcastExchange (54) + +- * Project (53) + +- * Filter (52) + +- * ColumnarToRow (51) + +- Scan parquet default.date_dim (50) + + +(1) Scan parquet default.web_sales +Output [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_ship_date_sk), IsNotNull(ws_ship_addr_sk), IsNotNull(ws_web_site_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] + +(3) Filter [codegen id : 1] +Input [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Condition : ((isnotnull(ws_ship_date_sk#1) AND isnotnull(ws_ship_addr_sk#2)) AND isnotnull(ws_web_site_sk#3)) + +(4) Exchange +Input [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, 
ws_net_profit#6] +Arguments: hashpartitioning(ws_order_number#4, 5), true, [id=#7] + +(5) Sort [codegen id : 2] +Input [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Arguments: [ws_order_number#4 ASC NULLS FIRST], false, 0 + +(6) Scan parquet default.web_sales +Output [2]: [ws_warehouse_sk#8, ws_order_number#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_order_number), IsNotNull(ws_warehouse_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 3] +Input [2]: [ws_warehouse_sk#8, ws_order_number#4] + +(8) Filter [codegen id : 3] +Input [2]: [ws_warehouse_sk#8, ws_order_number#4] +Condition : (isnotnull(ws_order_number#4) AND isnotnull(ws_warehouse_sk#8)) + +(9) Exchange +Input [2]: [ws_warehouse_sk#8, ws_order_number#4] +Arguments: hashpartitioning(ws_order_number#4, 5), true, [id=#9] + +(10) Sort [codegen id : 4] +Input [2]: [ws_warehouse_sk#8, ws_order_number#4] +Arguments: [ws_order_number#4 ASC NULLS FIRST], false, 0 + +(11) ReusedExchange [Reuses operator id: 9] +Output [2]: [ws_warehouse_sk#10, ws_order_number#11] + +(12) Sort [codegen id : 6] +Input [2]: [ws_warehouse_sk#10, ws_order_number#11] +Arguments: [ws_order_number#11 ASC NULLS FIRST], false, 0 + +(13) SortMergeJoin [codegen id : 7] +Left keys [1]: [ws_order_number#4] +Right keys [1]: [ws_order_number#11] +Join condition: NOT (ws_warehouse_sk#8 = ws_warehouse_sk#10) + +(14) Project [codegen id : 7] +Output [1]: [ws_order_number#4 AS ws_order_number#4#12] +Input [4]: [ws_warehouse_sk#8, ws_order_number#4, ws_warehouse_sk#10, ws_order_number#11] + +(15) Exchange +Input [1]: [ws_order_number#4#12] +Arguments: hashpartitioning(ws_order_number#4#12, 5), true, [id=#13] + +(16) Sort [codegen id : 8] +Input [1]: [ws_order_number#4#12] +Arguments: [ws_order_number#4#12 ASC 
NULLS FIRST], false, 0 + +(17) SortMergeJoin +Left keys [1]: [ws_order_number#4] +Right keys [1]: [ws_order_number#4#12] +Join condition: None + +(18) Exchange +Input [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Arguments: hashpartitioning(cast(ws_order_number#4 as bigint), 5), true, [id=#14] + +(19) Sort [codegen id : 9] +Input [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Arguments: [cast(ws_order_number#4 as bigint) ASC NULLS FIRST], false, 0 + +(20) ReusedExchange [Reuses operator id: 9] +Output [2]: [ws_warehouse_sk#8, ws_order_number#4] + +(21) Sort [codegen id : 11] +Input [2]: [ws_warehouse_sk#8, ws_order_number#4] +Arguments: [ws_order_number#4 ASC NULLS FIRST], false, 0 + +(22) ReusedExchange [Reuses operator id: 9] +Output [2]: [ws_warehouse_sk#15, ws_order_number#16] + +(23) Sort [codegen id : 13] +Input [2]: [ws_warehouse_sk#15, ws_order_number#16] +Arguments: [ws_order_number#16 ASC NULLS FIRST], false, 0 + +(24) SortMergeJoin [codegen id : 14] +Left keys [1]: [ws_order_number#4] +Right keys [1]: [ws_order_number#16] +Join condition: NOT (ws_warehouse_sk#8 = ws_warehouse_sk#15) + +(25) Project [codegen id : 14] +Output [1]: [ws_order_number#4] +Input [4]: [ws_warehouse_sk#8, ws_order_number#4, ws_warehouse_sk#15, ws_order_number#16] + +(26) Exchange +Input [1]: [ws_order_number#4] +Arguments: hashpartitioning(cast(ws_order_number#4 as bigint), 5), true, [id=#17] + +(27) Sort [codegen id : 15] +Input [1]: [ws_order_number#4] +Arguments: [cast(ws_order_number#4 as bigint) ASC NULLS FIRST], false, 0 + +(28) Scan parquet default.web_returns +Output [1]: [wr_order_number#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_returns] +PushedFilters: [IsNotNull(wr_order_number)] +ReadSchema: 
struct + +(29) ColumnarToRow [codegen id : 16] +Input [1]: [wr_order_number#18] + +(30) Filter [codegen id : 16] +Input [1]: [wr_order_number#18] +Condition : isnotnull(wr_order_number#18) + +(31) Exchange +Input [1]: [wr_order_number#18] +Arguments: hashpartitioning(wr_order_number#18, 5), true, [id=#19] + +(32) Sort [codegen id : 17] +Input [1]: [wr_order_number#18] +Arguments: [wr_order_number#18 ASC NULLS FIRST], false, 0 + +(33) SortMergeJoin [codegen id : 18] +Left keys [1]: [cast(ws_order_number#4 as bigint)] +Right keys [1]: [wr_order_number#18] +Join condition: None + +(34) Project [codegen id : 18] +Output [1]: [wr_order_number#18] +Input [2]: [ws_order_number#4, wr_order_number#18] + +(35) SortMergeJoin +Left keys [1]: [cast(ws_order_number#4 as bigint)] +Right keys [1]: [wr_order_number#18] +Join condition: None + +(36) Scan parquet default.customer_address +Output [2]: [ca_address_sk#20, ca_state#21] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,IL), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(37) ColumnarToRow [codegen id : 19] +Input [2]: [ca_address_sk#20, ca_state#21] + +(38) Filter [codegen id : 19] +Input [2]: [ca_address_sk#20, ca_state#21] +Condition : ((isnotnull(ca_state#21) AND (ca_state#21 = IL)) AND isnotnull(ca_address_sk#20)) + +(39) Project [codegen id : 19] +Output [1]: [ca_address_sk#20] +Input [2]: [ca_address_sk#20, ca_state#21] + +(40) BroadcastExchange +Input [1]: [ca_address_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] + +(41) BroadcastHashJoin [codegen id : 22] +Left keys [1]: [ws_ship_addr_sk#2] +Right keys [1]: [ca_address_sk#20] +Join condition: None + +(42) Project [codegen id : 22] +Output [5]: [ws_ship_date_sk#1, ws_web_site_sk#3, ws_order_number#4, 
ws_ext_ship_cost#5, ws_net_profit#6] +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, ca_address_sk#20] + +(43) Scan parquet default.web_site +Output [2]: [web_site_sk#23, web_company_name#24] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/web_site] +PushedFilters: [IsNotNull(web_company_name), EqualTo(web_company_name,pri), IsNotNull(web_site_sk)] +ReadSchema: struct + +(44) ColumnarToRow [codegen id : 20] +Input [2]: [web_site_sk#23, web_company_name#24] + +(45) Filter [codegen id : 20] +Input [2]: [web_site_sk#23, web_company_name#24] +Condition : ((isnotnull(web_company_name#24) AND (web_company_name#24 = pri)) AND isnotnull(web_site_sk#23)) + +(46) Project [codegen id : 20] +Output [1]: [web_site_sk#23] +Input [2]: [web_site_sk#23, web_company_name#24] + +(47) BroadcastExchange +Input [1]: [web_site_sk#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] + +(48) BroadcastHashJoin [codegen id : 22] +Left keys [1]: [ws_web_site_sk#3] +Right keys [1]: [web_site_sk#23] +Join condition: None + +(49) Project [codegen id : 22] +Output [4]: [ws_ship_date_sk#1, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Input [6]: [ws_ship_date_sk#1, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, web_site_sk#23] + +(50) Scan parquet default.date_dim +Output [2]: [d_date_sk#26, d_date#27] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-01), LessThanOrEqual(d_date,1999-04-02), IsNotNull(d_date_sk)] +ReadSchema: struct + +(51) ColumnarToRow [codegen id : 21] +Input [2]: [d_date_sk#26, d_date#27] + +(52) Filter 
[codegen id : 21] +Input [2]: [d_date_sk#26, d_date#27] +Condition : (((isnotnull(d_date#27) AND (d_date#27 >= 10623)) AND (d_date#27 <= 10683)) AND isnotnull(d_date_sk#26)) + +(53) Project [codegen id : 21] +Output [1]: [d_date_sk#26] +Input [2]: [d_date_sk#26, d_date#27] + +(54) BroadcastExchange +Input [1]: [d_date_sk#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#28] + +(55) BroadcastHashJoin [codegen id : 22] +Left keys [1]: [ws_ship_date_sk#1] +Right keys [1]: [d_date_sk#26] +Join condition: None + +(56) Project [codegen id : 22] +Output [3]: [ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Input [5]: [ws_ship_date_sk#1, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, d_date_sk#26] + +(57) HashAggregate [codegen id : 22] +Input [3]: [ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Keys [1]: [ws_order_number#4] +Functions [2]: [partial_sum(UnscaledValue(ws_ext_ship_cost#5)), partial_sum(UnscaledValue(ws_net_profit#6))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#5))#29, sum(UnscaledValue(ws_net_profit#6))#30] +Results [3]: [ws_order_number#4, sum#31, sum#32] + +(58) Exchange +Input [3]: [ws_order_number#4, sum#31, sum#32] +Arguments: hashpartitioning(ws_order_number#4, 5), true, [id=#33] + +(59) HashAggregate [codegen id : 23] +Input [3]: [ws_order_number#4, sum#31, sum#32] +Keys [1]: [ws_order_number#4] +Functions [2]: [merge_sum(UnscaledValue(ws_ext_ship_cost#5)), merge_sum(UnscaledValue(ws_net_profit#6))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#5))#29, sum(UnscaledValue(ws_net_profit#6))#30] +Results [3]: [ws_order_number#4, sum#31, sum#32] + +(60) HashAggregate [codegen id : 23] +Input [3]: [ws_order_number#4, sum#31, sum#32] +Keys: [] +Functions [3]: [merge_sum(UnscaledValue(ws_ext_ship_cost#5)), merge_sum(UnscaledValue(ws_net_profit#6)), partial_count(distinct ws_order_number#4)] +Aggregate Attributes [3]: 
[sum(UnscaledValue(ws_ext_ship_cost#5))#29, sum(UnscaledValue(ws_net_profit#6))#30, count(ws_order_number#4)#34] +Results [3]: [sum#31, sum#32, count#35] + +(61) Exchange +Input [3]: [sum#31, sum#32, count#35] +Arguments: SinglePartition, true, [id=#36] + +(62) HashAggregate [codegen id : 24] +Input [3]: [sum#31, sum#32, count#35] +Keys: [] +Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#5)), sum(UnscaledValue(ws_net_profit#6)), count(distinct ws_order_number#4)] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#29, sum(UnscaledValue(ws_net_profit#6))#30, count(ws_order_number#4)#34] +Results [3]: [count(ws_order_number#4)#34 AS order count #37, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#5))#29,17,2) AS total shipping cost #38, MakeDecimal(sum(UnscaledValue(ws_net_profit#6))#30,17,2) AS total net profit #39] + +(63) TakeOrderedAndProject +Input [3]: [order count #37, total shipping cost #38, total net profit #39] +Arguments: 100, [order count #37 ASC NULLS FIRST], [order count #37, total shipping cost #38, total net profit #39] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt new file mode 100644 index 0000000000000..420cd6f89b02a --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt @@ -0,0 +1,111 @@ +TakeOrderedAndProject [order count ,total net profit ,total shipping cost ] + WholeStageCodegen (24) + HashAggregate [count,sum,sum] [count,count(ws_order_number),order count ,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),total net profit ,total shipping cost ] + InputAdapter + Exchange #1 + WholeStageCodegen (23) + HashAggregate [ws_order_number] [count,count,count(ws_order_number),sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] + HashAggregate 
[ws_order_number] [sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] + InputAdapter + Exchange [ws_order_number] #2 + WholeStageCodegen (22) + HashAggregate [ws_ext_ship_cost,ws_net_profit,ws_order_number] [sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] + Project [ws_ext_ship_cost,ws_net_profit,ws_order_number] + BroadcastHashJoin [d_date_sk,ws_ship_date_sk] + Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_date_sk] + BroadcastHashJoin [web_site_sk,ws_web_site_sk] + Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_date_sk,ws_web_site_sk] + BroadcastHashJoin [ca_address_sk,ws_ship_addr_sk] + InputAdapter + SortMergeJoin [wr_order_number,ws_order_number] + WholeStageCodegen (9) + Sort [ws_order_number] + InputAdapter + Exchange [ws_order_number] #3 + SortMergeJoin [ws_order_number,ws_order_number] + WholeStageCodegen (2) + Sort [ws_order_number] + InputAdapter + Exchange [ws_order_number] #4 + WholeStageCodegen (1) + Filter [ws_ship_addr_sk,ws_ship_date_sk,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_ship_date_sk,ws_web_site_sk] + WholeStageCodegen (8) + Sort [ws_order_number] + InputAdapter + Exchange [ws_order_number] #5 + WholeStageCodegen (7) + Project [ws_order_number] + SortMergeJoin [ws_order_number,ws_order_number,ws_warehouse_sk,ws_warehouse_sk] + InputAdapter + WholeStageCodegen (4) + Sort [ws_order_number] + InputAdapter + Exchange [ws_order_number] #6 + WholeStageCodegen (3) + Filter [ws_order_number,ws_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_order_number,ws_warehouse_sk] + InputAdapter + WholeStageCodegen (6) + Sort [ws_order_number] + InputAdapter + ReusedExchange [ws_order_number,ws_warehouse_sk] #6 + WholeStageCodegen (18) + Project [wr_order_number] + SortMergeJoin [wr_order_number,ws_order_number] + 
InputAdapter + WholeStageCodegen (15) + Sort [ws_order_number] + InputAdapter + Exchange [ws_order_number] #7 + WholeStageCodegen (14) + Project [ws_order_number] + SortMergeJoin [ws_order_number,ws_order_number,ws_warehouse_sk,ws_warehouse_sk] + InputAdapter + WholeStageCodegen (11) + Sort [ws_order_number] + InputAdapter + ReusedExchange [ws_order_number,ws_warehouse_sk] #6 + InputAdapter + WholeStageCodegen (13) + Sort [ws_order_number] + InputAdapter + ReusedExchange [ws_order_number,ws_warehouse_sk] #6 + InputAdapter + WholeStageCodegen (17) + Sort [wr_order_number] + InputAdapter + Exchange [wr_order_number] #8 + WholeStageCodegen (16) + Filter [wr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_order_number] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (19) + Project [ca_address_sk] + Filter [ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (20) + Project [web_site_sk] + Filter [web_company_name,web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_site [web_company_name,web_site_sk] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (21) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt new file mode 100644 index 0000000000000..5b232d915efdd --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt @@ -0,0 +1,318 @@ +== Physical Plan == +TakeOrderedAndProject (56) ++- * HashAggregate (55) + +- Exchange (54) + +- * HashAggregate (53) + +- * HashAggregate (52) + +- Exchange (51) + +- * HashAggregate (50) + +- * Project (49) + +- * BroadcastHashJoin 
Inner BuildRight (48) + :- * Project (42) + : +- * BroadcastHashJoin Inner BuildRight (41) + : :- * Project (35) + : : +- * BroadcastHashJoin Inner BuildRight (34) + : : :- * BroadcastHashJoin LeftSemi BuildRight (28) + : : : :- * BroadcastHashJoin LeftSemi BuildRight (14) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.web_sales (1) + : : : : +- BroadcastExchange (13) + : : : : +- * Project (12) + : : : : +- * BroadcastHashJoin Inner BuildRight (11) + : : : : :- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.web_sales (4) + : : : : +- BroadcastExchange (10) + : : : : +- * Filter (9) + : : : : +- * ColumnarToRow (8) + : : : : +- Scan parquet default.web_sales (7) + : : : +- BroadcastExchange (27) + : : : +- * Project (26) + : : : +- * BroadcastHashJoin Inner BuildRight (25) + : : : :- * Filter (17) + : : : : +- * ColumnarToRow (16) + : : : : +- Scan parquet default.web_returns (15) + : : : +- BroadcastExchange (24) + : : : +- * Project (23) + : : : +- * BroadcastHashJoin Inner BuildRight (22) + : : : :- * Filter (20) + : : : : +- * ColumnarToRow (19) + : : : : +- Scan parquet default.web_sales (18) + : : : +- ReusedExchange (21) + : : +- BroadcastExchange (33) + : : +- * Project (32) + : : +- * Filter (31) + : : +- * ColumnarToRow (30) + : : +- Scan parquet default.date_dim (29) + : +- BroadcastExchange (40) + : +- * Project (39) + : +- * Filter (38) + : +- * ColumnarToRow (37) + : +- Scan parquet default.customer_address (36) + +- BroadcastExchange (47) + +- * Project (46) + +- * Filter (45) + +- * ColumnarToRow (44) + +- Scan parquet default.web_site (43) + + +(1) Scan parquet default.web_sales +Output [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_ship_date_sk), IsNotNull(ws_ship_addr_sk), IsNotNull(ws_web_site_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 9] +Input [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] + +(3) Filter [codegen id : 9] +Input [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Condition : ((isnotnull(ws_ship_date_sk#1) AND isnotnull(ws_ship_addr_sk#2)) AND isnotnull(ws_web_site_sk#3)) + +(4) Scan parquet default.web_sales +Output [2]: [ws_warehouse_sk#7, ws_order_number#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_order_number), IsNotNull(ws_warehouse_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 2] +Input [2]: [ws_warehouse_sk#7, ws_order_number#4] + +(6) Filter [codegen id : 2] +Input [2]: [ws_warehouse_sk#7, ws_order_number#4] +Condition : (isnotnull(ws_order_number#4) AND isnotnull(ws_warehouse_sk#7)) + +(7) Scan parquet default.web_sales +Output [2]: [ws_warehouse_sk#8, ws_order_number#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_order_number), IsNotNull(ws_warehouse_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 1] +Input [2]: [ws_warehouse_sk#8, ws_order_number#9] + +(9) Filter [codegen id : 1] +Input [2]: [ws_warehouse_sk#8, ws_order_number#9] +Condition : (isnotnull(ws_order_number#9) AND isnotnull(ws_warehouse_sk#8)) + +(10) BroadcastExchange +Input [2]: [ws_warehouse_sk#8, ws_order_number#9] +Arguments: 
HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#10] + +(11) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ws_order_number#4] +Right keys [1]: [ws_order_number#9] +Join condition: NOT (ws_warehouse_sk#7 = ws_warehouse_sk#8) + +(12) Project [codegen id : 2] +Output [1]: [ws_order_number#4 AS ws_order_number#4#11] +Input [4]: [ws_warehouse_sk#7, ws_order_number#4, ws_warehouse_sk#8, ws_order_number#9] + +(13) BroadcastExchange +Input [1]: [ws_order_number#4#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(14) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_order_number#4] +Right keys [1]: [ws_order_number#4#11] +Join condition: None + +(15) Scan parquet default.web_returns +Output [1]: [wr_order_number#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_returns] +PushedFilters: [IsNotNull(wr_order_number)] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 5] +Input [1]: [wr_order_number#13] + +(17) Filter [codegen id : 5] +Input [1]: [wr_order_number#13] +Condition : isnotnull(wr_order_number#13) + +(18) Scan parquet default.web_sales +Output [2]: [ws_warehouse_sk#7, ws_order_number#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_order_number), IsNotNull(ws_warehouse_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 4] +Input [2]: [ws_warehouse_sk#7, ws_order_number#4] + +(20) Filter [codegen id : 4] +Input [2]: [ws_warehouse_sk#7, ws_order_number#4] +Condition : (isnotnull(ws_order_number#4) AND isnotnull(ws_warehouse_sk#7)) + +(21) ReusedExchange [Reuses operator id: 10] +Output [2]: [ws_warehouse_sk#14, ws_order_number#15] + +(22) BroadcastHashJoin [codegen id : 4] +Left keys [1]: 
[ws_order_number#4] +Right keys [1]: [ws_order_number#15] +Join condition: NOT (ws_warehouse_sk#7 = ws_warehouse_sk#14) + +(23) Project [codegen id : 4] +Output [1]: [ws_order_number#4] +Input [4]: [ws_warehouse_sk#7, ws_order_number#4, ws_warehouse_sk#14, ws_order_number#15] + +(24) BroadcastExchange +Input [1]: [ws_order_number#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(25) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [wr_order_number#13] +Right keys [1]: [cast(ws_order_number#4 as bigint)] +Join condition: None + +(26) Project [codegen id : 5] +Output [1]: [wr_order_number#13] +Input [2]: [wr_order_number#13, ws_order_number#4] + +(27) BroadcastExchange +Input [1]: [wr_order_number#13] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [id=#17] + +(28) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [cast(ws_order_number#4 as bigint)] +Right keys [1]: [wr_order_number#13] +Join condition: None + +(29) Scan parquet default.date_dim +Output [2]: [d_date_sk#18, d_date#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-01), LessThanOrEqual(d_date,1999-04-02), IsNotNull(d_date_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 6] +Input [2]: [d_date_sk#18, d_date#19] + +(31) Filter [codegen id : 6] +Input [2]: [d_date_sk#18, d_date#19] +Condition : (((isnotnull(d_date#19) AND (d_date#19 >= 10623)) AND (d_date#19 <= 10683)) AND isnotnull(d_date_sk#18)) + +(32) Project [codegen id : 6] +Output [1]: [d_date_sk#18] +Input [2]: [d_date_sk#18, d_date#19] + +(33) BroadcastExchange +Input [1]: [d_date_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(34) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_ship_date_sk#1] 
+Right keys [1]: [d_date_sk#18] +Join condition: None + +(35) Project [codegen id : 9] +Output [5]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, d_date_sk#18] + +(36) Scan parquet default.customer_address +Output [2]: [ca_address_sk#21, ca_state#22] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,IL), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(37) ColumnarToRow [codegen id : 7] +Input [2]: [ca_address_sk#21, ca_state#22] + +(38) Filter [codegen id : 7] +Input [2]: [ca_address_sk#21, ca_state#22] +Condition : ((isnotnull(ca_state#22) AND (ca_state#22 = IL)) AND isnotnull(ca_address_sk#21)) + +(39) Project [codegen id : 7] +Output [1]: [ca_address_sk#21] +Input [2]: [ca_address_sk#21, ca_state#22] + +(40) BroadcastExchange +Input [1]: [ca_address_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] + +(41) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_ship_addr_sk#2] +Right keys [1]: [ca_address_sk#21] +Join condition: None + +(42) Project [codegen id : 9] +Output [4]: [ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Input [6]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, ca_address_sk#21] + +(43) Scan parquet default.web_site +Output [2]: [web_site_sk#24, web_company_name#25] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/web_site] +PushedFilters: [IsNotNull(web_company_name), EqualTo(web_company_name,pri), IsNotNull(web_site_sk)] +ReadSchema: struct + +(44) 
ColumnarToRow [codegen id : 8] +Input [2]: [web_site_sk#24, web_company_name#25] + +(45) Filter [codegen id : 8] +Input [2]: [web_site_sk#24, web_company_name#25] +Condition : ((isnotnull(web_company_name#25) AND (web_company_name#25 = pri)) AND isnotnull(web_site_sk#24)) + +(46) Project [codegen id : 8] +Output [1]: [web_site_sk#24] +Input [2]: [web_site_sk#24, web_company_name#25] + +(47) BroadcastExchange +Input [1]: [web_site_sk#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] + +(48) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_web_site_sk#3] +Right keys [1]: [web_site_sk#24] +Join condition: None + +(49) Project [codegen id : 9] +Output [3]: [ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Input [5]: [ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, web_site_sk#24] + +(50) HashAggregate [codegen id : 9] +Input [3]: [ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Keys [1]: [ws_order_number#4] +Functions [2]: [partial_sum(UnscaledValue(ws_ext_ship_cost#5)), partial_sum(UnscaledValue(ws_net_profit#6))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#5))#27, sum(UnscaledValue(ws_net_profit#6))#28] +Results [3]: [ws_order_number#4, sum#29, sum#30] + +(51) Exchange +Input [3]: [ws_order_number#4, sum#29, sum#30] +Arguments: hashpartitioning(ws_order_number#4, 5), true, [id=#31] + +(52) HashAggregate [codegen id : 10] +Input [3]: [ws_order_number#4, sum#29, sum#30] +Keys [1]: [ws_order_number#4] +Functions [2]: [merge_sum(UnscaledValue(ws_ext_ship_cost#5)), merge_sum(UnscaledValue(ws_net_profit#6))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#5))#27, sum(UnscaledValue(ws_net_profit#6))#28] +Results [3]: [ws_order_number#4, sum#29, sum#30] + +(53) HashAggregate [codegen id : 10] +Input [3]: [ws_order_number#4, sum#29, sum#30] +Keys: [] +Functions [3]: [merge_sum(UnscaledValue(ws_ext_ship_cost#5)), 
merge_sum(UnscaledValue(ws_net_profit#6)), partial_count(distinct ws_order_number#4)] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#27, sum(UnscaledValue(ws_net_profit#6))#28, count(ws_order_number#4)#32] +Results [3]: [sum#29, sum#30, count#33] + +(54) Exchange +Input [3]: [sum#29, sum#30, count#33] +Arguments: SinglePartition, true, [id=#34] + +(55) HashAggregate [codegen id : 11] +Input [3]: [sum#29, sum#30, count#33] +Keys: [] +Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#5)), sum(UnscaledValue(ws_net_profit#6)), count(distinct ws_order_number#4)] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#27, sum(UnscaledValue(ws_net_profit#6))#28, count(ws_order_number#4)#32] +Results [3]: [count(ws_order_number#4)#32 AS order count #35, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#5))#27,17,2) AS total shipping cost #36, MakeDecimal(sum(UnscaledValue(ws_net_profit#6))#28,17,2) AS total net profit #37] + +(56) TakeOrderedAndProject +Input [3]: [order count #35, total shipping cost #36, total net profit #37] +Arguments: 100, [order count #35 ASC NULLS FIRST], [order count #35, total shipping cost #36, total net profit #37] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt new file mode 100644 index 0000000000000..00f1e8cc15922 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt @@ -0,0 +1,84 @@ +TakeOrderedAndProject [order count ,total net profit ,total shipping cost ] + WholeStageCodegen (11) + HashAggregate [count,sum,sum] [count,count(ws_order_number),order count ,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),total net profit ,total shipping cost ] + InputAdapter + Exchange #1 + WholeStageCodegen (10) + HashAggregate [ws_order_number] 
[count,count,count(ws_order_number),sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] + HashAggregate [ws_order_number] [sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] + InputAdapter + Exchange [ws_order_number] #2 + WholeStageCodegen (9) + HashAggregate [ws_ext_ship_cost,ws_net_profit,ws_order_number] [sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] + Project [ws_ext_ship_cost,ws_net_profit,ws_order_number] + BroadcastHashJoin [web_site_sk,ws_web_site_sk] + Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_web_site_sk] + BroadcastHashJoin [ca_address_sk,ws_ship_addr_sk] + Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_web_site_sk] + BroadcastHashJoin [d_date_sk,ws_ship_date_sk] + BroadcastHashJoin [wr_order_number,ws_order_number] + BroadcastHashJoin [ws_order_number,ws_order_number] + Filter [ws_ship_addr_sk,ws_ship_date_sk,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_ship_date_sk,ws_web_site_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [ws_order_number] + BroadcastHashJoin [ws_order_number,ws_order_number,ws_warehouse_sk,ws_warehouse_sk] + Filter [ws_order_number,ws_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_order_number,ws_warehouse_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Filter [ws_order_number,ws_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_order_number,ws_warehouse_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (5) + Project [wr_order_number] + BroadcastHashJoin [wr_order_number,ws_order_number] + Filter [wr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_order_number] + InputAdapter + BroadcastExchange #6 + 
WholeStageCodegen (4) + Project [ws_order_number] + BroadcastHashJoin [ws_order_number,ws_order_number,ws_warehouse_sk,ws_warehouse_sk] + Filter [ws_order_number,ws_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_order_number,ws_warehouse_sk] + InputAdapter + ReusedExchange [ws_order_number,ws_warehouse_sk] #4 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (6) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (7) + Project [ca_address_sk] + Filter [ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (8) + Project [web_site_sk] + Filter [web_company_name,web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_site [web_company_name,web_site_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/explain.txt new file mode 100644 index 0000000000000..53a1642b95700 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/explain.txt @@ -0,0 +1,160 @@ +== Physical Plan == +TakeOrderedAndProject (28) ++- * HashAggregate (27) + +- Exchange (26) + +- * HashAggregate (25) + +- * Project (24) + +- * BroadcastHashJoin Inner BuildRight (23) + :- * Project (17) + : +- * BroadcastHashJoin Inner BuildRight (16) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.store_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.time_dim (4) + : +- BroadcastExchange (15) + : +- * Project (14) + : +- * 
Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet default.store (11) + +- BroadcastExchange (22) + +- * Project (21) + +- * Filter (20) + +- * ColumnarToRow (19) + +- Scan parquet default.household_demographics (18) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] + +(3) Filter [codegen id : 4] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) + +(4) Scan parquet default.time_dim +Output [3]: [t_time_sk#4, t_hour#5, t_minute#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,20), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [t_time_sk#4, t_hour#5, t_minute#6] + +(6) Filter [codegen id : 1] +Input [3]: [t_time_sk#4, t_hour#5, t_minute#6] +Condition : ((((isnotnull(t_hour#5) AND isnotnull(t_minute#6)) AND (t_hour#5 = 20)) AND (t_minute#6 >= 30)) AND isnotnull(t_time_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [t_time_sk#4] +Input [3]: [t_time_sk#4, t_hour#5, t_minute#6] + +(8) BroadcastExchange +Input [1]: [t_time_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_time_sk#1] +Right keys [1]: 
[t_time_sk#4] +Join condition: None + +(10) Project [codegen id : 4] +Output [2]: [ss_hdemo_sk#2, ss_store_sk#3] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, t_time_sk#4] + +(11) Scan parquet default.store +Output [2]: [s_store_sk#8, s_store_name#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_store_name), EqualTo(s_store_name,ese), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#8, s_store_name#9] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#8, s_store_name#9] +Condition : ((isnotnull(s_store_name#9) AND (s_store_name#9 = ese)) AND isnotnull(s_store_sk#8)) + +(14) Project [codegen id : 2] +Output [1]: [s_store_sk#8] +Input [2]: [s_store_sk#8, s_store_name#9] + +(15) BroadcastExchange +Input [1]: [s_store_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#8] +Join condition: None + +(17) Project [codegen id : 4] +Output [1]: [ss_hdemo_sk#2] +Input [3]: [ss_hdemo_sk#2, ss_store_sk#3, s_store_sk#8] + +(18) Scan parquet default.household_demographics +Output [2]: [hd_demo_sk#11, hd_dep_count#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/household_demographics] +PushedFilters: [IsNotNull(hd_dep_count), EqualTo(hd_dep_count,7), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [hd_demo_sk#11, hd_dep_count#12] + +(20) Filter [codegen id : 3] +Input [2]: [hd_demo_sk#11, hd_dep_count#12] +Condition : ((isnotnull(hd_dep_count#12) AND (hd_dep_count#12 = 7)) AND isnotnull(hd_demo_sk#11)) + +(21) Project [codegen id : 
3] +Output [1]: [hd_demo_sk#11] +Input [2]: [hd_demo_sk#11, hd_dep_count#12] + +(22) BroadcastExchange +Input [1]: [hd_demo_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#11] +Join condition: None + +(24) Project [codegen id : 4] +Output: [] +Input [2]: [ss_hdemo_sk#2, hd_demo_sk#11] + +(25) HashAggregate [codegen id : 4] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#14] +Results [1]: [count#15] + +(26) Exchange +Input [1]: [count#15] +Arguments: SinglePartition, true, [id=#16] + +(27) HashAggregate [codegen id : 5] +Input [1]: [count#15] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#17] +Results [1]: [count(1)#17 AS count(1)#18] + +(28) TakeOrderedAndProject +Input [1]: [count(1)#18] +Arguments: 100, [count(1)#18 ASC NULLS FIRST], [count(1)#18] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/simplified.txt new file mode 100644 index 0000000000000..68a1f4798d933 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/simplified.txt @@ -0,0 +1,41 @@ +TakeOrderedAndProject [count(1)] + WholeStageCodegen (5) + HashAggregate [count] [count,count(1),count(1)] + InputAdapter + Exchange #1 + WholeStageCodegen (4) + HashAggregate [count,count] + Project + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_hdemo_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_hdemo_sk,ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen 
(1) + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [s_store_sk] + Filter [s_store_name,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_name,s_store_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [hd_demo_sk] + Filter [hd_demo_sk,hd_dep_count] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt new file mode 100644 index 0000000000000..a2c549bdb3444 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt @@ -0,0 +1,160 @@ +== Physical Plan == +TakeOrderedAndProject (28) ++- * HashAggregate (27) + +- Exchange (26) + +- * HashAggregate (25) + +- * Project (24) + +- * BroadcastHashJoin Inner BuildRight (23) + :- * Project (17) + : +- * BroadcastHashJoin Inner BuildRight (16) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.store_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.household_demographics (4) + : +- BroadcastExchange (15) + : +- * Project (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet default.time_dim (11) + +- BroadcastExchange (22) + +- * Project (21) + +- * Filter (20) + +- * ColumnarToRow (19) + +- Scan parquet default.store (18) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] + +(3) Filter [codegen id : 4] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) + +(4) Scan parquet default.household_demographics +Output [2]: [hd_demo_sk#4, hd_dep_count#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/household_demographics] +PushedFilters: [IsNotNull(hd_dep_count), EqualTo(hd_dep_count,7), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [hd_demo_sk#4, hd_dep_count#5] + +(6) Filter [codegen id : 1] +Input [2]: [hd_demo_sk#4, hd_dep_count#5] +Condition : ((isnotnull(hd_dep_count#5) AND (hd_dep_count#5 = 7)) AND isnotnull(hd_demo_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [hd_demo_sk#4] +Input [2]: [hd_demo_sk#4, hd_dep_count#5] + +(8) BroadcastExchange +Input [1]: [hd_demo_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#4] +Join condition: None + +(10) Project [codegen id : 4] +Output [2]: [ss_sold_time_sk#1, ss_store_sk#3] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#4] + +(11) Scan parquet default.time_dim +Output [3]: [t_time_sk#7, t_hour#8, t_minute#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/time_dim] +PushedFilters: 
[IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,20), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [t_time_sk#7, t_hour#8, t_minute#9] + +(13) Filter [codegen id : 2] +Input [3]: [t_time_sk#7, t_hour#8, t_minute#9] +Condition : ((((isnotnull(t_hour#8) AND isnotnull(t_minute#9)) AND (t_hour#8 = 20)) AND (t_minute#9 >= 30)) AND isnotnull(t_time_sk#7)) + +(14) Project [codegen id : 2] +Output [1]: [t_time_sk#7] +Input [3]: [t_time_sk#7, t_hour#8, t_minute#9] + +(15) BroadcastExchange +Input [1]: [t_time_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_time_sk#1] +Right keys [1]: [t_time_sk#7] +Join condition: None + +(17) Project [codegen id : 4] +Output [1]: [ss_store_sk#3] +Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#7] + +(18) Scan parquet default.store +Output [2]: [s_store_sk#11, s_store_name#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_store_name), EqualTo(s_store_name,ese), IsNotNull(s_store_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [s_store_sk#11, s_store_name#12] + +(20) Filter [codegen id : 3] +Input [2]: [s_store_sk#11, s_store_name#12] +Condition : ((isnotnull(s_store_name#12) AND (s_store_name#12 = ese)) AND isnotnull(s_store_sk#11)) + +(21) Project [codegen id : 3] +Output [1]: [s_store_sk#11] +Input [2]: [s_store_sk#11, s_store_name#12] + +(22) BroadcastExchange +Input [1]: [s_store_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#11] +Join condition: None + +(24) Project [codegen id : 4] 
+Output: [] +Input [2]: [ss_store_sk#3, s_store_sk#11] + +(25) HashAggregate [codegen id : 4] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#14] +Results [1]: [count#15] + +(26) Exchange +Input [1]: [count#15] +Arguments: SinglePartition, true, [id=#16] + +(27) HashAggregate [codegen id : 5] +Input [1]: [count#15] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#17] +Results [1]: [count(1)#17 AS count(1)#18] + +(28) TakeOrderedAndProject +Input [1]: [count(1)#18] +Arguments: 100, [count(1)#18 ASC NULLS FIRST], [count(1)#18] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt new file mode 100644 index 0000000000000..d2b7ff2fdc2c2 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt @@ -0,0 +1,41 @@ +TakeOrderedAndProject [count(1)] + WholeStageCodegen (5) + HashAggregate [count] [count,count(1),count(1)] + InputAdapter + Exchange #1 + WholeStageCodegen (4) + HashAggregate [count,count] + Project + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [hd_demo_sk] + Filter [hd_demo_sk,hd_dep_count] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] + InputAdapter + 
BroadcastExchange #4 + WholeStageCodegen (3) + Project [s_store_sk] + Filter [s_store_name,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_name,s_store_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/explain.txt new file mode 100644 index 0000000000000..cf04505c74a34 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/explain.txt @@ -0,0 +1,179 @@ +== Physical Plan == +CollectLimit (30) ++- * HashAggregate (29) + +- Exchange (28) + +- * HashAggregate (27) + +- * Project (26) + +- SortMergeJoin FullOuter (25) + :- * Sort (14) + : +- * HashAggregate (13) + : +- Exchange (12) + : +- * HashAggregate (11) + : +- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.store_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.date_dim (4) + +- * Sort (24) + +- * HashAggregate (23) + +- Exchange (22) + +- * HashAggregate (21) + +- * Project (20) + +- * BroadcastHashJoin Inner BuildRight (19) + :- * Filter (17) + : +- * ColumnarToRow (16) + : +- Scan parquet default.catalog_sales (15) + +- ReusedExchange (18) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3] + +(3) Filter [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3] +Condition : isnotnull(ss_sold_date_sk#1) + 
+(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#4, d_month_seq#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(9) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(10) Project [codegen id : 2] +Output [2]: [ss_item_sk#2, ss_customer_sk#3] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, d_date_sk#4] + +(11) HashAggregate [codegen id : 2] +Input [2]: [ss_item_sk#2, ss_customer_sk#3] +Keys [2]: [ss_customer_sk#3, ss_item_sk#2] +Functions: [] +Aggregate Attributes: [] +Results [2]: [ss_customer_sk#3, ss_item_sk#2] + +(12) Exchange +Input [2]: [ss_customer_sk#3, ss_item_sk#2] +Arguments: hashpartitioning(ss_customer_sk#3, ss_item_sk#2, 5), true, [id=#7] + +(13) HashAggregate [codegen id : 3] +Input [2]: [ss_customer_sk#3, ss_item_sk#2] +Keys [2]: [ss_customer_sk#3, ss_item_sk#2] +Functions: [] +Aggregate Attributes: [] +Results [2]: [ss_customer_sk#3 AS customer_sk#8, ss_item_sk#2 AS item_sk#9] + +(14) Sort [codegen id : 3] +Input [2]: [customer_sk#8, item_sk#9] +Arguments: [customer_sk#8 ASC NULLS FIRST, item_sk#9 ASC NULLS FIRST], false, 0 + +(15) Scan parquet 
default.catalog_sales +Output [3]: [cs_sold_date_sk#10, cs_bill_customer_sk#11, cs_item_sk#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 5] +Input [3]: [cs_sold_date_sk#10, cs_bill_customer_sk#11, cs_item_sk#12] + +(17) Filter [codegen id : 5] +Input [3]: [cs_sold_date_sk#10, cs_bill_customer_sk#11, cs_item_sk#12] +Condition : isnotnull(cs_sold_date_sk#10) + +(18) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#4] + +(19) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_sold_date_sk#10] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(20) Project [codegen id : 5] +Output [2]: [cs_bill_customer_sk#11, cs_item_sk#12] +Input [4]: [cs_sold_date_sk#10, cs_bill_customer_sk#11, cs_item_sk#12, d_date_sk#4] + +(21) HashAggregate [codegen id : 5] +Input [2]: [cs_bill_customer_sk#11, cs_item_sk#12] +Keys [2]: [cs_bill_customer_sk#11, cs_item_sk#12] +Functions: [] +Aggregate Attributes: [] +Results [2]: [cs_bill_customer_sk#11, cs_item_sk#12] + +(22) Exchange +Input [2]: [cs_bill_customer_sk#11, cs_item_sk#12] +Arguments: hashpartitioning(cs_bill_customer_sk#11, cs_item_sk#12, 5), true, [id=#13] + +(23) HashAggregate [codegen id : 6] +Input [2]: [cs_bill_customer_sk#11, cs_item_sk#12] +Keys [2]: [cs_bill_customer_sk#11, cs_item_sk#12] +Functions: [] +Aggregate Attributes: [] +Results [2]: [cs_bill_customer_sk#11 AS customer_sk#14, cs_item_sk#12 AS item_sk#15] + +(24) Sort [codegen id : 6] +Input [2]: [customer_sk#14, item_sk#15] +Arguments: [customer_sk#14 ASC NULLS FIRST, item_sk#15 ASC NULLS FIRST], false, 0 + +(25) SortMergeJoin +Left keys [2]: [customer_sk#8, item_sk#9] +Right keys [2]: [customer_sk#14, item_sk#15] +Join condition: None + +(26) Project [codegen id : 7] +Output [2]: [customer_sk#8, 
customer_sk#14] +Input [4]: [customer_sk#8, item_sk#9, customer_sk#14, item_sk#15] + +(27) HashAggregate [codegen id : 7] +Input [2]: [customer_sk#8, customer_sk#14] +Keys: [] +Functions [3]: [partial_sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))] +Aggregate Attributes [3]: [sum#16, sum#17, sum#18] +Results [3]: [sum#19, sum#20, sum#21] + +(28) Exchange +Input [3]: [sum#19, sum#20, sum#21] +Arguments: SinglePartition, true, [id=#22] + +(29) HashAggregate [codegen id : 8] +Input [3]: [sum#19, sum#20, sum#21] +Keys: [] +Functions [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))] +Aggregate Attributes [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#23, sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#24, sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#25] +Results [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#23 AS store_only#26, sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#24 AS catalog_only#27, sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#25 AS store_and_catalog#28] + +(30) CollectLimit +Input [3]: [store_only#26, catalog_only#27, store_and_catalog#28] +Arguments: 100 + 
diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/simplified.txt new file mode 100644 index 0000000000000..7d6b96e705c39 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/simplified.txt @@ -0,0 +1,46 @@ +CollectLimit + WholeStageCodegen (8) + HashAggregate [sum,sum,sum] [catalog_only,store_and_catalog,store_only,sum,sum,sum,sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint))] + InputAdapter + Exchange #1 + WholeStageCodegen (7) + HashAggregate [customer_sk,customer_sk] [sum,sum,sum,sum,sum,sum] + Project [customer_sk,customer_sk] + InputAdapter + SortMergeJoin [customer_sk,customer_sk,item_sk,item_sk] + WholeStageCodegen (3) + Sort [customer_sk,item_sk] + HashAggregate [ss_customer_sk,ss_item_sk] [customer_sk,item_sk] + InputAdapter + Exchange [ss_customer_sk,ss_item_sk] #2 + WholeStageCodegen (2) + HashAggregate [ss_customer_sk,ss_item_sk] + Project [ss_customer_sk,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] + WholeStageCodegen (6) + Sort [customer_sk,item_sk] + HashAggregate [cs_bill_customer_sk,cs_item_sk] [customer_sk,item_sk] + InputAdapter + Exchange [cs_bill_customer_sk,cs_item_sk] #4 + WholeStageCodegen (5) + HashAggregate [cs_bill_customer_sk,cs_item_sk] + Project 
[cs_bill_customer_sk,cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt new file mode 100644 index 0000000000000..82ab0df435670 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt @@ -0,0 +1,179 @@ +== Physical Plan == +CollectLimit (30) ++- * HashAggregate (29) + +- Exchange (28) + +- * HashAggregate (27) + +- * Project (26) + +- SortMergeJoin FullOuter (25) + :- * Sort (14) + : +- * HashAggregate (13) + : +- Exchange (12) + : +- * HashAggregate (11) + : +- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.store_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.date_dim (4) + +- * Sort (24) + +- * HashAggregate (23) + +- Exchange (22) + +- * HashAggregate (21) + +- * Project (20) + +- * BroadcastHashJoin Inner BuildRight (19) + :- * Filter (17) + : +- * ColumnarToRow (16) + : +- Scan parquet default.catalog_sales (15) + +- ReusedExchange (18) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3] + +(3) Filter [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, 
ss_customer_sk#3] +Condition : isnotnull(ss_sold_date_sk#1) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#4, d_month_seq#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(9) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(10) Project [codegen id : 2] +Output [2]: [ss_item_sk#2, ss_customer_sk#3] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, d_date_sk#4] + +(11) HashAggregate [codegen id : 2] +Input [2]: [ss_item_sk#2, ss_customer_sk#3] +Keys [2]: [ss_customer_sk#3, ss_item_sk#2] +Functions: [] +Aggregate Attributes: [] +Results [2]: [ss_customer_sk#3, ss_item_sk#2] + +(12) Exchange +Input [2]: [ss_customer_sk#3, ss_item_sk#2] +Arguments: hashpartitioning(ss_customer_sk#3, ss_item_sk#2, 5), true, [id=#7] + +(13) HashAggregate [codegen id : 3] +Input [2]: [ss_customer_sk#3, ss_item_sk#2] +Keys [2]: [ss_customer_sk#3, ss_item_sk#2] +Functions: [] +Aggregate Attributes: [] +Results [2]: [ss_customer_sk#3 AS customer_sk#8, ss_item_sk#2 AS item_sk#9] + +(14) Sort [codegen id : 3] +Input [2]: [customer_sk#8, item_sk#9] +Arguments: [customer_sk#8 ASC NULLS FIRST, 
item_sk#9 ASC NULLS FIRST], false, 0 + +(15) Scan parquet default.catalog_sales +Output [3]: [cs_sold_date_sk#10, cs_bill_customer_sk#11, cs_item_sk#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 5] +Input [3]: [cs_sold_date_sk#10, cs_bill_customer_sk#11, cs_item_sk#12] + +(17) Filter [codegen id : 5] +Input [3]: [cs_sold_date_sk#10, cs_bill_customer_sk#11, cs_item_sk#12] +Condition : isnotnull(cs_sold_date_sk#10) + +(18) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#4] + +(19) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_sold_date_sk#10] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(20) Project [codegen id : 5] +Output [2]: [cs_bill_customer_sk#11, cs_item_sk#12] +Input [4]: [cs_sold_date_sk#10, cs_bill_customer_sk#11, cs_item_sk#12, d_date_sk#4] + +(21) HashAggregate [codegen id : 5] +Input [2]: [cs_bill_customer_sk#11, cs_item_sk#12] +Keys [2]: [cs_bill_customer_sk#11, cs_item_sk#12] +Functions: [] +Aggregate Attributes: [] +Results [2]: [cs_bill_customer_sk#11, cs_item_sk#12] + +(22) Exchange +Input [2]: [cs_bill_customer_sk#11, cs_item_sk#12] +Arguments: hashpartitioning(cs_bill_customer_sk#11, cs_item_sk#12, 5), true, [id=#13] + +(23) HashAggregate [codegen id : 6] +Input [2]: [cs_bill_customer_sk#11, cs_item_sk#12] +Keys [2]: [cs_bill_customer_sk#11, cs_item_sk#12] +Functions: [] +Aggregate Attributes: [] +Results [2]: [cs_bill_customer_sk#11 AS customer_sk#14, cs_item_sk#12 AS item_sk#15] + +(24) Sort [codegen id : 6] +Input [2]: [customer_sk#14, item_sk#15] +Arguments: [customer_sk#14 ASC NULLS FIRST, item_sk#15 ASC NULLS FIRST], false, 0 + +(25) SortMergeJoin +Left keys [2]: [customer_sk#8, item_sk#9] +Right keys [2]: [customer_sk#14, item_sk#15] +Join condition: None + +(26) Project [codegen 
id : 7] +Output [2]: [customer_sk#8, customer_sk#14] +Input [4]: [customer_sk#8, item_sk#9, customer_sk#14, item_sk#15] + +(27) HashAggregate [codegen id : 7] +Input [2]: [customer_sk#8, customer_sk#14] +Keys: [] +Functions [3]: [partial_sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))] +Aggregate Attributes [3]: [sum#16, sum#17, sum#18] +Results [3]: [sum#19, sum#20, sum#21] + +(28) Exchange +Input [3]: [sum#19, sum#20, sum#21] +Arguments: SinglePartition, true, [id=#22] + +(29) HashAggregate [codegen id : 8] +Input [3]: [sum#19, sum#20, sum#21] +Keys: [] +Functions [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))] +Aggregate Attributes [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#23, sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#24, sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#25] +Results [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#23 AS store_only#26, sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#24 AS catalog_only#27, sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#25 AS store_and_catalog#28] + +(30) CollectLimit +Input [3]: [store_only#26, catalog_only#27, 
store_and_catalog#28] +Arguments: 100 + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt new file mode 100644 index 0000000000000..7d6b96e705c39 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt @@ -0,0 +1,46 @@ +CollectLimit + WholeStageCodegen (8) + HashAggregate [sum,sum,sum] [catalog_only,store_and_catalog,store_only,sum,sum,sum,sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint))] + InputAdapter + Exchange #1 + WholeStageCodegen (7) + HashAggregate [customer_sk,customer_sk] [sum,sum,sum,sum,sum,sum] + Project [customer_sk,customer_sk] + InputAdapter + SortMergeJoin [customer_sk,customer_sk,item_sk,item_sk] + WholeStageCodegen (3) + Sort [customer_sk,item_sk] + HashAggregate [ss_customer_sk,ss_item_sk] [customer_sk,item_sk] + InputAdapter + Exchange [ss_customer_sk,ss_item_sk] #2 + WholeStageCodegen (2) + HashAggregate [ss_customer_sk,ss_item_sk] + Project [ss_customer_sk,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] + WholeStageCodegen (6) + Sort [customer_sk,item_sk] + HashAggregate [cs_bill_customer_sk,cs_item_sk] [customer_sk,item_sk] + InputAdapter + Exchange [cs_bill_customer_sk,cs_item_sk] #4 + WholeStageCodegen (5) + HashAggregate [cs_bill_customer_sk,cs_item_sk] + 
Project [cs_bill_customer_sk,cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98.sf100/explain.txt new file mode 100644 index 0000000000000..bbbec343e1f05 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98.sf100/explain.txt @@ -0,0 +1,162 @@ +== Physical Plan == +* Project (29) ++- * Sort (28) + +- Exchange (27) + +- * Project (26) + +- Window (25) + +- * Sort (24) + +- Exchange (23) + +- * HashAggregate (22) + +- Exchange (21) + +- * HashAggregate (20) + +- * Project (19) + +- * SortMergeJoin Inner (18) + :- * Sort (12) + : +- Exchange (11) + : +- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.store_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.date_dim (4) + +- * Sort (17) + +- Exchange (16) + +- * Filter (15) + +- * ColumnarToRow (14) + +- Scan parquet default.item (13) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] + +(3) Filter [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] +Condition : 
(isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#4, d_date#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#4, d_date#5] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#4, d_date#5] +Condition : (((isnotnull(d_date#5) AND (d_date#5 >= 10644)) AND (d_date#5 <= 10674)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_date#5] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(9) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(10) Project [codegen id : 2] +Output [2]: [ss_item_sk#2, ss_ext_sales_price#3] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3, d_date_sk#4] + +(11) Exchange +Input [2]: [ss_item_sk#2, ss_ext_sales_price#3] +Arguments: hashpartitioning(ss_item_sk#2, 5), true, [id=#7] + +(12) Sort [codegen id : 3] +Input [2]: [ss_item_sk#2, ss_ext_sales_price#3] +Arguments: [ss_item_sk#2 ASC NULLS FIRST], false, 0 + +(13) Scan parquet default.item +Output [6]: [i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/item] +PushedFilters: [In(i_category, [Sports,Books,Home]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 4] +Input [6]: [i_item_sk#8, i_item_id#9, 
i_item_desc#10, i_current_price#11, i_class#12, i_category#13] + +(15) Filter [codegen id : 4] +Input [6]: [i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Condition : (i_category#13 IN (Sports,Books,Home) AND isnotnull(i_item_sk#8)) + +(16) Exchange +Input [6]: [i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Arguments: hashpartitioning(i_item_sk#8, 5), true, [id=#14] + +(17) Sort [codegen id : 5] +Input [6]: [i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Arguments: [i_item_sk#8 ASC NULLS FIRST], false, 0 + +(18) SortMergeJoin [codegen id : 6] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#8] +Join condition: None + +(19) Project [codegen id : 6] +Output [6]: [ss_ext_sales_price#3, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Input [8]: [ss_item_sk#2, ss_ext_sales_price#3, i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] + +(20) HashAggregate [codegen id : 6] +Input [6]: [ss_ext_sales_price#3, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Keys [5]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#15] +Results [6]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, sum#16] + +(21) Exchange +Input [6]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, sum#16] +Arguments: hashpartitioning(i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, 5), true, [id=#17] + +(22) HashAggregate [codegen id : 7] +Input [6]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, sum#16] +Keys [5]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11] +Functions [1]: 
[sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#18] +Results [8]: [i_item_desc#10, i_category#13, i_class#12, i_current_price#11, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#18,17,2) AS itemrevenue#19, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#18,17,2) AS _w0#20, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#18,17,2) AS _w1#21, i_item_id#9] + +(23) Exchange +Input [8]: [i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, _w0#20, _w1#21, i_item_id#9] +Arguments: hashpartitioning(i_class#12, 5), true, [id=#22] + +(24) Sort [codegen id : 8] +Input [8]: [i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, _w0#20, _w1#21, i_item_id#9] +Arguments: [i_class#12 ASC NULLS FIRST], false, 0 + +(25) Window +Input [8]: [i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, _w0#20, _w1#21, i_item_id#9] +Arguments: [sum(_w1#21) windowspecdefinition(i_class#12, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#23], [i_class#12] + +(26) Project [codegen id : 9] +Output [7]: [i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#20) * 100.00), DecimalType(21,2), true) as decimal(27,2))) / promote_precision(_we0#23)), DecimalType(38,17), true) AS revenueratio#24, i_item_id#9] +Input [9]: [i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, _w0#20, _w1#21, i_item_id#9, _we0#23] + +(27) Exchange +Input [7]: [i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, revenueratio#24, i_item_id#9] +Arguments: rangepartitioning(i_category#13 ASC NULLS FIRST, i_class#12 ASC NULLS FIRST, i_item_id#9 ASC NULLS FIRST, i_item_desc#10 ASC NULLS FIRST, revenueratio#24 ASC NULLS FIRST, 5), true, [id=#25] + +(28) Sort [codegen id : 10] +Input [7]: 
[i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, revenueratio#24, i_item_id#9] +Arguments: [i_category#13 ASC NULLS FIRST, i_class#12 ASC NULLS FIRST, i_item_id#9 ASC NULLS FIRST, i_item_desc#10 ASC NULLS FIRST, revenueratio#24 ASC NULLS FIRST], true, 0 + +(29) Project [codegen id : 10] +Output [6]: [i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, revenueratio#24] +Input [7]: [i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, revenueratio#24, i_item_id#9] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98.sf100/simplified.txt new file mode 100644 index 0000000000000..8e55b010434aa --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98.sf100/simplified.txt @@ -0,0 +1,51 @@ +WholeStageCodegen (10) + Project [i_category,i_class,i_current_price,i_item_desc,itemrevenue,revenueratio] + Sort [i_category,i_class,i_item_desc,i_item_id,revenueratio] + InputAdapter + Exchange [i_category,i_class,i_item_desc,i_item_id,revenueratio] #1 + WholeStageCodegen (9) + Project [_w0,_we0,i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue] + InputAdapter + Window [_w1,i_class] + WholeStageCodegen (8) + Sort [i_class] + InputAdapter + Exchange [i_class] #2 + WholeStageCodegen (7) + HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,sum] [_w0,_w1,itemrevenue,sum,sum(UnscaledValue(ss_ext_sales_price))] + InputAdapter + Exchange [i_category,i_class,i_current_price,i_item_desc,i_item_id] #3 + WholeStageCodegen (6) + HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,ss_ext_sales_price] [sum,sum] + Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,ss_ext_sales_price] + SortMergeJoin [i_item_sk,ss_item_sk] + InputAdapter + WholeStageCodegen (3) + Sort 
[ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #4 + WholeStageCodegen (2) + Project [ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + WholeStageCodegen (5) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #6 + WholeStageCodegen (4) + Filter [i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/explain.txt new file mode 100644 index 0000000000000..db2cc37d26cfc --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/explain.txt @@ -0,0 +1,147 @@ +== Physical Plan == +* Project (26) ++- * Sort (25) + +- Exchange (24) + +- * Project (23) + +- Window (22) + +- * Sort (21) + +- Exchange (20) + +- * HashAggregate (19) + +- Exchange (18) + +- * HashAggregate (17) + +- * Project (16) + +- * BroadcastHashJoin Inner BuildRight (15) + :- * Project (9) + : +- * BroadcastHashJoin Inner BuildRight (8) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.store_sales (1) + : +- BroadcastExchange (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.item (4) + +- BroadcastExchange (14) + +- * Project (13) + +- * Filter (12) + +- * ColumnarToRow (11) + +- Scan parquet default.date_dim (10) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] +Batched: true +Location: 
InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] + +(3) Filter [codegen id : 3] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] +Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(4) Scan parquet default.item +Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/item] +PushedFilters: [In(i_category, [Sports,Books,Home]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(6) Filter [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Condition : (i_category#9 IN (Sports,Books,Home) AND isnotnull(i_item_sk#4)) + +(7) BroadcastExchange +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#4] +Join condition: None + +(9) Project [codegen id : 3] +Output [7]: [ss_sold_date_sk#1, ss_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(10) Scan parquet default.date_dim +Output [2]: [d_date_sk#11, d_date#12] +Batched: true 
+Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#11, d_date#12] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#11, d_date#12] +Condition : (((isnotnull(d_date#12) AND (d_date#12 >= 10644)) AND (d_date#12 <= 10674)) AND isnotnull(d_date_sk#11)) + +(13) Project [codegen id : 2] +Output [1]: [d_date_sk#11] +Input [2]: [d_date_sk#11, d_date#12] + +(14) BroadcastExchange +Input [1]: [d_date_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#11] +Join condition: None + +(16) Project [codegen id : 3] +Output [6]: [ss_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [8]: [ss_sold_date_sk#1, ss_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#11] + +(17) HashAggregate [codegen id : 3] +Input [6]: [ss_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#14] +Results [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#15] + +(18) Exchange +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#15] +Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), true, [id=#16] + +(19) HashAggregate [codegen id : 4] +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, 
i_current_price#7, sum#15] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#17] +Results [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#17,17,2) AS itemrevenue#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#17,17,2) AS _w0#19, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#17,17,2) AS _w1#20, i_item_id#5] + +(20) Exchange +Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5] +Arguments: hashpartitioning(i_class#8, 5), true, [id=#21] + +(21) Sort [codegen id : 5] +Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5] +Arguments: [i_class#8 ASC NULLS FIRST], false, 0 + +(22) Window +Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5] +Arguments: [sum(_w1#20) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#22], [i_class#8] + +(23) Project [codegen id : 6] +Output [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#19) * 100.00), DecimalType(21,2), true) as decimal(27,2))) / promote_precision(_we0#22)), DecimalType(38,17), true) AS revenueratio#23, i_item_id#5] +Input [9]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5, _we0#22] + +(24) Exchange +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, revenueratio#23, i_item_id#5] +Arguments: rangepartitioning(i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, 
revenueratio#23 ASC NULLS FIRST, 5), true, [id=#24] + +(25) Sort [codegen id : 7] +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, revenueratio#23, i_item_id#5] +Arguments: [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#23 ASC NULLS FIRST], true, 0 + +(26) Project [codegen id : 7] +Output [6]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, revenueratio#23] +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, revenueratio#23, i_item_id#5] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/simplified.txt new file mode 100644 index 0000000000000..6f39176d9a9e1 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/simplified.txt @@ -0,0 +1,42 @@ +WholeStageCodegen (7) + Project [i_category,i_class,i_current_price,i_item_desc,itemrevenue,revenueratio] + Sort [i_category,i_class,i_item_desc,i_item_id,revenueratio] + InputAdapter + Exchange [i_category,i_class,i_item_desc,i_item_id,revenueratio] #1 + WholeStageCodegen (6) + Project [_w0,_we0,i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue] + InputAdapter + Window [_w1,i_class] + WholeStageCodegen (5) + Sort [i_class] + InputAdapter + Exchange [i_class] #2 + WholeStageCodegen (4) + HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,sum] [_w0,_w1,itemrevenue,sum,sum(UnscaledValue(ss_ext_sales_price))] + InputAdapter + Exchange [i_category,i_class,i_current_price,i_item_desc,i_item_id] #3 + WholeStageCodegen (3) + HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,ss_ext_sales_price] [sum,sum] + Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,ss_ext_sales_price] + BroadcastHashJoin 
[d_date_sk,ss_sold_date_sk] + Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,ss_ext_sales_price,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Filter [i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/explain.txt new file mode 100644 index 0000000000000..67946af47b28a --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/explain.txt @@ -0,0 +1,183 @@ +== Physical Plan == +TakeOrderedAndProject (32) ++- * HashAggregate (31) + +- Exchange (30) + +- * HashAggregate (29) + +- * Project (28) + +- * BroadcastHashJoin Inner BuildRight (27) + :- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildLeft (9) + : : : :- BroadcastExchange (5) + : : : : +- * Project (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.date_dim (1) + : : : +- * Filter (8) + : : : +- * ColumnarToRow (7) + : : : +- Scan parquet default.catalog_sales (6) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.ship_mode (11) + : +- BroadcastExchange (20) + : +- * Filter (19) + : +- * 
ColumnarToRow (18) + : +- Scan parquet default.call_center (17) + +- BroadcastExchange (26) + +- * Filter (25) + +- * ColumnarToRow (24) + +- Scan parquet default.warehouse (23) + + +(1) Scan parquet default.date_dim +Output [2]: [d_date_sk#1, d_month_seq#2] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#1, d_month_seq#2] + +(3) Filter [codegen id : 1] +Input [2]: [d_date_sk#1, d_month_seq#2] +Condition : (((isnotnull(d_month_seq#2) AND (d_month_seq#2 >= 1200)) AND (d_month_seq#2 <= 1211)) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 1] +Output [1]: [d_date_sk#1] +Input [2]: [d_date_sk#1, d_month_seq#2] + +(5) BroadcastExchange +Input [1]: [d_date_sk#1] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#3] + +(6) Scan parquet default.catalog_sales +Output [5]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_call_center_sk#6, cs_ship_mode_sk#7, cs_warehouse_sk#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_ship_mode_sk), IsNotNull(cs_call_center_sk), IsNotNull(cs_ship_date_sk)] +ReadSchema: struct + +(7) ColumnarToRow +Input [5]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_call_center_sk#6, cs_ship_mode_sk#7, cs_warehouse_sk#8] + +(8) Filter +Input [5]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_call_center_sk#6, cs_ship_mode_sk#7, cs_warehouse_sk#8] +Condition : (((isnotnull(cs_warehouse_sk#8) AND isnotnull(cs_ship_mode_sk#7)) AND isnotnull(cs_call_center_sk#6)) AND 
isnotnull(cs_ship_date_sk#5)) + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [cs_ship_date_sk#5] +Join condition: None + +(10) Project [codegen id : 5] +Output [5]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_call_center_sk#6, cs_ship_mode_sk#7, cs_warehouse_sk#8] +Input [6]: [d_date_sk#1, cs_sold_date_sk#4, cs_ship_date_sk#5, cs_call_center_sk#6, cs_ship_mode_sk#7, cs_warehouse_sk#8] + +(11) Scan parquet default.ship_mode +Output [2]: [sm_ship_mode_sk#9, sm_type#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/ship_mode] +PushedFilters: [IsNotNull(sm_ship_mode_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [sm_ship_mode_sk#9, sm_type#10] + +(13) Filter [codegen id : 2] +Input [2]: [sm_ship_mode_sk#9, sm_type#10] +Condition : isnotnull(sm_ship_mode_sk#9) + +(14) BroadcastExchange +Input [2]: [sm_ship_mode_sk#9, sm_type#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] + +(15) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_ship_mode_sk#7] +Right keys [1]: [sm_ship_mode_sk#9] +Join condition: None + +(16) Project [codegen id : 5] +Output [5]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_call_center_sk#6, cs_warehouse_sk#8, sm_type#10] +Input [7]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_call_center_sk#6, cs_ship_mode_sk#7, cs_warehouse_sk#8, sm_ship_mode_sk#9, sm_type#10] + +(17) Scan parquet default.call_center +Output [2]: [cc_call_center_sk#12, cc_name#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/call_center] +PushedFilters: [IsNotNull(cc_call_center_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [2]: [cc_call_center_sk#12, cc_name#13] + +(19) Filter [codegen id : 3] 
+Input [2]: [cc_call_center_sk#12, cc_name#13] +Condition : isnotnull(cc_call_center_sk#12) + +(20) BroadcastExchange +Input [2]: [cc_call_center_sk#12, cc_name#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] + +(21) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_call_center_sk#6] +Right keys [1]: [cc_call_center_sk#12] +Join condition: None + +(22) Project [codegen id : 5] +Output [5]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_warehouse_sk#8, sm_type#10, cc_name#13] +Input [7]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_call_center_sk#6, cs_warehouse_sk#8, sm_type#10, cc_call_center_sk#12, cc_name#13] + +(23) Scan parquet default.warehouse +Output [2]: [w_warehouse_sk#15, w_warehouse_name#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilityWithStatsSuite/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [2]: [w_warehouse_sk#15, w_warehouse_name#16] + +(25) Filter [codegen id : 4] +Input [2]: [w_warehouse_sk#15, w_warehouse_name#16] +Condition : isnotnull(w_warehouse_sk#15) + +(26) BroadcastExchange +Input [2]: [w_warehouse_sk#15, w_warehouse_name#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] + +(27) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_warehouse_sk#8] +Right keys [1]: [w_warehouse_sk#15] +Join condition: None + +(28) Project [codegen id : 5] +Output [5]: [cs_sold_date_sk#4, cs_ship_date_sk#5, w_warehouse_name#16, sm_type#10, cc_name#13] +Input [7]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_warehouse_sk#8, sm_type#10, cc_name#13, w_warehouse_sk#15, w_warehouse_name#16] + +(29) HashAggregate [codegen id : 5] +Input [5]: [cs_sold_date_sk#4, cs_ship_date_sk#5, w_warehouse_name#16, sm_type#10, cc_name#13] +Keys [3]: [substr(w_warehouse_name#16, 1, 20) AS 
substr(w_warehouse_name#16, 1, 20)#18, sm_type#10, cc_name#13] +Functions [5]: [partial_sum(cast(CASE WHEN ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 30) AND ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 60) AND ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 90) AND ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 120) THEN 1 ELSE 0 END as bigint))] +Aggregate Attributes [5]: [sum#19, sum#20, sum#21, sum#22, sum#23] +Results [8]: [substr(w_warehouse_name#16, 1, 20)#18, sm_type#10, cc_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] + +(30) Exchange +Input [8]: [substr(w_warehouse_name#16, 1, 20)#18, sm_type#10, cc_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] +Arguments: hashpartitioning(substr(w_warehouse_name#16, 1, 20)#18, sm_type#10, cc_name#13, 5), true, [id=#29] + +(31) HashAggregate [codegen id : 6] +Input [8]: [substr(w_warehouse_name#16, 1, 20)#18, sm_type#10, cc_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] +Keys [3]: [substr(w_warehouse_name#16, 1, 20)#18, sm_type#10, cc_name#13] +Functions [5]: [sum(cast(CASE WHEN ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 30) AND ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 60) AND ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 90) AND ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 120)) THEN 1 ELSE 0 END as bigint)), 
sum(cast(CASE WHEN ((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 120) THEN 1 ELSE 0 END as bigint))] +Aggregate Attributes [5]: [sum(cast(CASE WHEN ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 30) THEN 1 ELSE 0 END as bigint))#30, sum(cast(CASE WHEN (((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 30) AND ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 60)) THEN 1 ELSE 0 END as bigint))#31, sum(cast(CASE WHEN (((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 60) AND ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 90)) THEN 1 ELSE 0 END as bigint))#32, sum(cast(CASE WHEN (((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 90) AND ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 120)) THEN 1 ELSE 0 END as bigint))#33, sum(cast(CASE WHEN ((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 120) THEN 1 ELSE 0 END as bigint))#34] +Results [8]: [substr(w_warehouse_name#16, 1, 20)#18 AS substr(w_warehouse_name, 1, 20)#35, sm_type#10, cc_name#13, sum(cast(CASE WHEN ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 30) THEN 1 ELSE 0 END as bigint))#30 AS 30 days #36, sum(cast(CASE WHEN (((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 30) AND ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 60)) THEN 1 ELSE 0 END as bigint))#31 AS 31 - 60 days #37, sum(cast(CASE WHEN (((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 60) AND ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 90)) THEN 1 ELSE 0 END as bigint))#32 AS 61 - 90 days #38, sum(cast(CASE WHEN (((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 90) AND ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 120)) THEN 1 ELSE 0 END as bigint))#33 AS 91 - 120 days #39, sum(cast(CASE WHEN ((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 120) THEN 1 ELSE 0 END as bigint))#34 AS >120 days #40] + +(32) TakeOrderedAndProject +Input [8]: [substr(w_warehouse_name, 1, 20)#35, sm_type#10, cc_name#13, 30 days #36, 31 - 60 days #37, 61 - 90 days #38, 91 - 120 days #39, >120 days #40] +Arguments: 100, [substr(w_warehouse_name, 1, 20)#35 ASC NULLS FIRST, sm_type#10 ASC NULLS FIRST, cc_name#13 ASC NULLS FIRST], 
[substr(w_warehouse_name, 1, 20)#35, sm_type#10, cc_name#13, 30 days #36, 31 - 60 days #37, 61 - 90 days #38, 91 - 120 days #39, >120 days #40] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/simplified.txt new file mode 100644 index 0000000000000..0f3000ced5421 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/simplified.txt @@ -0,0 +1,48 @@ +TakeOrderedAndProject [30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,cc_name,sm_type,substr(w_warehouse_name, 1, 20)] + WholeStageCodegen (6) + HashAggregate [cc_name,sm_type,substr(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum] [30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,substr(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 60) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint))] + InputAdapter + Exchange [cc_name,sm_type,substr(w_warehouse_name, 1, 20)] #1 + WholeStageCodegen (5) + HashAggregate [cc_name,cs_ship_date_sk,cs_sold_date_sk,sm_type,w_warehouse_name] [substr(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [cc_name,cs_ship_date_sk,cs_sold_date_sk,sm_type,w_warehouse_name] + BroadcastHashJoin [cs_warehouse_sk,w_warehouse_sk] + Project [cc_name,cs_ship_date_sk,cs_sold_date_sk,cs_warehouse_sk,sm_type] + BroadcastHashJoin 
[cc_call_center_sk,cs_call_center_sk] + Project [cs_call_center_sk,cs_ship_date_sk,cs_sold_date_sk,cs_warehouse_sk,sm_type] + BroadcastHashJoin [cs_ship_mode_sk,sm_ship_mode_sk] + Project [cs_call_center_sk,cs_ship_date_sk,cs_ship_mode_sk,cs_sold_date_sk,cs_warehouse_sk] + BroadcastHashJoin [cs_ship_date_sk,d_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] + Filter [cs_call_center_sk,cs_ship_date_sk,cs_ship_mode_sk,cs_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_call_center_sk,cs_ship_date_sk,cs_ship_mode_sk,cs_sold_date_sk,cs_warehouse_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [sm_ship_mode_sk] + ColumnarToRow + InputAdapter + Scan parquet default.ship_mode [sm_ship_mode_sk,sm_type] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [cc_call_center_sk] + ColumnarToRow + InputAdapter + Scan parquet default.call_center [cc_call_center_sk,cc_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.warehouse [w_warehouse_name,w_warehouse_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/explain.txt new file mode 100644 index 0000000000000..48aa878fe8d6d --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/explain.txt @@ -0,0 +1,183 @@ +== Physical Plan == +TakeOrderedAndProject (32) ++- * HashAggregate (31) + +- Exchange (30) + +- * HashAggregate (29) + +- * Project (28) + +- * BroadcastHashJoin Inner BuildRight (27) + :- * Project (21) + : +- * BroadcastHashJoin Inner BuildRight (20) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : 
: :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.catalog_sales (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.warehouse (4) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet default.ship_mode (10) + : +- BroadcastExchange (19) + : +- * Filter (18) + : +- * ColumnarToRow (17) + : +- Scan parquet default.call_center (16) + +- BroadcastExchange (26) + +- * Project (25) + +- * Filter (24) + +- * ColumnarToRow (23) + +- Scan parquet default.date_dim (22) + + +(1) Scan parquet default.catalog_sales +Output [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, cs_ship_mode_sk#4, cs_warehouse_sk#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_ship_mode_sk), IsNotNull(cs_call_center_sk), IsNotNull(cs_ship_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, cs_ship_mode_sk#4, cs_warehouse_sk#5] + +(3) Filter [codegen id : 5] +Input [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, cs_ship_mode_sk#4, cs_warehouse_sk#5] +Condition : (((isnotnull(cs_warehouse_sk#5) AND isnotnull(cs_ship_mode_sk#4)) AND isnotnull(cs_call_center_sk#3)) AND isnotnull(cs_ship_date_sk#2)) + +(4) Scan parquet default.warehouse +Output [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: 
[w_warehouse_sk#6, w_warehouse_name#7] + +(6) Filter [codegen id : 1] +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Condition : isnotnull(w_warehouse_sk#6) + +(7) BroadcastExchange +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#8] + +(8) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_warehouse_sk#5] +Right keys [1]: [w_warehouse_sk#6] +Join condition: None + +(9) Project [codegen id : 5] +Output [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, cs_ship_mode_sk#4, w_warehouse_name#7] +Input [7]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, cs_ship_mode_sk#4, cs_warehouse_sk#5, w_warehouse_sk#6, w_warehouse_name#7] + +(10) Scan parquet default.ship_mode +Output [2]: [sm_ship_mode_sk#9, sm_type#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/ship_mode] +PushedFilters: [IsNotNull(sm_ship_mode_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [sm_ship_mode_sk#9, sm_type#10] + +(12) Filter [codegen id : 2] +Input [2]: [sm_ship_mode_sk#9, sm_type#10] +Condition : isnotnull(sm_ship_mode_sk#9) + +(13) BroadcastExchange +Input [2]: [sm_ship_mode_sk#9, sm_type#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] + +(14) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_ship_mode_sk#4] +Right keys [1]: [sm_ship_mode_sk#9] +Join condition: None + +(15) Project [codegen id : 5] +Output [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, w_warehouse_name#7, sm_type#10] +Input [7]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, cs_ship_mode_sk#4, w_warehouse_name#7, sm_ship_mode_sk#9, sm_type#10] + +(16) Scan parquet default.call_center +Output [2]: [cc_call_center_sk#12, cc_name#13] +Batched: true +Location: 
InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/call_center] +PushedFilters: [IsNotNull(cc_call_center_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [2]: [cc_call_center_sk#12, cc_name#13] + +(18) Filter [codegen id : 3] +Input [2]: [cc_call_center_sk#12, cc_name#13] +Condition : isnotnull(cc_call_center_sk#12) + +(19) BroadcastExchange +Input [2]: [cc_call_center_sk#12, cc_name#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] + +(20) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_call_center_sk#3] +Right keys [1]: [cc_call_center_sk#12] +Join condition: None + +(21) Project [codegen id : 5] +Output [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, w_warehouse_name#7, sm_type#10, cc_name#13] +Input [7]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, w_warehouse_name#7, sm_type#10, cc_call_center_sk#12, cc_name#13] + +(22) Scan parquet default.date_dim +Output [2]: [d_date_sk#15, d_month_seq#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV1_4_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 4] +Input [2]: [d_date_sk#15, d_month_seq#16] + +(24) Filter [codegen id : 4] +Input [2]: [d_date_sk#15, d_month_seq#16] +Condition : (((isnotnull(d_month_seq#16) AND (d_month_seq#16 >= 1200)) AND (d_month_seq#16 <= 1211)) AND isnotnull(d_date_sk#15)) + +(25) Project [codegen id : 4] +Output [1]: [d_date_sk#15] +Input [2]: [d_date_sk#15, d_month_seq#16] + +(26) BroadcastExchange +Input [1]: [d_date_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] + +(27) BroadcastHashJoin [codegen id : 5] 
+Left keys [1]: [cs_ship_date_sk#2] +Right keys [1]: [d_date_sk#15] +Join condition: None + +(28) Project [codegen id : 5] +Output [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, w_warehouse_name#7, sm_type#10, cc_name#13] +Input [6]: [cs_sold_date_sk#1, cs_ship_date_sk#2, w_warehouse_name#7, sm_type#10, cc_name#13, d_date_sk#15] + +(29) HashAggregate [codegen id : 5] +Input [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, w_warehouse_name#7, sm_type#10, cc_name#13] +Keys [3]: [substr(w_warehouse_name#7, 1, 20) AS substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, cc_name#13] +Functions [5]: [partial_sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))] +Aggregate Attributes [5]: [sum#19, sum#20, sum#21, sum#22, sum#23] +Results [8]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, cc_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] + +(30) Exchange +Input [8]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, cc_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] +Arguments: hashpartitioning(substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, cc_name#13, 5), true, [id=#29] + +(31) HashAggregate [codegen id : 6] +Input [8]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, cc_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] +Keys [3]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, cc_name#13] +Functions [5]: [sum(cast(CASE WHEN ((cs_ship_date_sk#2 - 
cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))] +Aggregate Attributes [5]: [sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint))#30, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint))#31, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint))#32, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint))#33, sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))#34] +Results [8]: [substr(w_warehouse_name#7, 1, 20)#18 AS substr(w_warehouse_name, 1, 20)#35, sm_type#10, cc_name#13, sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint))#30 AS 30 days #36, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint))#31 AS 31 - 60 days #37, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint))#32 AS 61 - 90 days #38, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 
120)) THEN 1 ELSE 0 END as bigint))#33 AS 91 - 120 days #39, sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))#34 AS >120 days #40] + +(32) TakeOrderedAndProject +Input [8]: [substr(w_warehouse_name, 1, 20)#35, sm_type#10, cc_name#13, 30 days #36, 31 - 60 days #37, 61 - 90 days #38, 91 - 120 days #39, >120 days #40] +Arguments: 100, [substr(w_warehouse_name, 1, 20)#35 ASC NULLS FIRST, sm_type#10 ASC NULLS FIRST, cc_name#13 ASC NULLS FIRST], [substr(w_warehouse_name, 1, 20)#35, sm_type#10, cc_name#13, 30 days #36, 31 - 60 days #37, 61 - 90 days #38, 91 - 120 days #39, >120 days #40] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/simplified.txt new file mode 100644 index 0000000000000..1cb20efd6fc34 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/simplified.txt @@ -0,0 +1,48 @@ +TakeOrderedAndProject [30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,cc_name,sm_type,substr(w_warehouse_name, 1, 20)] + WholeStageCodegen (6) + HashAggregate [cc_name,sm_type,substr(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum] [30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,substr(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 60) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint))] + 
InputAdapter + Exchange [cc_name,sm_type,substr(w_warehouse_name, 1, 20)] #1 + WholeStageCodegen (5) + HashAggregate [cc_name,cs_ship_date_sk,cs_sold_date_sk,sm_type,w_warehouse_name] [substr(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [cc_name,cs_ship_date_sk,cs_sold_date_sk,sm_type,w_warehouse_name] + BroadcastHashJoin [cs_ship_date_sk,d_date_sk] + Project [cc_name,cs_ship_date_sk,cs_sold_date_sk,sm_type,w_warehouse_name] + BroadcastHashJoin [cc_call_center_sk,cs_call_center_sk] + Project [cs_call_center_sk,cs_ship_date_sk,cs_sold_date_sk,sm_type,w_warehouse_name] + BroadcastHashJoin [cs_ship_mode_sk,sm_ship_mode_sk] + Project [cs_call_center_sk,cs_ship_date_sk,cs_ship_mode_sk,cs_sold_date_sk,w_warehouse_name] + BroadcastHashJoin [cs_warehouse_sk,w_warehouse_sk] + Filter [cs_call_center_sk,cs_ship_date_sk,cs_ship_mode_sk,cs_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_call_center_sk,cs_ship_date_sk,cs_ship_mode_sk,cs_sold_date_sk,cs_warehouse_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.warehouse [w_warehouse_name,w_warehouse_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [sm_ship_mode_sk] + ColumnarToRow + InputAdapter + Scan parquet default.ship_mode [sm_ship_mode_sk,sm_type] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [cc_call_center_sk] + ColumnarToRow + InputAdapter + Scan parquet default.call_center [cc_call_center_sk,cc_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [d_date_sk] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a.sf100/explain.txt new file 
mode 100644 index 0000000000000..26797aa2de40e --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a.sf100/explain.txt @@ -0,0 +1,286 @@ +== Physical Plan == +TakeOrderedAndProject (52) ++- * HashAggregate (51) + +- Exchange (50) + +- * HashAggregate (49) + +- * Project (48) + +- * BroadcastHashJoin Inner BuildLeft (47) + :- BroadcastExchange (43) + : +- * Project (42) + : +- * BroadcastHashJoin Inner BuildRight (41) + : :- * Project (35) + : : +- SortMergeJoin LeftSemi (34) + : : :- SortMergeJoin LeftSemi (18) + : : : :- * Sort (5) + : : : : +- Exchange (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.customer (1) + : : : +- * Sort (17) + : : : +- Exchange (16) + : : : +- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Filter (8) + : : : : +- * ColumnarToRow (7) + : : : : +- Scan parquet default.store_sales (6) + : : : +- BroadcastExchange (13) + : : : +- * Project (12) + : : : +- * Filter (11) + : : : +- * ColumnarToRow (10) + : : : +- Scan parquet default.date_dim (9) + : : +- * Sort (33) + : : +- Exchange (32) + : : +- Union (31) + : : :- * Project (24) + : : : +- * BroadcastHashJoin Inner BuildRight (23) + : : : :- * Filter (21) + : : : : +- * ColumnarToRow (20) + : : : : +- Scan parquet default.web_sales (19) + : : : +- ReusedExchange (22) + : : +- * Project (30) + : : +- * BroadcastHashJoin Inner BuildRight (29) + : : :- * Filter (27) + : : : +- * ColumnarToRow (26) + : : : +- Scan parquet default.catalog_sales (25) + : : +- ReusedExchange (28) + : +- BroadcastExchange (40) + : +- * Project (39) + : +- * Filter (38) + : +- * ColumnarToRow (37) + : +- Scan parquet default.customer_address (36) + +- * Filter (46) + +- * ColumnarToRow (45) + +- Scan parquet default.customer_demographics (44) + + +(1) Scan parquet default.customer +Output [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Batched: true +Location: 
InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(3) Filter [codegen id : 1] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Condition : (isnotnull(c_current_addr_sk#3) AND isnotnull(c_current_cdemo_sk#2)) + +(4) Exchange +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Arguments: hashpartitioning(c_customer_sk#1, 5), true, [id=#4] + +(5) Sort [codegen id : 2] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Arguments: [c_customer_sk#1 ASC NULLS FIRST], false, 0 + +(6) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#5, ss_customer_sk#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 4] +Input [2]: [ss_sold_date_sk#5, ss_customer_sk#6] + +(8) Filter [codegen id : 4] +Input [2]: [ss_sold_date_sk#5, ss_customer_sk#6] +Condition : isnotnull(ss_sold_date_sk#5) + +(9) Scan parquet default.date_dim +Output [3]: [d_date_sk#7, d_year#8, d_moy#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_year,2002), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,7), IsNotNull(d_date_sk)] +ReadSchema: struct + +(10) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#7, d_year#8, d_moy#9] + +(11) Filter [codegen id : 3] +Input [3]: [d_date_sk#7, d_year#8, d_moy#9] 
+Condition : (((((isnotnull(d_moy#9) AND isnotnull(d_year#8)) AND (d_year#8 = 2002)) AND (d_moy#9 >= 4)) AND (d_moy#9 <= 7)) AND isnotnull(d_date_sk#7)) + +(12) Project [codegen id : 3] +Output [1]: [d_date_sk#7] +Input [3]: [d_date_sk#7, d_year#8, d_moy#9] + +(13) BroadcastExchange +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(15) Project [codegen id : 4] +Output [1]: [ss_customer_sk#6] +Input [3]: [ss_sold_date_sk#5, ss_customer_sk#6, d_date_sk#7] + +(16) Exchange +Input [1]: [ss_customer_sk#6] +Arguments: hashpartitioning(ss_customer_sk#6, 5), true, [id=#11] + +(17) Sort [codegen id : 5] +Input [1]: [ss_customer_sk#6] +Arguments: [ss_customer_sk#6 ASC NULLS FIRST], false, 0 + +(18) SortMergeJoin +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#6] +Join condition: None + +(19) Scan parquet default.web_sales +Output [2]: [ws_sold_date_sk#12, ws_bill_customer_sk#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 7] +Input [2]: [ws_sold_date_sk#12, ws_bill_customer_sk#13] + +(21) Filter [codegen id : 7] +Input [2]: [ws_sold_date_sk#12, ws_bill_customer_sk#13] +Condition : isnotnull(ws_sold_date_sk#12) + +(22) ReusedExchange [Reuses operator id: 13] +Output [1]: [d_date_sk#7] + +(23) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ws_sold_date_sk#12] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(24) Project [codegen id : 7] +Output [1]: [ws_bill_customer_sk#13 AS customer_sk#14] +Input [3]: [ws_sold_date_sk#12, ws_bill_customer_sk#13, d_date_sk#7] + +(25) Scan parquet default.catalog_sales +Output 
[2]: [cs_sold_date_sk#15, cs_ship_customer_sk#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(26) ColumnarToRow [codegen id : 9] +Input [2]: [cs_sold_date_sk#15, cs_ship_customer_sk#16] + +(27) Filter [codegen id : 9] +Input [2]: [cs_sold_date_sk#15, cs_ship_customer_sk#16] +Condition : isnotnull(cs_sold_date_sk#15) + +(28) ReusedExchange [Reuses operator id: 13] +Output [1]: [d_date_sk#7] + +(29) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [cs_sold_date_sk#15] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(30) Project [codegen id : 9] +Output [1]: [cs_ship_customer_sk#16 AS customer_sk#17] +Input [3]: [cs_sold_date_sk#15, cs_ship_customer_sk#16, d_date_sk#7] + +(31) Union + +(32) Exchange +Input [1]: [customer_sk#14] +Arguments: hashpartitioning(customer_sk#14, 5), true, [id=#18] + +(33) Sort [codegen id : 10] +Input [1]: [customer_sk#14] +Arguments: [customer_sk#14 ASC NULLS FIRST], false, 0 + +(34) SortMergeJoin +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [customer_sk#14] +Join condition: None + +(35) Project [codegen id : 12] +Output [2]: [c_current_cdemo_sk#2, c_current_addr_sk#3] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(36) Scan parquet default.customer_address +Output [2]: [ca_address_sk#19, ca_county#20] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [In(ca_county, [Walker County,Richland County,Gaines County,Douglas County,Dona Ana County]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(37) ColumnarToRow [codegen id : 11] +Input [2]: [ca_address_sk#19, ca_county#20] + +(38) Filter [codegen id : 11] +Input [2]: [ca_address_sk#19, 
ca_county#20] +Condition : (ca_county#20 IN (Walker County,Richland County,Gaines County,Douglas County,Dona Ana County) AND isnotnull(ca_address_sk#19)) + +(39) Project [codegen id : 11] +Output [1]: [ca_address_sk#19] +Input [2]: [ca_address_sk#19, ca_county#20] + +(40) BroadcastExchange +Input [1]: [ca_address_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] + +(41) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [c_current_addr_sk#3] +Right keys [1]: [ca_address_sk#19] +Join condition: None + +(42) Project [codegen id : 12] +Output [1]: [c_current_cdemo_sk#2] +Input [3]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#19] + +(43) BroadcastExchange +Input [1]: [c_current_cdemo_sk#2] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] + +(44) Scan parquet default.customer_demographics +Output [9]: [cd_demo_sk#23, cd_gender#24, cd_marital_status#25, cd_education_status#26, cd_purchase_estimate#27, cd_credit_rating#28, cd_dep_count#29, cd_dep_employed_count#30, cd_dep_college_count#31] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(45) ColumnarToRow +Input [9]: [cd_demo_sk#23, cd_gender#24, cd_marital_status#25, cd_education_status#26, cd_purchase_estimate#27, cd_credit_rating#28, cd_dep_count#29, cd_dep_employed_count#30, cd_dep_college_count#31] + +(46) Filter +Input [9]: [cd_demo_sk#23, cd_gender#24, cd_marital_status#25, cd_education_status#26, cd_purchase_estimate#27, cd_credit_rating#28, cd_dep_count#29, cd_dep_employed_count#30, cd_dep_college_count#31] +Condition : isnotnull(cd_demo_sk#23) + +(47) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [c_current_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#23] +Join condition: None + +(48) 
Project [codegen id : 13] +Output [8]: [cd_gender#24, cd_marital_status#25, cd_education_status#26, cd_purchase_estimate#27, cd_credit_rating#28, cd_dep_count#29, cd_dep_employed_count#30, cd_dep_college_count#31] +Input [10]: [c_current_cdemo_sk#2, cd_demo_sk#23, cd_gender#24, cd_marital_status#25, cd_education_status#26, cd_purchase_estimate#27, cd_credit_rating#28, cd_dep_count#29, cd_dep_employed_count#30, cd_dep_college_count#31] + +(49) HashAggregate [codegen id : 13] +Input [8]: [cd_gender#24, cd_marital_status#25, cd_education_status#26, cd_purchase_estimate#27, cd_credit_rating#28, cd_dep_count#29, cd_dep_employed_count#30, cd_dep_college_count#31] +Keys [8]: [cd_gender#24, cd_marital_status#25, cd_education_status#26, cd_purchase_estimate#27, cd_credit_rating#28, cd_dep_count#29, cd_dep_employed_count#30, cd_dep_college_count#31] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#32] +Results [9]: [cd_gender#24, cd_marital_status#25, cd_education_status#26, cd_purchase_estimate#27, cd_credit_rating#28, cd_dep_count#29, cd_dep_employed_count#30, cd_dep_college_count#31, count#33] + +(50) Exchange +Input [9]: [cd_gender#24, cd_marital_status#25, cd_education_status#26, cd_purchase_estimate#27, cd_credit_rating#28, cd_dep_count#29, cd_dep_employed_count#30, cd_dep_college_count#31, count#33] +Arguments: hashpartitioning(cd_gender#24, cd_marital_status#25, cd_education_status#26, cd_purchase_estimate#27, cd_credit_rating#28, cd_dep_count#29, cd_dep_employed_count#30, cd_dep_college_count#31, 5), true, [id=#34] + +(51) HashAggregate [codegen id : 14] +Input [9]: [cd_gender#24, cd_marital_status#25, cd_education_status#26, cd_purchase_estimate#27, cd_credit_rating#28, cd_dep_count#29, cd_dep_employed_count#30, cd_dep_college_count#31, count#33] +Keys [8]: [cd_gender#24, cd_marital_status#25, cd_education_status#26, cd_purchase_estimate#27, cd_credit_rating#28, cd_dep_count#29, cd_dep_employed_count#30, cd_dep_college_count#31] +Functions [1]: 
[count(1)] +Aggregate Attributes [1]: [count(1)#35] +Results [14]: [cd_gender#24, cd_marital_status#25, cd_education_status#26, count(1)#35 AS cnt1#36, cd_purchase_estimate#27, count(1)#35 AS cnt2#37, cd_credit_rating#28, count(1)#35 AS cnt3#38, cd_dep_count#29, count(1)#35 AS cnt4#39, cd_dep_employed_count#30, count(1)#35 AS cnt5#40, cd_dep_college_count#31, count(1)#35 AS cnt6#41] + +(52) TakeOrderedAndProject +Input [14]: [cd_gender#24, cd_marital_status#25, cd_education_status#26, cnt1#36, cd_purchase_estimate#27, cnt2#37, cd_credit_rating#28, cnt3#38, cd_dep_count#29, cnt4#39, cd_dep_employed_count#30, cnt5#40, cd_dep_college_count#31, cnt6#41] +Arguments: 100, [cd_gender#24 ASC NULLS FIRST, cd_marital_status#25 ASC NULLS FIRST, cd_education_status#26 ASC NULLS FIRST, cd_purchase_estimate#27 ASC NULLS FIRST, cd_credit_rating#28 ASC NULLS FIRST, cd_dep_count#29 ASC NULLS FIRST, cd_dep_employed_count#30 ASC NULLS FIRST, cd_dep_college_count#31 ASC NULLS FIRST], [cd_gender#24, cd_marital_status#25, cd_education_status#26, cnt1#36, cd_purchase_estimate#27, cnt2#37, cd_credit_rating#28, cnt3#38, cd_dep_count#29, cnt4#39, cd_dep_employed_count#30, cnt5#40, cd_dep_college_count#31, cnt6#41] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a.sf100/simplified.txt new file mode 100644 index 0000000000000..9a144aee5f12d --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a.sf100/simplified.txt @@ -0,0 +1,81 @@ +TakeOrderedAndProject [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate,cnt1,cnt2,cnt3,cnt4,cnt5,cnt6] + WholeStageCodegen (14) + HashAggregate [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate,count] 
[cnt1,cnt2,cnt3,cnt4,cnt5,cnt6,count,count(1)] + InputAdapter + Exchange [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] #1 + WholeStageCodegen (13) + HashAggregate [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] [count,count] + Project [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (12) + Project [c_current_cdemo_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_addr_sk,c_current_cdemo_sk] + InputAdapter + SortMergeJoin [c_customer_sk,customer_sk] + SortMergeJoin [c_customer_sk,ss_customer_sk] + WholeStageCodegen (2) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #3 + WholeStageCodegen (1) + Filter [c_current_addr_sk,c_current_cdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + WholeStageCodegen (5) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #4 + WholeStageCodegen (4) + Project [ss_customer_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + WholeStageCodegen (10) + Sort [customer_sk] + InputAdapter + Exchange [customer_sk] #6 + Union + WholeStageCodegen (7) + Project [ws_bill_customer_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan 
parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 + WholeStageCodegen (9) + Project [cs_ship_customer_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (11) + Project [ca_address_sk] + Filter [ca_address_sk,ca_county] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_county] + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_credit_rating,cd_demo_sk,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a/explain.txt new file mode 100644 index 0000000000000..aba866b2117a1 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a/explain.txt @@ -0,0 +1,266 @@ +== Physical Plan == +TakeOrderedAndProject (48) ++- * HashAggregate (47) + +- Exchange (46) + +- * HashAggregate (45) + +- * Project (44) + +- * BroadcastHashJoin Inner BuildRight (43) + :- * Project (38) + : +- * BroadcastHashJoin Inner BuildRight (37) + : :- * Project (31) + : : +- * BroadcastHashJoin LeftSemi BuildRight (30) + : : :- * BroadcastHashJoin LeftSemi BuildRight (15) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.customer (1) + : : : +- BroadcastExchange (14) + : : : +- * Project (13) + : : : +- * BroadcastHashJoin Inner BuildRight (12) + : : : :- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.store_sales (4) + : : : +- BroadcastExchange (11) + : : : +- * Project 
(10) + : : : +- * Filter (9) + : : : +- * ColumnarToRow (8) + : : : +- Scan parquet default.date_dim (7) + : : +- BroadcastExchange (29) + : : +- Union (28) + : : :- * Project (21) + : : : +- * BroadcastHashJoin Inner BuildRight (20) + : : : :- * Filter (18) + : : : : +- * ColumnarToRow (17) + : : : : +- Scan parquet default.web_sales (16) + : : : +- ReusedExchange (19) + : : +- * Project (27) + : : +- * BroadcastHashJoin Inner BuildRight (26) + : : :- * Filter (24) + : : : +- * ColumnarToRow (23) + : : : +- Scan parquet default.catalog_sales (22) + : : +- ReusedExchange (25) + : +- BroadcastExchange (36) + : +- * Project (35) + : +- * Filter (34) + : +- * ColumnarToRow (33) + : +- Scan parquet default.customer_address (32) + +- BroadcastExchange (42) + +- * Filter (41) + +- * ColumnarToRow (40) + +- Scan parquet default.customer_demographics (39) + + +(1) Scan parquet default.customer +Output [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 9] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(3) Filter [codegen id : 9] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Condition : (isnotnull(c_current_addr_sk#3) AND isnotnull(c_current_cdemo_sk#2)) + +(4) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#4, ss_customer_sk#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 2] +Input [2]: [ss_sold_date_sk#4, ss_customer_sk#5] + +(6) Filter [codegen id : 2] +Input [2]: 
[ss_sold_date_sk#4, ss_customer_sk#5] +Condition : isnotnull(ss_sold_date_sk#4) + +(7) Scan parquet default.date_dim +Output [3]: [d_date_sk#6, d_year#7, d_moy#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_year,2002), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,7), IsNotNull(d_date_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_moy#8] + +(9) Filter [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_moy#8] +Condition : (((((isnotnull(d_moy#8) AND isnotnull(d_year#7)) AND (d_year#7 = 2002)) AND (d_moy#8 >= 4)) AND (d_moy#8 <= 7)) AND isnotnull(d_date_sk#6)) + +(10) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [3]: [d_date_sk#6, d_year#7, d_moy#8] + +(11) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + +(12) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#4] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(13) Project [codegen id : 2] +Output [1]: [ss_customer_sk#5] +Input [3]: [ss_sold_date_sk#4, ss_customer_sk#5, d_date_sk#6] + +(14) BroadcastExchange +Input [1]: [ss_customer_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] + +(15) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#5] +Join condition: None + +(16) Scan parquet default.web_sales +Output [2]: [ws_sold_date_sk#11, ws_bill_customer_sk#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 4] +Input 
[2]: [ws_sold_date_sk#11, ws_bill_customer_sk#12] + +(18) Filter [codegen id : 4] +Input [2]: [ws_sold_date_sk#11, ws_bill_customer_sk#12] +Condition : isnotnull(ws_sold_date_sk#11) + +(19) ReusedExchange [Reuses operator id: 11] +Output [1]: [d_date_sk#6] + +(20) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_sold_date_sk#11] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(21) Project [codegen id : 4] +Output [1]: [ws_bill_customer_sk#12 AS customer_sk#13] +Input [3]: [ws_sold_date_sk#11, ws_bill_customer_sk#12, d_date_sk#6] + +(22) Scan parquet default.catalog_sales +Output [2]: [cs_sold_date_sk#14, cs_ship_customer_sk#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 6] +Input [2]: [cs_sold_date_sk#14, cs_ship_customer_sk#15] + +(24) Filter [codegen id : 6] +Input [2]: [cs_sold_date_sk#14, cs_ship_customer_sk#15] +Condition : isnotnull(cs_sold_date_sk#14) + +(25) ReusedExchange [Reuses operator id: 11] +Output [1]: [d_date_sk#6] + +(26) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_sold_date_sk#14] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(27) Project [codegen id : 6] +Output [1]: [cs_ship_customer_sk#15 AS customer_sk#16] +Input [3]: [cs_sold_date_sk#14, cs_ship_customer_sk#15, d_date_sk#6] + +(28) Union + +(29) BroadcastExchange +Input [1]: [customer_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] + +(30) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [customer_sk#13] +Join condition: None + +(31) Project [codegen id : 9] +Output [2]: [c_current_cdemo_sk#2, c_current_addr_sk#3] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(32) Scan parquet default.customer_address +Output 
[2]: [ca_address_sk#18, ca_county#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/customer_address] +PushedFilters: [In(ca_county, [Walker County,Richland County,Gaines County,Douglas County,Dona Ana County]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(33) ColumnarToRow [codegen id : 7] +Input [2]: [ca_address_sk#18, ca_county#19] + +(34) Filter [codegen id : 7] +Input [2]: [ca_address_sk#18, ca_county#19] +Condition : (ca_county#19 IN (Walker County,Richland County,Gaines County,Douglas County,Dona Ana County) AND isnotnull(ca_address_sk#18)) + +(35) Project [codegen id : 7] +Output [1]: [ca_address_sk#18] +Input [2]: [ca_address_sk#18, ca_county#19] + +(36) BroadcastExchange +Input [1]: [ca_address_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(37) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_addr_sk#3] +Right keys [1]: [ca_address_sk#18] +Join condition: None + +(38) Project [codegen id : 9] +Output [1]: [c_current_cdemo_sk#2] +Input [3]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#18] + +(39) Scan parquet default.customer_demographics +Output [9]: [cd_demo_sk#21, cd_gender#22, cd_marital_status#23, cd_education_status#24, cd_purchase_estimate#25, cd_credit_rating#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(40) ColumnarToRow [codegen id : 8] +Input [9]: [cd_demo_sk#21, cd_gender#22, cd_marital_status#23, cd_education_status#24, cd_purchase_estimate#25, cd_credit_rating#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] + +(41) Filter [codegen id : 8] +Input [9]: 
[cd_demo_sk#21, cd_gender#22, cd_marital_status#23, cd_education_status#24, cd_purchase_estimate#25, cd_credit_rating#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] +Condition : isnotnull(cd_demo_sk#21) + +(42) BroadcastExchange +Input [9]: [cd_demo_sk#21, cd_gender#22, cd_marital_status#23, cd_education_status#24, cd_purchase_estimate#25, cd_credit_rating#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#30] + +(43) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#21] +Join condition: None + +(44) Project [codegen id : 9] +Output [8]: [cd_gender#22, cd_marital_status#23, cd_education_status#24, cd_purchase_estimate#25, cd_credit_rating#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] +Input [10]: [c_current_cdemo_sk#2, cd_demo_sk#21, cd_gender#22, cd_marital_status#23, cd_education_status#24, cd_purchase_estimate#25, cd_credit_rating#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] + +(45) HashAggregate [codegen id : 9] +Input [8]: [cd_gender#22, cd_marital_status#23, cd_education_status#24, cd_purchase_estimate#25, cd_credit_rating#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] +Keys [8]: [cd_gender#22, cd_marital_status#23, cd_education_status#24, cd_purchase_estimate#25, cd_credit_rating#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#31] +Results [9]: [cd_gender#22, cd_marital_status#23, cd_education_status#24, cd_purchase_estimate#25, cd_credit_rating#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29, count#32] + +(46) Exchange +Input [9]: [cd_gender#22, cd_marital_status#23, cd_education_status#24, cd_purchase_estimate#25, cd_credit_rating#26, cd_dep_count#27, 
cd_dep_employed_count#28, cd_dep_college_count#29, count#32] +Arguments: hashpartitioning(cd_gender#22, cd_marital_status#23, cd_education_status#24, cd_purchase_estimate#25, cd_credit_rating#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29, 5), true, [id=#33] + +(47) HashAggregate [codegen id : 10] +Input [9]: [cd_gender#22, cd_marital_status#23, cd_education_status#24, cd_purchase_estimate#25, cd_credit_rating#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29, count#32] +Keys [8]: [cd_gender#22, cd_marital_status#23, cd_education_status#24, cd_purchase_estimate#25, cd_credit_rating#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#34] +Results [14]: [cd_gender#22, cd_marital_status#23, cd_education_status#24, count(1)#34 AS cnt1#35, cd_purchase_estimate#25, count(1)#34 AS cnt2#36, cd_credit_rating#26, count(1)#34 AS cnt3#37, cd_dep_count#27, count(1)#34 AS cnt4#38, cd_dep_employed_count#28, count(1)#34 AS cnt5#39, cd_dep_college_count#29, count(1)#34 AS cnt6#40] + +(48) TakeOrderedAndProject +Input [14]: [cd_gender#22, cd_marital_status#23, cd_education_status#24, cnt1#35, cd_purchase_estimate#25, cnt2#36, cd_credit_rating#26, cnt3#37, cd_dep_count#27, cnt4#38, cd_dep_employed_count#28, cnt5#39, cd_dep_college_count#29, cnt6#40] +Arguments: 100, [cd_gender#22 ASC NULLS FIRST, cd_marital_status#23 ASC NULLS FIRST, cd_education_status#24 ASC NULLS FIRST, cd_purchase_estimate#25 ASC NULLS FIRST, cd_credit_rating#26 ASC NULLS FIRST, cd_dep_count#27 ASC NULLS FIRST, cd_dep_employed_count#28 ASC NULLS FIRST, cd_dep_college_count#29 ASC NULLS FIRST], [cd_gender#22, cd_marital_status#23, cd_education_status#24, cnt1#35, cd_purchase_estimate#25, cnt2#36, cd_credit_rating#26, cnt3#37, cd_dep_count#27, cnt4#38, cd_dep_employed_count#28, cnt5#39, cd_dep_college_count#29, cnt6#40] + diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a/simplified.txt new file mode 100644 index 0000000000000..90105ffd73ea8 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a/simplified.txt @@ -0,0 +1,71 @@ +TakeOrderedAndProject [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate,cnt1,cnt2,cnt3,cnt4,cnt5,cnt6] + WholeStageCodegen (10) + HashAggregate [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate,count] [cnt1,cnt2,cnt3,cnt4,cnt5,cnt6,count,count(1)] + InputAdapter + Exchange [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] #1 + WholeStageCodegen (9) + HashAggregate [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] [count,count] + Project [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_addr_sk,c_current_cdemo_sk] + BroadcastHashJoin [c_customer_sk,customer_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_current_addr_sk,c_current_cdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (2) + Project [ss_customer_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet 
default.store_sales [ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #4 + Union + WholeStageCodegen (4) + Project [ws_bill_customer_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + WholeStageCodegen (6) + Project [cs_ship_customer_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (7) + Project [ca_address_sk] + Filter [ca_address_sk,ca_county] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_county] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (8) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_credit_rating,cd_demo_sk,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11.sf100/explain.txt new file mode 100644 index 0000000000000..9349e4629a28f --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11.sf100/explain.txt @@ -0,0 +1,477 @@ +== Physical Plan == +TakeOrderedAndProject (86) ++- * Project (85) + +- * SortMergeJoin Inner (84) + :- * Project (66) + : +- * SortMergeJoin Inner (65) + : :- * SortMergeJoin Inner (45) + : : :- * Sort (24) + : 
: : +- Exchange (23) + : : : +- * Filter (22) + : : : +- * HashAggregate (21) + : : : +- Exchange (20) + : : : +- * HashAggregate (19) + : : : +- * Project (18) + : : : +- * SortMergeJoin Inner (17) + : : : :- * Sort (11) + : : : : +- Exchange (10) + : : : : +- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.date_dim (4) + : : : +- * Sort (16) + : : : +- Exchange (15) + : : : +- * Filter (14) + : : : +- * ColumnarToRow (13) + : : : +- Scan parquet default.customer (12) + : : +- * Sort (44) + : : +- Exchange (43) + : : +- * HashAggregate (42) + : : +- Exchange (41) + : : +- * HashAggregate (40) + : : +- * Project (39) + : : +- * SortMergeJoin Inner (38) + : : :- * Sort (35) + : : : +- Exchange (34) + : : : +- * Project (33) + : : : +- * BroadcastHashJoin Inner BuildRight (32) + : : : :- * Filter (27) + : : : : +- * ColumnarToRow (26) + : : : : +- Scan parquet default.store_sales (25) + : : : +- BroadcastExchange (31) + : : : +- * Filter (30) + : : : +- * ColumnarToRow (29) + : : : +- Scan parquet default.date_dim (28) + : : +- * Sort (37) + : : +- ReusedExchange (36) + : +- * Sort (64) + : +- Exchange (63) + : +- * Project (62) + : +- * Filter (61) + : +- * HashAggregate (60) + : +- Exchange (59) + : +- * HashAggregate (58) + : +- * Project (57) + : +- * SortMergeJoin Inner (56) + : :- * Sort (53) + : : +- Exchange (52) + : : +- * Project (51) + : : +- * BroadcastHashJoin Inner BuildRight (50) + : : :- * Filter (48) + : : : +- * ColumnarToRow (47) + : : : +- Scan parquet default.web_sales (46) + : : +- ReusedExchange (49) + : +- * Sort (55) + : +- ReusedExchange (54) + +- * Sort (83) + +- Exchange (82) + +- * HashAggregate (81) + +- Exchange (80) + +- * HashAggregate (79) + +- * Project (78) + +- * 
SortMergeJoin Inner (77) + :- * Sort (74) + : +- Exchange (73) + : +- * Project (72) + : +- * BroadcastHashJoin Inner BuildRight (71) + : :- * Filter (69) + : : +- * ColumnarToRow (68) + : : +- Scan parquet default.web_sales (67) + : +- ReusedExchange (70) + +- * Sort (76) + +- ReusedExchange (75) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_list_price#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [4]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_list_price#4] + +(3) Filter [codegen id : 2] +Input [4]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_list_price#4] +Condition : (isnotnull(ss_customer_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#5, d_year#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#5, d_year#6] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#5, d_year#6] +Condition : ((isnotnull(d_year#6) AND (d_year#6 = 2001)) AND isnotnull(d_date_sk#5)) + +(7) BroadcastExchange +Input [2]: [d_date_sk#5, d_year#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] + +(8) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(9) Project [codegen id : 2] +Output [4]: [ss_customer_sk#2, ss_ext_discount_amt#3, 
ss_ext_list_price#4, d_year#6] +Input [6]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_list_price#4, d_date_sk#5, d_year#6] + +(10) Exchange +Input [4]: [ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_list_price#4, d_year#6] +Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#8] + +(11) Sort [codegen id : 3] +Input [4]: [ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_list_price#4, d_year#6] +Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 + +(12) Scan parquet default.customer +Output [8]: [c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(13) ColumnarToRow [codegen id : 4] +Input [8]: [c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] + +(14) Filter [codegen id : 4] +Input [8]: [c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] +Condition : (isnotnull(c_customer_sk#9) AND isnotnull(c_customer_id#10)) + +(15) Exchange +Input [8]: [c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] +Arguments: hashpartitioning(c_customer_sk#9, 5), true, [id=#17] + +(16) Sort [codegen id : 5] +Input [8]: [c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] +Arguments: [c_customer_sk#9 ASC NULLS FIRST], false, 0 + +(17) SortMergeJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: 
[c_customer_sk#9] +Join condition: None + +(18) Project [codegen id : 6] +Output [10]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, ss_ext_discount_amt#3, ss_ext_list_price#4, d_year#6] +Input [12]: [ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_list_price#4, d_year#6, c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] + +(19) HashAggregate [codegen id : 6] +Input [10]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, ss_ext_discount_amt#3, ss_ext_list_price#4, d_year#6] +Keys [8]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#6, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#4 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#3 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum#18] +Results [9]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#6, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, sum#19] + +(20) Exchange +Input [9]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#6, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, sum#19] +Arguments: hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, d_year#6, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, 5), true, [id=#20] + +(21) HashAggregate [codegen id : 7] +Input [9]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#6, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, sum#19] +Keys [8]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#6, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, 
c_email_address#16] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#4 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#3 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#4 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#3 as decimal(8,2)))), DecimalType(8,2), true)))#21] +Results [2]: [c_customer_id#10 AS customer_id#22, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#4 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#3 as decimal(8,2)))), DecimalType(8,2), true)))#21,18,2) AS year_total#23] + +(22) Filter [codegen id : 7] +Input [2]: [customer_id#22, year_total#23] +Condition : (isnotnull(year_total#23) AND (year_total#23 > 0.00)) + +(23) Exchange +Input [2]: [customer_id#22, year_total#23] +Arguments: hashpartitioning(customer_id#22, 5), true, [id=#24] + +(24) Sort [codegen id : 8] +Input [2]: [customer_id#22, year_total#23] +Arguments: [customer_id#22 ASC NULLS FIRST], false, 0 + +(25) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_list_price#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(26) ColumnarToRow [codegen id : 10] +Input [4]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_list_price#4] + +(27) Filter [codegen id : 10] +Input [4]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_list_price#4] +Condition : (isnotnull(ss_customer_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(28) Scan parquet default.date_dim +Output [2]: [d_date_sk#5, d_year#6] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 9] +Input [2]: [d_date_sk#5, d_year#6] + +(30) Filter [codegen id : 9] +Input [2]: [d_date_sk#5, d_year#6] +Condition : ((isnotnull(d_year#6) AND (d_year#6 = 2002)) AND isnotnull(d_date_sk#5)) + +(31) BroadcastExchange +Input [2]: [d_date_sk#5, d_year#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#25] + +(32) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(33) Project [codegen id : 10] +Output [4]: [ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_list_price#4, d_year#6] +Input [6]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_list_price#4, d_date_sk#5, d_year#6] + +(34) Exchange +Input [4]: [ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_list_price#4, d_year#6] +Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#26] + +(35) Sort [codegen id : 11] +Input [4]: [ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_list_price#4, d_year#6] +Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 + +(36) ReusedExchange [Reuses operator id: 15] +Output [8]: [c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] + +(37) Sort [codegen id : 13] +Input [8]: [c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] +Arguments: [c_customer_sk#9 ASC NULLS FIRST], false, 0 + +(38) SortMergeJoin [codegen id : 14] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#9] +Join condition: None + +(39) Project [codegen id : 14] +Output [10]: [c_customer_id#10, 
c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, ss_ext_discount_amt#3, ss_ext_list_price#4, d_year#6] +Input [12]: [ss_customer_sk#2, ss_ext_discount_amt#3, ss_ext_list_price#4, d_year#6, c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] + +(40) HashAggregate [codegen id : 14] +Input [10]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, ss_ext_discount_amt#3, ss_ext_list_price#4, d_year#6] +Keys [8]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#6, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#4 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#3 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum#27] +Results [9]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#6, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, sum#28] + +(41) Exchange +Input [9]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#6, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, sum#28] +Arguments: hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, d_year#6, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, 5), true, [id=#29] + +(42) HashAggregate [codegen id : 15] +Input [9]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#6, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, sum#28] +Keys [8]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#6, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] +Functions [1]: 
[sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#4 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#3 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#4 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#3 as decimal(8,2)))), DecimalType(8,2), true)))#30] +Results [5]: [c_customer_id#10 AS customer_id#31, c_first_name#11 AS customer_first_name#32, c_last_name#12 AS customer_last_name#33, c_email_address#16 AS customer_email_address#34, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#4 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#3 as decimal(8,2)))), DecimalType(8,2), true)))#30,18,2) AS year_total#35] + +(43) Exchange +Input [5]: [customer_id#31, customer_first_name#32, customer_last_name#33, customer_email_address#34, year_total#35] +Arguments: hashpartitioning(customer_id#31, 5), true, [id=#36] + +(44) Sort [codegen id : 16] +Input [5]: [customer_id#31, customer_first_name#32, customer_last_name#33, customer_email_address#34, year_total#35] +Arguments: [customer_id#31 ASC NULLS FIRST], false, 0 + +(45) SortMergeJoin [codegen id : 17] +Left keys [1]: [customer_id#22] +Right keys [1]: [customer_id#31] +Join condition: None + +(46) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#37, ws_bill_customer_sk#38, ws_ext_discount_amt#39, ws_ext_list_price#40] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(47) ColumnarToRow [codegen id : 19] +Input [4]: [ws_sold_date_sk#37, ws_bill_customer_sk#38, ws_ext_discount_amt#39, ws_ext_list_price#40] + +(48) Filter [codegen id : 19] +Input [4]: [ws_sold_date_sk#37, ws_bill_customer_sk#38, 
ws_ext_discount_amt#39, ws_ext_list_price#40] +Condition : (isnotnull(ws_bill_customer_sk#38) AND isnotnull(ws_sold_date_sk#37)) + +(49) ReusedExchange [Reuses operator id: 7] +Output [2]: [d_date_sk#5, d_year#6] + +(50) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [ws_sold_date_sk#37] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(51) Project [codegen id : 19] +Output [4]: [ws_bill_customer_sk#38, ws_ext_discount_amt#39, ws_ext_list_price#40, d_year#6] +Input [6]: [ws_sold_date_sk#37, ws_bill_customer_sk#38, ws_ext_discount_amt#39, ws_ext_list_price#40, d_date_sk#5, d_year#6] + +(52) Exchange +Input [4]: [ws_bill_customer_sk#38, ws_ext_discount_amt#39, ws_ext_list_price#40, d_year#6] +Arguments: hashpartitioning(ws_bill_customer_sk#38, 5), true, [id=#41] + +(53) Sort [codegen id : 20] +Input [4]: [ws_bill_customer_sk#38, ws_ext_discount_amt#39, ws_ext_list_price#40, d_year#6] +Arguments: [ws_bill_customer_sk#38 ASC NULLS FIRST], false, 0 + +(54) ReusedExchange [Reuses operator id: 15] +Output [8]: [c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] + +(55) Sort [codegen id : 22] +Input [8]: [c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] +Arguments: [c_customer_sk#9 ASC NULLS FIRST], false, 0 + +(56) SortMergeJoin [codegen id : 23] +Left keys [1]: [ws_bill_customer_sk#38] +Right keys [1]: [c_customer_sk#9] +Join condition: None + +(57) Project [codegen id : 23] +Output [10]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, ws_ext_discount_amt#39, ws_ext_list_price#40, d_year#6] +Input [12]: [ws_bill_customer_sk#38, ws_ext_discount_amt#39, ws_ext_list_price#40, d_year#6, c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, 
c_birth_country#14, c_login#15, c_email_address#16] + +(58) HashAggregate [codegen id : 23] +Input [10]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, ws_ext_discount_amt#39, ws_ext_list_price#40, d_year#6] +Keys [8]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, d_year#6] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#40 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#39 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum#42] +Results [9]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, d_year#6, sum#43] + +(59) Exchange +Input [9]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, d_year#6, sum#43] +Arguments: hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, d_year#6, 5), true, [id=#44] + +(60) HashAggregate [codegen id : 24] +Input [9]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, d_year#6, sum#43] +Keys [8]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, d_year#6] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#40 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#39 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#40 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#39 as decimal(8,2)))), DecimalType(8,2), true)))#45] 
+Results [2]: [c_customer_id#10 AS customer_id#46, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#40 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#39 as decimal(8,2)))), DecimalType(8,2), true)))#45,18,2) AS year_total#47] + +(61) Filter [codegen id : 24] +Input [2]: [customer_id#46, year_total#47] +Condition : (isnotnull(year_total#47) AND (year_total#47 > 0.00)) + +(62) Project [codegen id : 24] +Output [2]: [customer_id#46 AS customer_id#48, year_total#47 AS year_total#49] +Input [2]: [customer_id#46, year_total#47] + +(63) Exchange +Input [2]: [customer_id#48, year_total#49] +Arguments: hashpartitioning(customer_id#48, 5), true, [id=#50] + +(64) Sort [codegen id : 25] +Input [2]: [customer_id#48, year_total#49] +Arguments: [customer_id#48 ASC NULLS FIRST], false, 0 + +(65) SortMergeJoin [codegen id : 26] +Left keys [1]: [customer_id#22] +Right keys [1]: [customer_id#48] +Join condition: None + +(66) Project [codegen id : 26] +Output [8]: [customer_id#22, year_total#23, customer_id#31, customer_first_name#32, customer_last_name#33, customer_email_address#34, year_total#35, year_total#49] +Input [9]: [customer_id#22, year_total#23, customer_id#31, customer_first_name#32, customer_last_name#33, customer_email_address#34, year_total#35, customer_id#48, year_total#49] + +(67) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#37, ws_bill_customer_sk#38, ws_ext_discount_amt#39, ws_ext_list_price#40] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(68) ColumnarToRow [codegen id : 28] +Input [4]: [ws_sold_date_sk#37, ws_bill_customer_sk#38, ws_ext_discount_amt#39, ws_ext_list_price#40] + +(69) Filter [codegen id : 28] +Input [4]: [ws_sold_date_sk#37, ws_bill_customer_sk#38, 
ws_ext_discount_amt#39, ws_ext_list_price#40] +Condition : (isnotnull(ws_bill_customer_sk#38) AND isnotnull(ws_sold_date_sk#37)) + +(70) ReusedExchange [Reuses operator id: 31] +Output [2]: [d_date_sk#5, d_year#6] + +(71) BroadcastHashJoin [codegen id : 28] +Left keys [1]: [ws_sold_date_sk#37] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(72) Project [codegen id : 28] +Output [4]: [ws_bill_customer_sk#38, ws_ext_discount_amt#39, ws_ext_list_price#40, d_year#6] +Input [6]: [ws_sold_date_sk#37, ws_bill_customer_sk#38, ws_ext_discount_amt#39, ws_ext_list_price#40, d_date_sk#5, d_year#6] + +(73) Exchange +Input [4]: [ws_bill_customer_sk#38, ws_ext_discount_amt#39, ws_ext_list_price#40, d_year#6] +Arguments: hashpartitioning(ws_bill_customer_sk#38, 5), true, [id=#51] + +(74) Sort [codegen id : 29] +Input [4]: [ws_bill_customer_sk#38, ws_ext_discount_amt#39, ws_ext_list_price#40, d_year#6] +Arguments: [ws_bill_customer_sk#38 ASC NULLS FIRST], false, 0 + +(75) ReusedExchange [Reuses operator id: 15] +Output [8]: [c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] + +(76) Sort [codegen id : 31] +Input [8]: [c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] +Arguments: [c_customer_sk#9 ASC NULLS FIRST], false, 0 + +(77) SortMergeJoin [codegen id : 32] +Left keys [1]: [ws_bill_customer_sk#38] +Right keys [1]: [c_customer_sk#9] +Join condition: None + +(78) Project [codegen id : 32] +Output [10]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, ws_ext_discount_amt#39, ws_ext_list_price#40, d_year#6] +Input [12]: [ws_bill_customer_sk#38, ws_ext_discount_amt#39, ws_ext_list_price#40, d_year#6, c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, 
c_birth_country#14, c_login#15, c_email_address#16] + +(79) HashAggregate [codegen id : 32] +Input [10]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, ws_ext_discount_amt#39, ws_ext_list_price#40, d_year#6] +Keys [8]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, d_year#6] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#40 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#39 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum#52] +Results [9]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, d_year#6, sum#53] + +(80) Exchange +Input [9]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, d_year#6, sum#53] +Arguments: hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, d_year#6, 5), true, [id=#54] + +(81) HashAggregate [codegen id : 33] +Input [9]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, d_year#6, sum#53] +Keys [8]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, d_year#6] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#40 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#39 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#40 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#39 as decimal(8,2)))), DecimalType(8,2), true)))#55] 
+Results [2]: [c_customer_id#10 AS customer_id#56, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#40 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#39 as decimal(8,2)))), DecimalType(8,2), true)))#55,18,2) AS year_total#57] + +(82) Exchange +Input [2]: [customer_id#56, year_total#57] +Arguments: hashpartitioning(customer_id#56, 5), true, [id=#58] + +(83) Sort [codegen id : 34] +Input [2]: [customer_id#56, year_total#57] +Arguments: [customer_id#56 ASC NULLS FIRST], false, 0 + +(84) SortMergeJoin [codegen id : 35] +Left keys [1]: [customer_id#22] +Right keys [1]: [customer_id#56] +Join condition: (CASE WHEN (year_total#49 > 0.00) THEN CheckOverflow((promote_precision(year_total#57) / promote_precision(year_total#49)), DecimalType(38,20), true) ELSE 0E-20 END > CASE WHEN (year_total#23 > 0.00) THEN CheckOverflow((promote_precision(year_total#35) / promote_precision(year_total#23)), DecimalType(38,20), true) ELSE 0E-20 END) + +(85) Project [codegen id : 35] +Output [4]: [customer_id#31, customer_first_name#32, customer_last_name#33, customer_email_address#34] +Input [10]: [customer_id#22, year_total#23, customer_id#31, customer_first_name#32, customer_last_name#33, customer_email_address#34, year_total#35, year_total#49, customer_id#56, year_total#57] + +(86) TakeOrderedAndProject +Input [4]: [customer_id#31, customer_first_name#32, customer_last_name#33, customer_email_address#34] +Arguments: 100, [customer_id#31 ASC NULLS FIRST, customer_first_name#32 ASC NULLS FIRST, customer_last_name#33 ASC NULLS FIRST, customer_email_address#34 ASC NULLS FIRST], [customer_id#31, customer_first_name#32, customer_last_name#33, customer_email_address#34] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11.sf100/simplified.txt new file mode 100644 index 0000000000000..2481e9900c5f6 --- /dev/null +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11.sf100/simplified.txt @@ -0,0 +1,157 @@ +TakeOrderedAndProject [customer_email_address,customer_first_name,customer_id,customer_last_name] + WholeStageCodegen (35) + Project [customer_email_address,customer_first_name,customer_id,customer_last_name] + SortMergeJoin [customer_id,customer_id,year_total,year_total,year_total,year_total] + InputAdapter + WholeStageCodegen (26) + Project [customer_email_address,customer_first_name,customer_id,customer_id,customer_last_name,year_total,year_total,year_total] + SortMergeJoin [customer_id,customer_id] + InputAdapter + WholeStageCodegen (17) + SortMergeJoin [customer_id,customer_id] + InputAdapter + WholeStageCodegen (8) + Sort [customer_id] + InputAdapter + Exchange [customer_id] #1 + WholeStageCodegen (7) + Filter [year_total] + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum] [customer_id,sum,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2), true))),year_total] + InputAdapter + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #2 + WholeStageCodegen (6) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price] [sum,sum] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (3) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #3 + WholeStageCodegen (2) + Project [d_year,ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter 
[ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + WholeStageCodegen (5) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #5 + WholeStageCodegen (4) + Filter [c_customer_id,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] + InputAdapter + WholeStageCodegen (16) + Sort [customer_id] + InputAdapter + Exchange [customer_id] #6 + WholeStageCodegen (15) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum] [customer_email_address,customer_first_name,customer_id,customer_last_name,sum,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2), true))),year_total] + InputAdapter + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #7 + WholeStageCodegen (14) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price] [sum,sum] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (11) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #8 + WholeStageCodegen (10) + Project [d_year,ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price] + BroadcastHashJoin 
[d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (9) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + WholeStageCodegen (13) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] #5 + InputAdapter + WholeStageCodegen (25) + Sort [customer_id] + InputAdapter + Exchange [customer_id] #10 + WholeStageCodegen (24) + Project [customer_id,year_total] + Filter [year_total] + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum] [customer_id,sum,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2), true))),year_total] + InputAdapter + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #11 + WholeStageCodegen (23) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ws_ext_discount_amt,ws_ext_list_price] [sum,sum] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ws_ext_discount_amt,ws_ext_list_price] + SortMergeJoin [c_customer_sk,ws_bill_customer_sk] + InputAdapter + WholeStageCodegen (20) + Sort [ws_bill_customer_sk] + InputAdapter + Exchange [ws_bill_customer_sk] #12 + WholeStageCodegen (19) + Project [d_year,ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_bill_customer_sk,ws_sold_date_sk] + 
ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #4 + InputAdapter + WholeStageCodegen (22) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] #5 + InputAdapter + WholeStageCodegen (34) + Sort [customer_id] + InputAdapter + Exchange [customer_id] #13 + WholeStageCodegen (33) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum] [customer_id,sum,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2), true))),year_total] + InputAdapter + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #14 + WholeStageCodegen (32) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ws_ext_discount_amt,ws_ext_list_price] [sum,sum] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ws_ext_discount_amt,ws_ext_list_price] + SortMergeJoin [c_customer_sk,ws_bill_customer_sk] + InputAdapter + WholeStageCodegen (29) + Sort [ws_bill_customer_sk] + InputAdapter + Exchange [ws_bill_customer_sk] #15 + WholeStageCodegen (28) + Project [d_year,ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_bill_customer_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #9 + InputAdapter + WholeStageCodegen (31) + Sort 
[c_customer_sk] + InputAdapter + ReusedExchange [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] #5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11/explain.txt new file mode 100644 index 0000000000000..15e3a1ec9706f --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11/explain.txt @@ -0,0 +1,410 @@ +== Physical Plan == +TakeOrderedAndProject (72) ++- * Project (71) + +- * BroadcastHashJoin Inner BuildRight (70) + :- * Project (56) + : +- * BroadcastHashJoin Inner BuildRight (55) + : :- * BroadcastHashJoin Inner BuildRight (36) + : : :- * Filter (19) + : : : +- * HashAggregate (18) + : : : +- Exchange (17) + : : : +- * HashAggregate (16) + : : : +- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.customer (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.store_sales (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.date_dim (10) + : : +- BroadcastExchange (35) + : : +- * HashAggregate (34) + : : +- Exchange (33) + : : +- * HashAggregate (32) + : : +- * Project (31) + : : +- * BroadcastHashJoin Inner BuildRight (30) + : : :- * Project (25) + : : : +- * BroadcastHashJoin Inner BuildRight (24) + : : : :- * Filter (22) + : : : : +- * ColumnarToRow (21) + : : : : +- Scan parquet default.customer (20) + : : : +- ReusedExchange (23) + : : +- BroadcastExchange (29) + : : +- * Filter (28) + : : +- * ColumnarToRow (27) + : : +- Scan parquet default.date_dim (26) + : +- BroadcastExchange (54) + : +- * 
Project (53) + : +- * Filter (52) + : +- * HashAggregate (51) + : +- Exchange (50) + : +- * HashAggregate (49) + : +- * Project (48) + : +- * BroadcastHashJoin Inner BuildRight (47) + : :- * Project (45) + : : +- * BroadcastHashJoin Inner BuildRight (44) + : : :- * Filter (39) + : : : +- * ColumnarToRow (38) + : : : +- Scan parquet default.customer (37) + : : +- BroadcastExchange (43) + : : +- * Filter (42) + : : +- * ColumnarToRow (41) + : : +- Scan parquet default.web_sales (40) + : +- ReusedExchange (46) + +- BroadcastExchange (69) + +- * HashAggregate (68) + +- Exchange (67) + +- * HashAggregate (66) + +- * Project (65) + +- * BroadcastHashJoin Inner BuildRight (64) + :- * Project (62) + : +- * BroadcastHashJoin Inner BuildRight (61) + : :- * Filter (59) + : : +- * ColumnarToRow (58) + : : +- Scan parquet default.customer (57) + : +- ReusedExchange (60) + +- ReusedExchange (63) + + +(1) Scan parquet default.customer +Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] + +(3) Filter [codegen id : 3] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(4) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_list_price#12] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_list_price#12] + +(6) Filter [codegen id : 1] +Input [4]: [ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_list_price#12] +Condition : (isnotnull(ss_customer_sk#10) AND isnotnull(ss_sold_date_sk#9)) + +(7) BroadcastExchange +Input [4]: [ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_list_price#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#13] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#10] +Join condition: None + +(9) Project [codegen id : 3] +Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_ext_discount_amt#11, ss_ext_list_price#12] +Input [12]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_list_price#12] + +(10) Scan parquet default.date_dim +Output [2]: [d_date_sk#14, d_year#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#14, d_year#15] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#14, d_year#15] +Condition : ((isnotnull(d_year#15) AND (d_year#15 = 2001)) AND isnotnull(d_date_sk#14)) + +(13) BroadcastExchange +Input 
[2]: [d_date_sk#14, d_year#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#16] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#9] +Right keys [1]: [d_date_sk#14] +Join condition: None + +(15) Project [codegen id : 3] +Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#11, ss_ext_list_price#12, d_year#15] +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_ext_discount_amt#11, ss_ext_list_price#12, d_date_sk#14, d_year#15] + +(16) HashAggregate [codegen id : 3] +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#11, ss_ext_list_price#12, d_year#15] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#12 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum#17] +Results [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#18] + +(17) Exchange +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#18] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, 5), true, [id=#19] + +(18) HashAggregate [codegen id : 16] +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, 
c_birth_country#6, c_login#7, c_email_address#8, sum#18] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#12 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#12 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(8,2)))), DecimalType(8,2), true)))#20] +Results [2]: [c_customer_id#2 AS customer_id#21, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#12 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(8,2)))), DecimalType(8,2), true)))#20,18,2) AS year_total#22] + +(19) Filter [codegen id : 16] +Input [2]: [customer_id#21, year_total#22] +Condition : (isnotnull(year_total#22) AND (year_total#22 > 0.00)) + +(20) Scan parquet default.customer +Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] + +(22) Filter [codegen id : 6] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(23) ReusedExchange [Reuses operator id: 7] 
+Output [4]: [ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_list_price#12] + +(24) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#10] +Join condition: None + +(25) Project [codegen id : 6] +Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_ext_discount_amt#11, ss_ext_list_price#12] +Input [12]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_list_price#12] + +(26) Scan parquet default.date_dim +Output [2]: [d_date_sk#14, d_year#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#14, d_year#15] + +(28) Filter [codegen id : 5] +Input [2]: [d_date_sk#14, d_year#15] +Condition : ((isnotnull(d_year#15) AND (d_year#15 = 2002)) AND isnotnull(d_date_sk#14)) + +(29) BroadcastExchange +Input [2]: [d_date_sk#14, d_year#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] + +(30) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#9] +Right keys [1]: [d_date_sk#14] +Join condition: None + +(31) Project [codegen id : 6] +Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#11, ss_ext_list_price#12, d_year#15] +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_ext_discount_amt#11, 
ss_ext_list_price#12, d_date_sk#14, d_year#15] + +(32) HashAggregate [codegen id : 6] +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#11, ss_ext_list_price#12, d_year#15] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#12 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum#24] +Results [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#25] + +(33) Exchange +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#25] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, 5), true, [id=#26] + +(34) HashAggregate [codegen id : 7] +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#25] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#12 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#12 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(8,2)))), DecimalType(8,2), true)))#27] +Results [5]: [c_customer_id#2 AS customer_id#28, 
c_first_name#3 AS customer_first_name#29, c_last_name#4 AS customer_last_name#30, c_email_address#8 AS customer_email_address#31, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#12 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(8,2)))), DecimalType(8,2), true)))#27,18,2) AS year_total#32] + +(35) BroadcastExchange +Input [5]: [customer_id#28, customer_first_name#29, customer_last_name#30, customer_email_address#31, year_total#32] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#33] + +(36) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#21] +Right keys [1]: [customer_id#28] +Join condition: None + +(37) Scan parquet default.customer +Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 10] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] + +(39) Filter [codegen id : 10] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(40) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#34, ws_bill_customer_sk#35, ws_ext_discount_amt#36, ws_ext_list_price#37] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_bill_customer_sk), 
IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 8] +Input [4]: [ws_sold_date_sk#34, ws_bill_customer_sk#35, ws_ext_discount_amt#36, ws_ext_list_price#37] + +(42) Filter [codegen id : 8] +Input [4]: [ws_sold_date_sk#34, ws_bill_customer_sk#35, ws_ext_discount_amt#36, ws_ext_list_price#37] +Condition : (isnotnull(ws_bill_customer_sk#35) AND isnotnull(ws_sold_date_sk#34)) + +(43) BroadcastExchange +Input [4]: [ws_sold_date_sk#34, ws_bill_customer_sk#35, ws_ext_discount_amt#36, ws_ext_list_price#37] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#38] + +(44) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ws_bill_customer_sk#35] +Join condition: None + +(45) Project [codegen id : 10] +Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#34, ws_ext_discount_amt#36, ws_ext_list_price#37] +Input [12]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#34, ws_bill_customer_sk#35, ws_ext_discount_amt#36, ws_ext_list_price#37] + +(46) ReusedExchange [Reuses operator id: 13] +Output [2]: [d_date_sk#14, d_year#15] + +(47) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ws_sold_date_sk#34] +Right keys [1]: [d_date_sk#14] +Join condition: None + +(48) Project [codegen id : 10] +Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_ext_discount_amt#36, ws_ext_list_price#37, d_year#15] +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#34, ws_ext_discount_amt#36, ws_ext_list_price#37, d_date_sk#14, d_year#15] + +(49) HashAggregate [codegen id : 10] +Input [10]: 
[c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_ext_discount_amt#36, ws_ext_list_price#37, d_year#15] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#37 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#36 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum#39] +Results [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15, sum#40] + +(50) Exchange +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15, sum#40] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15, 5), true, [id=#41] + +(51) HashAggregate [codegen id : 11] +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15, sum#40] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#37 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#36 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#37 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#36 as decimal(8,2)))), DecimalType(8,2), true)))#42] +Results [2]: [c_customer_id#2 AS customer_id#43, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#37 as 
decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#36 as decimal(8,2)))), DecimalType(8,2), true)))#42,18,2) AS year_total#44] + +(52) Filter [codegen id : 11] +Input [2]: [customer_id#43, year_total#44] +Condition : (isnotnull(year_total#44) AND (year_total#44 > 0.00)) + +(53) Project [codegen id : 11] +Output [2]: [customer_id#43 AS customer_id#45, year_total#44 AS year_total#46] +Input [2]: [customer_id#43, year_total#44] + +(54) BroadcastExchange +Input [2]: [customer_id#45, year_total#46] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#47] + +(55) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#21] +Right keys [1]: [customer_id#45] +Join condition: None + +(56) Project [codegen id : 16] +Output [8]: [customer_id#21, year_total#22, customer_id#28, customer_first_name#29, customer_last_name#30, customer_email_address#31, year_total#32, year_total#46] +Input [9]: [customer_id#21, year_total#22, customer_id#28, customer_first_name#29, customer_last_name#30, customer_email_address#31, year_total#32, customer_id#45, year_total#46] + +(57) Scan parquet default.customer +Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(58) ColumnarToRow [codegen id : 14] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] + +(59) Filter [codegen id : 14] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND 
isnotnull(c_customer_id#2)) + +(60) ReusedExchange [Reuses operator id: 43] +Output [4]: [ws_sold_date_sk#34, ws_bill_customer_sk#35, ws_ext_discount_amt#36, ws_ext_list_price#37] + +(61) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ws_bill_customer_sk#35] +Join condition: None + +(62) Project [codegen id : 14] +Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#34, ws_ext_discount_amt#36, ws_ext_list_price#37] +Input [12]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#34, ws_bill_customer_sk#35, ws_ext_discount_amt#36, ws_ext_list_price#37] + +(63) ReusedExchange [Reuses operator id: 29] +Output [2]: [d_date_sk#14, d_year#15] + +(64) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ws_sold_date_sk#34] +Right keys [1]: [d_date_sk#14] +Join condition: None + +(65) Project [codegen id : 14] +Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_ext_discount_amt#36, ws_ext_list_price#37, d_year#15] +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#34, ws_ext_discount_amt#36, ws_ext_list_price#37, d_date_sk#14, d_year#15] + +(66) HashAggregate [codegen id : 14] +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_ext_discount_amt#36, ws_ext_list_price#37, d_year#15] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#37 as decimal(8,2))) - 
promote_precision(cast(ws_ext_discount_amt#36 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum#48] +Results [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15, sum#49] + +(67) Exchange +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15, sum#49] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15, 5), true, [id=#50] + +(68) HashAggregate [codegen id : 15] +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15, sum#49] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#37 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#36 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#37 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#36 as decimal(8,2)))), DecimalType(8,2), true)))#51] +Results [2]: [c_customer_id#2 AS customer_id#52, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#37 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#36 as decimal(8,2)))), DecimalType(8,2), true)))#51,18,2) AS year_total#53] + +(69) BroadcastExchange +Input [2]: [customer_id#52, year_total#53] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#54] + +(70) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#21] +Right keys [1]: [customer_id#52] +Join condition: (CASE WHEN (year_total#46 > 0.00) 
THEN CheckOverflow((promote_precision(year_total#53) / promote_precision(year_total#46)), DecimalType(38,20), true) ELSE 0E-20 END > CASE WHEN (year_total#22 > 0.00) THEN CheckOverflow((promote_precision(year_total#32) / promote_precision(year_total#22)), DecimalType(38,20), true) ELSE 0E-20 END) + +(71) Project [codegen id : 16] +Output [4]: [customer_id#28, customer_first_name#29, customer_last_name#30, customer_email_address#31] +Input [10]: [customer_id#21, year_total#22, customer_id#28, customer_first_name#29, customer_last_name#30, customer_email_address#31, year_total#32, year_total#46, customer_id#52, year_total#53] + +(72) TakeOrderedAndProject +Input [4]: [customer_id#28, customer_first_name#29, customer_last_name#30, customer_email_address#31] +Arguments: 100, [customer_id#28 ASC NULLS FIRST, customer_first_name#29 ASC NULLS FIRST, customer_last_name#30 ASC NULLS FIRST, customer_email_address#31 ASC NULLS FIRST], [customer_id#28, customer_first_name#29, customer_last_name#30, customer_email_address#31] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11/simplified.txt new file mode 100644 index 0000000000000..4369a86a18237 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11/simplified.txt @@ -0,0 +1,107 @@ +TakeOrderedAndProject [customer_email_address,customer_first_name,customer_id,customer_last_name] + WholeStageCodegen (16) + Project [customer_email_address,customer_first_name,customer_id,customer_last_name] + BroadcastHashJoin [customer_id,customer_id,year_total,year_total,year_total,year_total] + Project [customer_email_address,customer_first_name,customer_id,customer_id,customer_last_name,year_total,year_total,year_total] + BroadcastHashJoin [customer_id,customer_id] + BroadcastHashJoin [customer_id,customer_id] + Filter [year_total] + HashAggregate 
[c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum] [customer_id,sum,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2), true))),year_total] + InputAdapter + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #1 + WholeStageCodegen (3) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price] [sum,sum] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_customer_id,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (7) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum] 
[customer_email_address,customer_first_name,customer_id,customer_last_name,sum,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2), true))),year_total] + InputAdapter + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #5 + WholeStageCodegen (6) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price] [sum,sum] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_customer_id,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] + InputAdapter + ReusedExchange [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] #2 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (11) + Project [customer_id,year_total] + Filter [year_total] + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum] [customer_id,sum,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2), true))),year_total] + 
InputAdapter + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #8 + WholeStageCodegen (10) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ws_ext_discount_amt,ws_ext_list_price] [sum,sum] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ws_ext_discount_amt,ws_ext_list_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Filter [c_customer_id,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (8) + Filter [ws_bill_customer_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #3 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (15) + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum] [customer_id,sum,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2), true))),year_total] + InputAdapter + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #11 + WholeStageCodegen (14) + HashAggregate 
[c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ws_ext_discount_amt,ws_ext_list_price] [sum,sum] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ws_ext_discount_amt,ws_ext_list_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Filter [c_customer_id,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] + InputAdapter + ReusedExchange [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] #9 + InputAdapter + ReusedExchange [d_date_sk,d_year] #6 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12.sf100/explain.txt new file mode 100644 index 0000000000000..94a43d84cc7e7 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12.sf100/explain.txt @@ -0,0 +1,152 @@ +== Physical Plan == +TakeOrderedAndProject (27) ++- * Project (26) + +- Window (25) + +- * Sort (24) + +- Exchange (23) + +- * HashAggregate (22) + +- Exchange (21) + +- * HashAggregate (20) + +- * Project (19) + +- * SortMergeJoin Inner (18) + :- * Sort (12) + : +- Exchange (11) + : +- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.web_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.date_dim (4) + +- * Sort (17) + +- Exchange (16) + +- * Filter (15) + +- * 
ColumnarToRow (14) + +- Scan parquet default.item (13) + + +(1) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_sales_price#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_sales_price#3] + +(3) Filter [codegen id : 2] +Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_sales_price#3] +Condition : (isnotnull(ws_item_sk#2) AND isnotnull(ws_sold_date_sk#1)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#4, d_date#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#4, d_date#5] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#4, d_date#5] +Condition : (((isnotnull(d_date#5) AND (d_date#5 >= 10644)) AND (d_date#5 <= 10674)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_date#5] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(9) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ws_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(10) Project [codegen id : 2] +Output [2]: [ws_item_sk#2, ws_ext_sales_price#3] +Input [4]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_sales_price#3, d_date_sk#4] + +(11) Exchange +Input [2]: [ws_item_sk#2, ws_ext_sales_price#3] +Arguments: 
hashpartitioning(ws_item_sk#2, 5), true, [id=#7] + +(12) Sort [codegen id : 3] +Input [2]: [ws_item_sk#2, ws_ext_sales_price#3] +Arguments: [ws_item_sk#2 ASC NULLS FIRST], false, 0 + +(13) Scan parquet default.item +Output [6]: [i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/item] +PushedFilters: [In(i_category, [Sports,Books,Home]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 4] +Input [6]: [i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] + +(15) Filter [codegen id : 4] +Input [6]: [i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Condition : (i_category#13 IN (Sports,Books,Home) AND isnotnull(i_item_sk#8)) + +(16) Exchange +Input [6]: [i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Arguments: hashpartitioning(i_item_sk#8, 5), true, [id=#14] + +(17) Sort [codegen id : 5] +Input [6]: [i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Arguments: [i_item_sk#8 ASC NULLS FIRST], false, 0 + +(18) SortMergeJoin [codegen id : 6] +Left keys [1]: [ws_item_sk#2] +Right keys [1]: [i_item_sk#8] +Join condition: None + +(19) Project [codegen id : 6] +Output [6]: [ws_ext_sales_price#3, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Input [8]: [ws_item_sk#2, ws_ext_sales_price#3, i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] + +(20) HashAggregate [codegen id : 6] +Input [6]: [ws_ext_sales_price#3, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Keys [5]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11] +Functions [1]: 
[partial_sum(UnscaledValue(ws_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#15] +Results [6]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, sum#16] + +(21) Exchange +Input [6]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, sum#16] +Arguments: hashpartitioning(i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, 5), true, [id=#17] + +(22) HashAggregate [codegen id : 7] +Input [6]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, sum#16] +Keys [5]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#3))#18] +Results [8]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#3))#18,17,2) AS itemrevenue#19, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#3))#18,17,2) AS _w0#20, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#3))#18,17,2) AS _w1#21] + +(23) Exchange +Input [8]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, _w0#20, _w1#21] +Arguments: hashpartitioning(i_class#12, 5), true, [id=#22] + +(24) Sort [codegen id : 8] +Input [8]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, _w0#20, _w1#21] +Arguments: [i_class#12 ASC NULLS FIRST], false, 0 + +(25) Window +Input [8]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, _w0#20, _w1#21] +Arguments: [sum(_w1#21) windowspecdefinition(i_class#12, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#23], [i_class#12] + +(26) Project [codegen id : 9] +Output [7]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, 
CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#20) * 100.00), DecimalType(21,2), true) as decimal(27,2))) / promote_precision(_we0#23)), DecimalType(38,17), true) AS revenueratio#24] +Input [9]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, _w0#20, _w1#21, _we0#23] + +(27) TakeOrderedAndProject +Input [7]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, revenueratio#24] +Arguments: 100, [i_category#13 ASC NULLS FIRST, i_class#12 ASC NULLS FIRST, i_item_id#9 ASC NULLS FIRST, i_item_desc#10 ASC NULLS FIRST, revenueratio#24 ASC NULLS FIRST], [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, revenueratio#24] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12.sf100/simplified.txt new file mode 100644 index 0000000000000..cf472842f1431 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12.sf100/simplified.txt @@ -0,0 +1,47 @@ +TakeOrderedAndProject [i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue,revenueratio] + WholeStageCodegen (9) + Project [_w0,_we0,i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue] + InputAdapter + Window [_w1,i_class] + WholeStageCodegen (8) + Sort [i_class] + InputAdapter + Exchange [i_class] #1 + WholeStageCodegen (7) + HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,sum] [_w0,_w1,itemrevenue,sum,sum(UnscaledValue(ws_ext_sales_price))] + InputAdapter + Exchange [i_category,i_class,i_current_price,i_item_desc,i_item_id] #2 + WholeStageCodegen (6) + HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,ws_ext_sales_price] [sum,sum] + Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,ws_ext_sales_price] + SortMergeJoin 
[i_item_sk,ws_item_sk] + InputAdapter + WholeStageCodegen (3) + Sort [ws_item_sk] + InputAdapter + Exchange [ws_item_sk] #3 + WholeStageCodegen (2) + Project [ws_ext_sales_price,ws_item_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + WholeStageCodegen (5) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #5 + WholeStageCodegen (4) + Filter [i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12/explain.txt new file mode 100644 index 0000000000000..86262bb562644 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12/explain.txt @@ -0,0 +1,137 @@ +== Physical Plan == +TakeOrderedAndProject (24) ++- * Project (23) + +- Window (22) + +- * Sort (21) + +- Exchange (20) + +- * HashAggregate (19) + +- Exchange (18) + +- * HashAggregate (17) + +- * Project (16) + +- * BroadcastHashJoin Inner BuildRight (15) + :- * Project (9) + : +- * BroadcastHashJoin Inner BuildRight (8) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.web_sales (1) + : +- BroadcastExchange (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.item (4) + +- BroadcastExchange (14) + +- * Project (13) + +- * Filter (12) + +- * ColumnarToRow (11) + +- Scan parquet default.date_dim (10) + + +(1) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#1, ws_item_sk#2, 
ws_ext_sales_price#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_sales_price#3] + +(3) Filter [codegen id : 3] +Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_sales_price#3] +Condition : (isnotnull(ws_item_sk#2) AND isnotnull(ws_sold_date_sk#1)) + +(4) Scan parquet default.item +Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/item] +PushedFilters: [In(i_category, [Sports,Books,Home]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(6) Filter [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Condition : (i_category#9 IN (Sports,Books,Home) AND isnotnull(i_item_sk#4)) + +(7) BroadcastExchange +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_item_sk#2] +Right keys [1]: [i_item_sk#4] +Join condition: None + +(9) Project [codegen id : 3] +Output [7]: [ws_sold_date_sk#1, ws_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [9]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_sales_price#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(10) Scan parquet default.date_dim +Output 
[2]: [d_date_sk#11, d_date#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#11, d_date#12] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#11, d_date#12] +Condition : (((isnotnull(d_date#12) AND (d_date#12 >= 10644)) AND (d_date#12 <= 10674)) AND isnotnull(d_date_sk#11)) + +(13) Project [codegen id : 2] +Output [1]: [d_date_sk#11] +Input [2]: [d_date_sk#11, d_date#12] + +(14) BroadcastExchange +Input [1]: [d_date_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_sold_date_sk#1] +Right keys [1]: [d_date_sk#11] +Join condition: None + +(16) Project [codegen id : 3] +Output [6]: [ws_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [8]: [ws_sold_date_sk#1, ws_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#11] + +(17) HashAggregate [codegen id : 3] +Input [6]: [ws_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#14] +Results [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#15] + +(18) Exchange +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#15] +Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), true, [id=#16] + +(19) HashAggregate [codegen id : 4] +Input [6]: 
[i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#15] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#3))#17] +Results [8]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#3))#17,17,2) AS itemrevenue#18, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#3))#17,17,2) AS _w0#19, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#3))#17,17,2) AS _w1#20] + +(20) Exchange +Input [8]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20] +Arguments: hashpartitioning(i_class#8, 5), true, [id=#21] + +(21) Sort [codegen id : 5] +Input [8]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20] +Arguments: [i_class#8 ASC NULLS FIRST], false, 0 + +(22) Window +Input [8]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20] +Arguments: [sum(_w1#20) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#22], [i_class#8] + +(23) Project [codegen id : 6] +Output [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#19) * 100.00), DecimalType(21,2), true) as decimal(27,2))) / promote_precision(_we0#22)), DecimalType(38,17), true) AS revenueratio#23] +Input [9]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, _we0#22] + +(24) TakeOrderedAndProject +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, revenueratio#23] +Arguments: 100, [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 
ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#23 ASC NULLS FIRST], [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, revenueratio#23] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12/simplified.txt new file mode 100644 index 0000000000000..620baa8d07fa8 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12/simplified.txt @@ -0,0 +1,38 @@ +TakeOrderedAndProject [i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue,revenueratio] + WholeStageCodegen (6) + Project [_w0,_we0,i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue] + InputAdapter + Window [_w1,i_class] + WholeStageCodegen (5) + Sort [i_class] + InputAdapter + Exchange [i_class] #1 + WholeStageCodegen (4) + HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,sum] [_w0,_w1,itemrevenue,sum,sum(UnscaledValue(ws_ext_sales_price))] + InputAdapter + Exchange [i_category,i_class,i_current_price,i_item_desc,i_item_id] #2 + WholeStageCodegen (3) + HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,ws_ext_sales_price] [sum,sum] + Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,ws_ext_sales_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,ws_ext_sales_price,ws_sold_date_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen 
(2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/explain.txt new file mode 100644 index 0000000000000..5282470abdc5f --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/explain.txt @@ -0,0 +1,810 @@ +== Physical Plan == +TakeOrderedAndProject (110) ++- * BroadcastHashJoin Inner BuildRight (109) + :- * Project (87) + : +- * Filter (86) + : +- * HashAggregate (85) + : +- Exchange (84) + : +- * HashAggregate (83) + : +- * Project (82) + : +- * BroadcastHashJoin Inner BuildRight (81) + : :- * Project (71) + : : +- * BroadcastHashJoin Inner BuildRight (70) + : : :- SortMergeJoin LeftSemi (64) + : : : :- * Sort (5) + : : : : +- Exchange (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- * Sort (63) + : : : +- Exchange (62) + : : : +- * Project (61) + : : : +- * BroadcastHashJoin Inner BuildRight (60) + : : : :- * Filter (8) + : : : : +- * ColumnarToRow (7) + : : : : +- Scan parquet default.item (6) + : : : +- BroadcastExchange (59) + : : : +- * HashAggregate (58) + : : : +- * HashAggregate (57) + : : : +- * HashAggregate (56) + : : : +- Exchange (55) + : : : +- * HashAggregate (54) + : : : +- SortMergeJoin LeftSemi (53) + : : : :- SortMergeJoin LeftSemi (41) + : : : : :- * Sort (26) + : : : : : +- Exchange (25) + : : : : : +- * Project (24) + : : : : : +- * BroadcastHashJoin Inner BuildRight (23) + : : : : : :- * Project (18) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (17) + : : : : : : :- * Filter (11) + : : : : : : : +- * ColumnarToRow (10) + : : : : : : : +- Scan parquet default.store_sales (9) + : : : : : : +- BroadcastExchange (16) + : : : : : : +- * Project (15) + : : : 
: : : +- * Filter (14) + : : : : : : +- * ColumnarToRow (13) + : : : : : : +- Scan parquet default.date_dim (12) + : : : : : +- BroadcastExchange (22) + : : : : : +- * Filter (21) + : : : : : +- * ColumnarToRow (20) + : : : : : +- Scan parquet default.item (19) + : : : : +- * Sort (40) + : : : : +- Exchange (39) + : : : : +- * Project (38) + : : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : : :- * Project (32) + : : : : : +- * BroadcastHashJoin Inner BuildRight (31) + : : : : : :- * Filter (29) + : : : : : : +- * ColumnarToRow (28) + : : : : : : +- Scan parquet default.catalog_sales (27) + : : : : : +- ReusedExchange (30) + : : : : +- BroadcastExchange (36) + : : : : +- * Filter (35) + : : : : +- * ColumnarToRow (34) + : : : : +- Scan parquet default.item (33) + : : : +- * Sort (52) + : : : +- Exchange (51) + : : : +- * Project (50) + : : : +- * BroadcastHashJoin Inner BuildRight (49) + : : : :- * Project (47) + : : : : +- * BroadcastHashJoin Inner BuildRight (46) + : : : : :- * Filter (44) + : : : : : +- * ColumnarToRow (43) + : : : : : +- Scan parquet default.web_sales (42) + : : : : +- ReusedExchange (45) + : : : +- ReusedExchange (48) + : : +- BroadcastExchange (69) + : : +- * Project (68) + : : +- * Filter (67) + : : +- * ColumnarToRow (66) + : : +- Scan parquet default.date_dim (65) + : +- BroadcastExchange (80) + : +- SortMergeJoin LeftSemi (79) + : :- * Sort (76) + : : +- Exchange (75) + : : +- * Filter (74) + : : +- * ColumnarToRow (73) + : : +- Scan parquet default.item (72) + : +- * Sort (78) + : +- ReusedExchange (77) + +- BroadcastExchange (108) + +- * Project (107) + +- * Filter (106) + +- * HashAggregate (105) + +- Exchange (104) + +- * HashAggregate (103) + +- * Project (102) + +- * BroadcastHashJoin Inner BuildRight (101) + :- * Project (99) + : +- * BroadcastHashJoin Inner BuildRight (98) + : :- SortMergeJoin LeftSemi (92) + : : :- * Sort (89) + : : : +- ReusedExchange (88) + : : +- * Sort (91) + : : +- ReusedExchange (90) + : +- 
BroadcastExchange (97) + : +- * Project (96) + : +- * Filter (95) + : +- * ColumnarToRow (94) + : +- Scan parquet default.date_dim (93) + +- ReusedExchange (100) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] + +(3) Filter [codegen id : 1] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(4) Exchange +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Arguments: hashpartitioning(ss_item_sk#2, 5), true, [id=#5] + +(5) Sort [codegen id : 2] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Arguments: [ss_item_sk#2 ASC NULLS FIRST], false, 0 + +(6) Scan parquet default.item +Output [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_brand_id), IsNotNull(i_category_id), IsNotNull(i_class_id)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 17] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(8) Filter [codegen id : 17] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Condition : ((isnotnull(i_brand_id#7) AND isnotnull(i_category_id#9)) AND isnotnull(i_class_id#8)) + +(9) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#1, ss_item_sk#2] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(10) ColumnarToRow [codegen id : 5] +Input [2]: [ss_sold_date_sk#1, ss_item_sk#2] + +(11) Filter [codegen id : 5] +Input [2]: [ss_sold_date_sk#1, ss_item_sk#2] +Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(12) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_year#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1998), LessThanOrEqual(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(13) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#10, d_year#11] + +(14) Filter [codegen id : 3] +Input [2]: [d_date_sk#10, d_year#11] +Condition : (((isnotnull(d_year#11) AND (d_year#11 >= 1998)) AND (d_year#11 <= 2000)) AND isnotnull(d_date_sk#10)) + +(15) Project [codegen id : 3] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_year#11] + +(16) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(17) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(18) Project [codegen id : 5] +Output [1]: [ss_item_sk#2] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, d_date_sk#10] + +(19) Scan parquet default.item +Output [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category_id), IsNotNull(i_class_id), 
IsNotNull(i_brand_id)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 4] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(21) Filter [codegen id : 4] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Condition : (((isnotnull(i_item_sk#6) AND isnotnull(i_category_id#9)) AND isnotnull(i_class_id#8)) AND isnotnull(i_brand_id#7)) + +(22) BroadcastExchange +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#13] + +(23) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(24) Project [codegen id : 5] +Output [3]: [i_brand_id#7 AS brand_id#14, i_class_id#8 AS class_id#15, i_category_id#9 AS category_id#16] +Input [5]: [ss_item_sk#2, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(25) Exchange +Input [3]: [brand_id#14, class_id#15, category_id#16] +Arguments: hashpartitioning(coalesce(brand_id#14, 0), isnull(brand_id#14), coalesce(class_id#15, 0), isnull(class_id#15), coalesce(category_id#16, 0), isnull(category_id#16), 5), true, [id=#17] + +(26) Sort [codegen id : 6] +Input [3]: [brand_id#14, class_id#15, category_id#16] +Arguments: [coalesce(brand_id#14, 0) ASC NULLS FIRST, isnull(brand_id#14) ASC NULLS FIRST, coalesce(class_id#15, 0) ASC NULLS FIRST, isnull(class_id#15) ASC NULLS FIRST, coalesce(category_id#16, 0) ASC NULLS FIRST, isnull(category_id#16) ASC NULLS FIRST], false, 0 + +(27) Scan parquet default.catalog_sales +Output [2]: [cs_sold_date_sk#18, cs_item_sk#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 9] +Input [2]: [cs_sold_date_sk#18, cs_item_sk#19] 
+ +(29) Filter [codegen id : 9] +Input [2]: [cs_sold_date_sk#18, cs_item_sk#19] +Condition : (isnotnull(cs_item_sk#19) AND isnotnull(cs_sold_date_sk#18)) + +(30) ReusedExchange [Reuses operator id: 16] +Output [1]: [d_date_sk#10] + +(31) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [cs_sold_date_sk#18] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(32) Project [codegen id : 9] +Output [1]: [cs_item_sk#19] +Input [3]: [cs_sold_date_sk#18, cs_item_sk#19, d_date_sk#10] + +(33) Scan parquet default.item +Output [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 8] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(35) Filter [codegen id : 8] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Condition : isnotnull(i_item_sk#6) + +(36) BroadcastExchange +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#20] + +(37) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [cs_item_sk#19] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(38) Project [codegen id : 9] +Output [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Input [5]: [cs_item_sk#19, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(39) Exchange +Input [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Arguments: hashpartitioning(coalesce(i_brand_id#7, 0), isnull(i_brand_id#7), coalesce(i_class_id#8, 0), isnull(i_class_id#8), coalesce(i_category_id#9, 0), isnull(i_category_id#9), 5), true, [id=#21] + +(40) Sort [codegen id : 10] +Input [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Arguments: [coalesce(i_brand_id#7, 0) ASC NULLS FIRST, 
isnull(i_brand_id#7) ASC NULLS FIRST, coalesce(i_class_id#8, 0) ASC NULLS FIRST, isnull(i_class_id#8) ASC NULLS FIRST, coalesce(i_category_id#9, 0) ASC NULLS FIRST, isnull(i_category_id#9) ASC NULLS FIRST], false, 0 + +(41) SortMergeJoin +Left keys [6]: [coalesce(brand_id#14, 0), isnull(brand_id#14), coalesce(class_id#15, 0), isnull(class_id#15), coalesce(category_id#16, 0), isnull(category_id#16)] +Right keys [6]: [coalesce(i_brand_id#7, 0), isnull(i_brand_id#7), coalesce(i_class_id#8, 0), isnull(i_class_id#8), coalesce(i_category_id#9, 0), isnull(i_category_id#9)] +Join condition: None + +(42) Scan parquet default.web_sales +Output [2]: [ws_sold_date_sk#22, ws_item_sk#23] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(43) ColumnarToRow [codegen id : 13] +Input [2]: [ws_sold_date_sk#22, ws_item_sk#23] + +(44) Filter [codegen id : 13] +Input [2]: [ws_sold_date_sk#22, ws_item_sk#23] +Condition : (isnotnull(ws_item_sk#23) AND isnotnull(ws_sold_date_sk#22)) + +(45) ReusedExchange [Reuses operator id: 16] +Output [1]: [d_date_sk#10] + +(46) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ws_sold_date_sk#22] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(47) Project [codegen id : 13] +Output [1]: [ws_item_sk#23] +Input [3]: [ws_sold_date_sk#22, ws_item_sk#23, d_date_sk#10] + +(48) ReusedExchange [Reuses operator id: 36] +Output [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(49) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ws_item_sk#23] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(50) Project [codegen id : 13] +Output [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Input [5]: [ws_item_sk#23, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(51) Exchange +Input [3]: [i_brand_id#7, 
i_class_id#8, i_category_id#9] +Arguments: hashpartitioning(coalesce(i_brand_id#7, 0), isnull(i_brand_id#7), coalesce(i_class_id#8, 0), isnull(i_class_id#8), coalesce(i_category_id#9, 0), isnull(i_category_id#9), 5), true, [id=#24] + +(52) Sort [codegen id : 14] +Input [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Arguments: [coalesce(i_brand_id#7, 0) ASC NULLS FIRST, isnull(i_brand_id#7) ASC NULLS FIRST, coalesce(i_class_id#8, 0) ASC NULLS FIRST, isnull(i_class_id#8) ASC NULLS FIRST, coalesce(i_category_id#9, 0) ASC NULLS FIRST, isnull(i_category_id#9) ASC NULLS FIRST], false, 0 + +(53) SortMergeJoin +Left keys [6]: [coalesce(brand_id#14, 0), isnull(brand_id#14), coalesce(class_id#15, 0), isnull(class_id#15), coalesce(category_id#16, 0), isnull(category_id#16)] +Right keys [6]: [coalesce(i_brand_id#7, 0), isnull(i_brand_id#7), coalesce(i_class_id#8, 0), isnull(i_class_id#8), coalesce(i_category_id#9, 0), isnull(i_category_id#9)] +Join condition: None + +(54) HashAggregate [codegen id : 15] +Input [3]: [brand_id#14, class_id#15, category_id#16] +Keys [3]: [brand_id#14, class_id#15, category_id#16] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#14, class_id#15, category_id#16] + +(55) Exchange +Input [3]: [brand_id#14, class_id#15, category_id#16] +Arguments: hashpartitioning(brand_id#14, class_id#15, category_id#16, 5), true, [id=#25] + +(56) HashAggregate [codegen id : 16] +Input [3]: [brand_id#14, class_id#15, category_id#16] +Keys [3]: [brand_id#14, class_id#15, category_id#16] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#14, class_id#15, category_id#16] + +(57) HashAggregate [codegen id : 16] +Input [3]: [brand_id#14, class_id#15, category_id#16] +Keys [3]: [brand_id#14, class_id#15, category_id#16] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#14, class_id#15, category_id#16] + +(58) HashAggregate [codegen id : 16] +Input [3]: [brand_id#14, class_id#15, category_id#16] +Keys [3]: [brand_id#14, 
class_id#15, category_id#16] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#14, class_id#15, category_id#16] + +(59) BroadcastExchange +Input [3]: [brand_id#14, class_id#15, category_id#16] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#26] + +(60) BroadcastHashJoin [codegen id : 17] +Left keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Right keys [3]: [brand_id#14, class_id#15, category_id#16] +Join condition: None + +(61) Project [codegen id : 17] +Output [1]: [i_item_sk#6 AS ss_item_sk#27] +Input [7]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, brand_id#14, class_id#15, category_id#16] + +(62) Exchange +Input [1]: [ss_item_sk#27] +Arguments: hashpartitioning(ss_item_sk#27, 5), true, [id=#28] + +(63) Sort [codegen id : 18] +Input [1]: [ss_item_sk#27] +Arguments: [ss_item_sk#27 ASC NULLS FIRST], false, 0 + +(64) SortMergeJoin +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [ss_item_sk#27] +Join condition: None + +(65) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_week_seq#29] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(66) ColumnarToRow [codegen id : 19] +Input [2]: [d_date_sk#10, d_week_seq#29] + +(67) Filter [codegen id : 19] +Input [2]: [d_date_sk#10, d_week_seq#29] +Condition : ((isnotnull(d_week_seq#29) AND (d_week_seq#29 = Subquery scalar-subquery#30, [id=#31])) AND isnotnull(d_date_sk#10)) + +(68) Project [codegen id : 19] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_week_seq#29] + +(69) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#32] + +(70) BroadcastHashJoin [codegen id : 38] +Left keys [1]: 
[ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(71) Project [codegen id : 38] +Output [3]: [ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4, d_date_sk#10] + +(72) Scan parquet default.item +Output [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category_id), IsNotNull(i_class_id), IsNotNull(i_brand_id)] +ReadSchema: struct + +(73) ColumnarToRow [codegen id : 20] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(74) Filter [codegen id : 20] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Condition : (((isnotnull(i_item_sk#6) AND isnotnull(i_category_id#9)) AND isnotnull(i_class_id#8)) AND isnotnull(i_brand_id#7)) + +(75) Exchange +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Arguments: hashpartitioning(i_item_sk#6, 5), true, [id=#33] + +(76) Sort [codegen id : 21] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Arguments: [i_item_sk#6 ASC NULLS FIRST], false, 0 + +(77) ReusedExchange [Reuses operator id: 62] +Output [1]: [ss_item_sk#27] + +(78) Sort [codegen id : 37] +Input [1]: [ss_item_sk#27] +Arguments: [ss_item_sk#27 ASC NULLS FIRST], false, 0 + +(79) SortMergeJoin +Left keys [1]: [i_item_sk#6] +Right keys [1]: [ss_item_sk#27] +Join condition: None + +(80) BroadcastExchange +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#34] + +(81) BroadcastHashJoin [codegen id : 38] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(82) Project [codegen id : 38] +Output [5]: 
[ss_quantity#3, ss_list_price#4, i_brand_id#7, i_class_id#8, i_category_id#9] +Input [7]: [ss_item_sk#2, ss_quantity#3, ss_list_price#4, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(83) HashAggregate [codegen id : 38] +Input [5]: [ss_quantity#3, ss_list_price#4, i_brand_id#7, i_class_id#8, i_category_id#9] +Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Aggregate Attributes [3]: [sum#35, isEmpty#36, count#37] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#38, isEmpty#39, count#40] + +(84) Exchange +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#38, isEmpty#39, count#40] +Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#41] + +(85) HashAggregate [codegen id : 78] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#38, isEmpty#39, count#40] +Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42, count(1)#43] +Results [7]: [store AS channel#44, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42 AS sales#45, count(1)#43 AS number_sales#46, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as 
decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#47] + +(86) Filter [codegen id : 78] +Input [7]: [channel#44, i_brand_id#7, i_class_id#8, i_category_id#9, sales#45, number_sales#46, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#47] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#47) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#47 as decimal(32,6)) > cast(Subquery scalar-subquery#48, [id=#49] as decimal(32,6)))) + +(87) Project [codegen id : 78] +Output [6]: [channel#44, i_brand_id#7, i_class_id#8, i_category_id#9, sales#45, number_sales#46] +Input [7]: [channel#44, i_brand_id#7, i_class_id#8, i_category_id#9, sales#45, number_sales#46, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#47] + +(88) ReusedExchange [Reuses operator id: 4] +Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] + +(89) Sort [codegen id : 40] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Arguments: [ss_item_sk#2 ASC NULLS FIRST], false, 0 + +(90) ReusedExchange [Reuses operator id: 62] +Output [1]: [ss_item_sk#27] + +(91) Sort [codegen id : 56] +Input [1]: [ss_item_sk#27] +Arguments: [ss_item_sk#27 ASC NULLS 
FIRST], false, 0 + +(92) SortMergeJoin +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [ss_item_sk#27] +Join condition: None + +(93) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_week_seq#29] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(94) ColumnarToRow [codegen id : 57] +Input [2]: [d_date_sk#10, d_week_seq#29] + +(95) Filter [codegen id : 57] +Input [2]: [d_date_sk#10, d_week_seq#29] +Condition : ((isnotnull(d_week_seq#29) AND (d_week_seq#29 = Subquery scalar-subquery#50, [id=#51])) AND isnotnull(d_date_sk#10)) + +(96) Project [codegen id : 57] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_week_seq#29] + +(97) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#52] + +(98) BroadcastHashJoin [codegen id : 76] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(99) Project [codegen id : 76] +Output [3]: [ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4, d_date_sk#10] + +(100) ReusedExchange [Reuses operator id: 80] +Output [4]: [i_item_sk#53, i_brand_id#54, i_class_id#55, i_category_id#56] + +(101) BroadcastHashJoin [codegen id : 76] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#53] +Join condition: None + +(102) Project [codegen id : 76] +Output [5]: [ss_quantity#3, ss_list_price#4, i_brand_id#54, i_class_id#55, i_category_id#56] +Input [7]: [ss_item_sk#2, ss_quantity#3, ss_list_price#4, i_item_sk#53, i_brand_id#54, i_class_id#55, i_category_id#56] + +(103) HashAggregate [codegen id : 76] +Input [5]: [ss_quantity#3, ss_list_price#4, i_brand_id#54, i_class_id#55, i_category_id#56] +Keys [3]: [i_brand_id#54, 
i_class_id#55, i_category_id#56] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Aggregate Attributes [3]: [sum#57, isEmpty#58, count#59] +Results [6]: [i_brand_id#54, i_class_id#55, i_category_id#56, sum#60, isEmpty#61, count#62] + +(104) Exchange +Input [6]: [i_brand_id#54, i_class_id#55, i_category_id#56, sum#60, isEmpty#61, count#62] +Arguments: hashpartitioning(i_brand_id#54, i_class_id#55, i_category_id#56, 5), true, [id=#63] + +(105) HashAggregate [codegen id : 77] +Input [6]: [i_brand_id#54, i_class_id#55, i_category_id#56, sum#60, isEmpty#61, count#62] +Keys [3]: [i_brand_id#54, i_class_id#55, i_category_id#56] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#64, count(1)#65] +Results [7]: [store AS channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#64 AS sales#67, count(1)#65 AS number_sales#68, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#64 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#69] + +(106) Filter [codegen id : 77] +Input 
[7]: [channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sales#67, number_sales#68, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#69] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#69) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#69 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#48, [id=#49] as decimal(32,6)))) + +(107) Project [codegen id : 77] +Output [6]: [channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sales#67, number_sales#68] +Input [7]: [channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sales#67, number_sales#68, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#69] + +(108) BroadcastExchange +Input [6]: [channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sales#67, number_sales#68] +Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true]),false), [id=#70] + +(109) BroadcastHashJoin [codegen id : 78] +Left keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Right keys [3]: [i_brand_id#54, i_class_id#55, i_category_id#56] +Join condition: None + +(110) TakeOrderedAndProject +Input [12]: [channel#44, i_brand_id#7, i_class_id#8, i_category_id#9, sales#45, number_sales#46, channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sales#67, number_sales#68] +Arguments: 100, [channel#44 ASC NULLS FIRST, i_brand_id#7 ASC NULLS FIRST, i_class_id#8 ASC NULLS FIRST, 
i_category_id#9 ASC NULLS FIRST], [channel#44, i_brand_id#7, i_class_id#8, i_category_id#9, sales#45, number_sales#46, channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sales#67, number_sales#68] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 86 Hosting Expression = Subquery scalar-subquery#48, [id=#49] +* HashAggregate (136) ++- Exchange (135) + +- * HashAggregate (134) + +- Union (133) + :- * Project (120) + : +- * BroadcastHashJoin Inner BuildRight (119) + : :- * Filter (113) + : : +- * ColumnarToRow (112) + : : +- Scan parquet default.store_sales (111) + : +- BroadcastExchange (118) + : +- * Project (117) + : +- * Filter (116) + : +- * ColumnarToRow (115) + : +- Scan parquet default.date_dim (114) + :- * Project (126) + : +- * BroadcastHashJoin Inner BuildRight (125) + : :- * Filter (123) + : : +- * ColumnarToRow (122) + : : +- Scan parquet default.catalog_sales (121) + : +- ReusedExchange (124) + +- * Project (132) + +- * BroadcastHashJoin Inner BuildRight (131) + :- * Filter (129) + : +- * ColumnarToRow (128) + : +- Scan parquet default.web_sales (127) + +- ReusedExchange (130) + + +(111) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(112) ColumnarToRow [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] + +(113) Filter [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] +Condition : isnotnull(ss_sold_date_sk#1) + +(114) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_year#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: 
[IsNotNull(d_year), GreaterThanOrEqual(d_year,1998), LessThanOrEqual(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(115) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#10, d_year#11] + +(116) Filter [codegen id : 1] +Input [2]: [d_date_sk#10, d_year#11] +Condition : (((isnotnull(d_year#11) AND (d_year#11 >= 1998)) AND (d_year#11 <= 2000)) AND isnotnull(d_date_sk#10)) + +(117) Project [codegen id : 1] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_year#11] + +(118) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#71] + +(119) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(120) Project [codegen id : 2] +Output [2]: [ss_quantity#3 AS quantity#72, ss_list_price#4 AS list_price#73] +Input [4]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, d_date_sk#10] + +(121) Scan parquet default.catalog_sales +Output [3]: [cs_sold_date_sk#18, cs_quantity#74, cs_list_price#75] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(122) ColumnarToRow [codegen id : 4] +Input [3]: [cs_sold_date_sk#18, cs_quantity#74, cs_list_price#75] + +(123) Filter [codegen id : 4] +Input [3]: [cs_sold_date_sk#18, cs_quantity#74, cs_list_price#75] +Condition : isnotnull(cs_sold_date_sk#18) + +(124) ReusedExchange [Reuses operator id: 118] +Output [1]: [d_date_sk#10] + +(125) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#18] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(126) Project [codegen id : 4] +Output [2]: [cs_quantity#74 AS quantity#76, cs_list_price#75 AS list_price#77] +Input [4]: [cs_sold_date_sk#18, cs_quantity#74, cs_list_price#75, d_date_sk#10] + +(127) Scan 
parquet default.web_sales +Output [3]: [ws_sold_date_sk#22, ws_quantity#78, ws_list_price#79] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(128) ColumnarToRow [codegen id : 6] +Input [3]: [ws_sold_date_sk#22, ws_quantity#78, ws_list_price#79] + +(129) Filter [codegen id : 6] +Input [3]: [ws_sold_date_sk#22, ws_quantity#78, ws_list_price#79] +Condition : isnotnull(ws_sold_date_sk#22) + +(130) ReusedExchange [Reuses operator id: 118] +Output [1]: [d_date_sk#10] + +(131) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#22] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(132) Project [codegen id : 6] +Output [2]: [ws_quantity#78 AS quantity#80, ws_list_price#79 AS list_price#81] +Input [4]: [ws_sold_date_sk#22, ws_quantity#78, ws_list_price#79, d_date_sk#10] + +(133) Union + +(134) HashAggregate [codegen id : 7] +Input [2]: [quantity#72, list_price#73] +Keys: [] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#72 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#73 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#82, count#83] +Results [2]: [sum#84, count#85] + +(135) Exchange +Input [2]: [sum#84, count#85] +Arguments: SinglePartition, true, [id=#86] + +(136) HashAggregate [codegen id : 8] +Input [2]: [sum#84, count#85] +Keys: [] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#72 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#73 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#72 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#73 as decimal(12,2)))), DecimalType(18,2), true))#87] +Results [1]: 
[avg(CheckOverflow((promote_precision(cast(cast(quantity#72 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#73 as decimal(12,2)))), DecimalType(18,2), true))#87 AS average_sales#88] + +Subquery:2 Hosting operator id = 67 Hosting Expression = Subquery scalar-subquery#30, [id=#31] +* Project (140) ++- * Filter (139) + +- * ColumnarToRow (138) + +- Scan parquet default.date_dim (137) + + +(137) Scan parquet default.date_dim +Output [4]: [d_week_seq#29, d_year#11, d_moy#89, d_dom#90] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_dom), IsNotNull(d_year), EqualTo(d_year,1999), EqualTo(d_moy,12), EqualTo(d_dom,16)] +ReadSchema: struct + +(138) ColumnarToRow [codegen id : 1] +Input [4]: [d_week_seq#29, d_year#11, d_moy#89, d_dom#90] + +(139) Filter [codegen id : 1] +Input [4]: [d_week_seq#29, d_year#11, d_moy#89, d_dom#90] +Condition : (((((isnotnull(d_moy#89) AND isnotnull(d_dom#90)) AND isnotnull(d_year#11)) AND (d_year#11 = 1999)) AND (d_moy#89 = 12)) AND (d_dom#90 = 16)) + +(140) Project [codegen id : 1] +Output [1]: [d_week_seq#29] +Input [4]: [d_week_seq#29, d_year#11, d_moy#89, d_dom#90] + +Subquery:3 Hosting operator id = 106 Hosting Expression = ReusedSubquery Subquery scalar-subquery#48, [id=#49] + +Subquery:4 Hosting operator id = 95 Hosting Expression = Subquery scalar-subquery#50, [id=#51] +* Project (144) ++- * Filter (143) + +- * ColumnarToRow (142) + +- Scan parquet default.date_dim (141) + + +(141) Scan parquet default.date_dim +Output [4]: [d_week_seq#29, d_year#11, d_moy#89, d_dom#90] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_dom), IsNotNull(d_year), EqualTo(d_year,1998), 
EqualTo(d_moy,12), EqualTo(d_dom,16)] +ReadSchema: struct + +(142) ColumnarToRow [codegen id : 1] +Input [4]: [d_week_seq#29, d_year#11, d_moy#89, d_dom#90] + +(143) Filter [codegen id : 1] +Input [4]: [d_week_seq#29, d_year#11, d_moy#89, d_dom#90] +Condition : (((((isnotnull(d_moy#89) AND isnotnull(d_dom#90)) AND isnotnull(d_year#11)) AND (d_year#11 = 1998)) AND (d_moy#89 = 12)) AND (d_dom#90 = 16)) + +(144) Project [codegen id : 1] +Output [1]: [d_week_seq#29] +Input [4]: [d_week_seq#29, d_year#11, d_moy#89, d_dom#90] + + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/simplified.txt new file mode 100644 index 0000000000000..5141d40e7325b --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/simplified.txt @@ -0,0 +1,231 @@ +TakeOrderedAndProject [channel,channel,i_brand_id,i_brand_id,i_category_id,i_category_id,i_class_id,i_class_id,number_sales,number_sales,sales,sales] + WholeStageCodegen (78) + BroadcastHashJoin [i_brand_id,i_brand_id,i_category_id,i_category_id,i_class_id,i_class_id] + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + Subquery #2 + WholeStageCodegen (8) + HashAggregate [count,sum] [average_sales,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),count,sum] + InputAdapter + Exchange #15 + WholeStageCodegen (7) + HashAggregate [list_price,quantity] [count,count,sum,sum] + InputAdapter + Union + WholeStageCodegen (2) + Project [ss_list_price,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter 
[ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_list_price,ss_quantity,ss_sold_date_sk] + InputAdapter + BroadcastExchange #16 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen (4) + Project [cs_list_price,cs_quantity] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_list_price,cs_quantity,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #16 + WholeStageCodegen (6) + Project [ws_list_price,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_list_price,ws_quantity,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #16 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + Exchange [i_brand_id,i_category_id,i_class_id] #1 + WholeStageCodegen (38) + HashAggregate [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity] [count,count,isEmpty,isEmpty,sum,sum] + Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_item_sk,ss_list_price,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + InputAdapter + SortMergeJoin [ss_item_sk,ss_item_sk] + WholeStageCodegen (2) + Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #2 + WholeStageCodegen (1) + 
Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] + WholeStageCodegen (18) + Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #3 + WholeStageCodegen (17) + Project [i_item_sk] + BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] + Filter [i_brand_id,i_category_id,i_class_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (16) + HashAggregate [brand_id,category_id,class_id] + HashAggregate [brand_id,category_id,class_id] + HashAggregate [brand_id,category_id,class_id] + InputAdapter + Exchange [brand_id,category_id,class_id] #5 + WholeStageCodegen (15) + HashAggregate [brand_id,category_id,class_id] + InputAdapter + SortMergeJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] + SortMergeJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] + WholeStageCodegen (6) + Sort [brand_id,category_id,class_id] + InputAdapter + Exchange [brand_id,category_id,class_id] #6 + WholeStageCodegen (5) + Project [i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (4) + Filter [i_brand_id,i_category_id,i_class_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] + WholeStageCodegen (10) + Sort [i_brand_id,i_category_id,i_class_id] + 
InputAdapter + Exchange [i_brand_id,i_category_id,i_class_id] #9 + WholeStageCodegen (9) + Project [i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_item_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #7 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (8) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] + WholeStageCodegen (14) + Sort [i_brand_id,i_category_id,i_class_id] + InputAdapter + Exchange [i_brand_id,i_category_id,i_class_id] #11 + WholeStageCodegen (13) + Project [i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_item_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #7 + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] #10 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (19) + Project [d_date_sk] + Filter [d_date_sk,d_week_seq] + Subquery #1 + WholeStageCodegen (1) + Project [d_week_seq] + Filter [d_dom,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_dom,d_moy,d_week_seq,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_week_seq] + InputAdapter + BroadcastExchange #13 + SortMergeJoin [i_item_sk,ss_item_sk] + WholeStageCodegen (21) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #14 + WholeStageCodegen (20) + Filter [i_brand_id,i_category_id,i_class_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] + WholeStageCodegen (37) + Sort [ss_item_sk] 
+ InputAdapter + ReusedExchange [ss_item_sk] #3 + InputAdapter + BroadcastExchange #17 + WholeStageCodegen (77) + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #2 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + Exchange [i_brand_id,i_category_id,i_class_id] #18 + WholeStageCodegen (76) + HashAggregate [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity] [count,count,isEmpty,isEmpty,sum,sum] + Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_item_sk,ss_list_price,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + InputAdapter + SortMergeJoin [ss_item_sk,ss_item_sk] + WholeStageCodegen (40) + Sort [ss_item_sk] + InputAdapter + ReusedExchange [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] #2 + WholeStageCodegen (56) + Sort [ss_item_sk] + InputAdapter + ReusedExchange [ss_item_sk] #3 + InputAdapter + BroadcastExchange #19 + WholeStageCodegen (57) + Project [d_date_sk] + Filter [d_date_sk,d_week_seq] + Subquery #3 + WholeStageCodegen (1) + Project [d_week_seq] + Filter [d_dom,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_dom,d_moy,d_week_seq,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim 
[d_date_sk,d_week_seq] + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] #13 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/explain.txt new file mode 100644 index 0000000000000..9ee58f9b3d604 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/explain.txt @@ -0,0 +1,763 @@ +== Physical Plan == +TakeOrderedAndProject (100) ++- * BroadcastHashJoin Inner BuildRight (99) + :- * Project (77) + : +- * Filter (76) + : +- * HashAggregate (75) + : +- Exchange (74) + : +- * HashAggregate (73) + : +- * Project (72) + : +- * BroadcastHashJoin Inner BuildRight (71) + : :- * Project (65) + : : +- * BroadcastHashJoin Inner BuildRight (64) + : : :- * BroadcastHashJoin LeftSemi BuildRight (57) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (56) + : : : +- * Project (55) + : : : +- * BroadcastHashJoin Inner BuildRight (54) + : : : :- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.item (4) + : : : +- BroadcastExchange (53) + : : : +- * HashAggregate (52) + : : : +- * HashAggregate (51) + : : : +- * HashAggregate (50) + : : : +- Exchange (49) + : : : +- * HashAggregate (48) + : : : +- * BroadcastHashJoin LeftSemi BuildRight (47) + : : : :- * BroadcastHashJoin LeftSemi BuildRight (36) + : : : : :- * Project (22) + : : : : : +- * BroadcastHashJoin Inner BuildRight (21) + : : : : : :- * Project (15) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : : :- * Filter (9) + : : : : : : : +- * ColumnarToRow (8) + : : : : : : : +- Scan parquet default.store_sales (7) + : : : : : : +- BroadcastExchange (13) + : : : : : : +- * Filter (12) + : : : : : : +- * ColumnarToRow (11) + : : : : : : +- Scan parquet default.item (10) + : : : : : +- BroadcastExchange (20) + : 
: : : : +- * Project (19) + : : : : : +- * Filter (18) + : : : : : +- * ColumnarToRow (17) + : : : : : +- Scan parquet default.date_dim (16) + : : : : +- BroadcastExchange (35) + : : : : +- * Project (34) + : : : : +- * BroadcastHashJoin Inner BuildRight (33) + : : : : :- * Project (31) + : : : : : +- * BroadcastHashJoin Inner BuildRight (30) + : : : : : :- * Filter (25) + : : : : : : +- * ColumnarToRow (24) + : : : : : : +- Scan parquet default.catalog_sales (23) + : : : : : +- BroadcastExchange (29) + : : : : : +- * Filter (28) + : : : : : +- * ColumnarToRow (27) + : : : : : +- Scan parquet default.item (26) + : : : : +- ReusedExchange (32) + : : : +- BroadcastExchange (46) + : : : +- * Project (45) + : : : +- * BroadcastHashJoin Inner BuildRight (44) + : : : :- * Project (42) + : : : : +- * BroadcastHashJoin Inner BuildRight (41) + : : : : :- * Filter (39) + : : : : : +- * ColumnarToRow (38) + : : : : : +- Scan parquet default.web_sales (37) + : : : : +- ReusedExchange (40) + : : : +- ReusedExchange (43) + : : +- BroadcastExchange (63) + : : +- * BroadcastHashJoin LeftSemi BuildRight (62) + : : :- * Filter (60) + : : : +- * ColumnarToRow (59) + : : : +- Scan parquet default.item (58) + : : +- ReusedExchange (61) + : +- BroadcastExchange (70) + : +- * Project (69) + : +- * Filter (68) + : +- * ColumnarToRow (67) + : +- Scan parquet default.date_dim (66) + +- BroadcastExchange (98) + +- * Project (97) + +- * Filter (96) + +- * HashAggregate (95) + +- Exchange (94) + +- * HashAggregate (93) + +- * Project (92) + +- * BroadcastHashJoin Inner BuildRight (91) + :- * Project (85) + : +- * BroadcastHashJoin Inner BuildRight (84) + : :- * BroadcastHashJoin LeftSemi BuildRight (82) + : : :- * Filter (80) + : : : +- * ColumnarToRow (79) + : : : +- Scan parquet default.store_sales (78) + : : +- ReusedExchange (81) + : +- ReusedExchange (83) + +- BroadcastExchange (90) + +- * Project (89) + +- * Filter (88) + +- * ColumnarToRow (87) + +- Scan parquet default.date_dim (86) + 
+ +(1) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 25] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] + +(3) Filter [codegen id : 25] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(4) Scan parquet default.item +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_class_id), IsNotNull(i_category_id), IsNotNull(i_brand_id)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 11] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(6) Filter [codegen id : 11] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : ((isnotnull(i_class_id#7) AND isnotnull(i_category_id#8)) AND isnotnull(i_brand_id#6)) + +(7) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#1, ss_item_sk#2] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 9] +Input [2]: [ss_sold_date_sk#1, ss_item_sk#2] + +(9) Filter [codegen id : 9] +Input [2]: [ss_sold_date_sk#1, ss_item_sk#2] +Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(10) Scan parquet default.item +Output [4]: 
[i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_category_id), IsNotNull(i_class_id)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 1] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(12) Filter [codegen id : 1] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : (((isnotnull(i_item_sk#5) AND isnotnull(i_brand_id#6)) AND isnotnull(i_category_id#8)) AND isnotnull(i_class_id#7)) + +(13) BroadcastExchange +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] + +(14) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(15) Project [codegen id : 9] +Output [4]: [ss_sold_date_sk#1, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(16) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_year#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1998), LessThanOrEqual(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#10, d_year#11] + +(18) Filter [codegen id : 2] +Input [2]: [d_date_sk#10, d_year#11] +Condition : (((isnotnull(d_year#11) AND (d_year#11 >= 1998)) AND (d_year#11 <= 2000)) AND isnotnull(d_date_sk#10)) + +(19) Project [codegen id : 2] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_year#11] + +(20) 
BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(21) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(22) Project [codegen id : 9] +Output [3]: [i_brand_id#6 AS brand_id#13, i_class_id#7 AS class_id#14, i_category_id#8 AS category_id#15] +Input [5]: [ss_sold_date_sk#1, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] + +(23) Scan parquet default.catalog_sales +Output [2]: [cs_sold_date_sk#16, cs_item_sk#17] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 5] +Input [2]: [cs_sold_date_sk#16, cs_item_sk#17] + +(25) Filter [codegen id : 5] +Input [2]: [cs_sold_date_sk#16, cs_item_sk#17] +Condition : (isnotnull(cs_item_sk#17) AND isnotnull(cs_sold_date_sk#16)) + +(26) Scan parquet default.item +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 3] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(28) Filter [codegen id : 3] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : isnotnull(i_item_sk#5) + +(29) BroadcastExchange +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#18] + +(30) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_item_sk#17] +Right keys [1]: [i_item_sk#5] +Join condition: 
None + +(31) Project [codegen id : 5] +Output [4]: [cs_sold_date_sk#16, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [6]: [cs_sold_date_sk#16, cs_item_sk#17, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(32) ReusedExchange [Reuses operator id: 20] +Output [1]: [d_date_sk#10] + +(33) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_sold_date_sk#16] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(34) Project [codegen id : 5] +Output [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Input [5]: [cs_sold_date_sk#16, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] + +(35) BroadcastExchange +Input [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [id=#19] + +(36) BroadcastHashJoin [codegen id : 9] +Left keys [6]: [coalesce(brand_id#13, 0), isnull(brand_id#13), coalesce(class_id#14, 0), isnull(class_id#14), coalesce(category_id#15, 0), isnull(category_id#15)] +Right keys [6]: [coalesce(i_brand_id#6, 0), isnull(i_brand_id#6), coalesce(i_class_id#7, 0), isnull(i_class_id#7), coalesce(i_category_id#8, 0), isnull(i_category_id#8)] +Join condition: None + +(37) Scan parquet default.web_sales +Output [2]: [ws_sold_date_sk#20, ws_item_sk#21] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 8] +Input [2]: [ws_sold_date_sk#20, ws_item_sk#21] + +(39) Filter [codegen id : 8] +Input [2]: [ws_sold_date_sk#20, ws_item_sk#21] +Condition : (isnotnull(ws_item_sk#21) AND isnotnull(ws_sold_date_sk#20)) + +(40) ReusedExchange [Reuses operator id: 29] +Output [4]: 
[i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(41) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ws_item_sk#21] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(42) Project [codegen id : 8] +Output [4]: [ws_sold_date_sk#20, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [6]: [ws_sold_date_sk#20, ws_item_sk#21, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(43) ReusedExchange [Reuses operator id: 20] +Output [1]: [d_date_sk#10] + +(44) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ws_sold_date_sk#20] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(45) Project [codegen id : 8] +Output [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Input [5]: [ws_sold_date_sk#20, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] + +(46) BroadcastExchange +Input [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [id=#22] + +(47) BroadcastHashJoin [codegen id : 9] +Left keys [6]: [coalesce(brand_id#13, 0), isnull(brand_id#13), coalesce(class_id#14, 0), isnull(class_id#14), coalesce(category_id#15, 0), isnull(category_id#15)] +Right keys [6]: [coalesce(i_brand_id#6, 0), isnull(i_brand_id#6), coalesce(i_class_id#7, 0), isnull(i_class_id#7), coalesce(i_category_id#8, 0), isnull(i_category_id#8)] +Join condition: None + +(48) HashAggregate [codegen id : 9] +Input [3]: [brand_id#13, class_id#14, category_id#15] +Keys [3]: [brand_id#13, class_id#14, category_id#15] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#13, class_id#14, category_id#15] + +(49) Exchange +Input [3]: [brand_id#13, class_id#14, category_id#15] +Arguments: hashpartitioning(brand_id#13, class_id#14, category_id#15, 5), true, [id=#23] + +(50) HashAggregate [codegen id : 10] +Input [3]: 
[brand_id#13, class_id#14, category_id#15] +Keys [3]: [brand_id#13, class_id#14, category_id#15] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#13, class_id#14, category_id#15] + +(51) HashAggregate [codegen id : 10] +Input [3]: [brand_id#13, class_id#14, category_id#15] +Keys [3]: [brand_id#13, class_id#14, category_id#15] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#13, class_id#14, category_id#15] + +(52) HashAggregate [codegen id : 10] +Input [3]: [brand_id#13, class_id#14, category_id#15] +Keys [3]: [brand_id#13, class_id#14, category_id#15] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#13, class_id#14, category_id#15] + +(53) BroadcastExchange +Input [3]: [brand_id#13, class_id#14, category_id#15] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#24] + +(54) BroadcastHashJoin [codegen id : 11] +Left keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Right keys [3]: [brand_id#13, class_id#14, category_id#15] +Join condition: None + +(55) Project [codegen id : 11] +Output [1]: [i_item_sk#5 AS ss_item_sk#25] +Input [7]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8, brand_id#13, class_id#14, category_id#15] + +(56) BroadcastExchange +Input [1]: [ss_item_sk#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] + +(57) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [ss_item_sk#25] +Join condition: None + +(58) Scan parquet default.item +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_class_id), IsNotNull(i_category_id), IsNotNull(i_brand_id)] +ReadSchema: struct + +(59) ColumnarToRow [codegen id : 23] 
+Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(60) Filter [codegen id : 23] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : (((isnotnull(i_item_sk#5) AND isnotnull(i_class_id#7)) AND isnotnull(i_category_id#8)) AND isnotnull(i_brand_id#6)) + +(61) ReusedExchange [Reuses operator id: 56] +Output [1]: [ss_item_sk#25] + +(62) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [i_item_sk#5] +Right keys [1]: [ss_item_sk#25] +Join condition: None + +(63) BroadcastExchange +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#27] + +(64) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(65) Project [codegen id : 25] +Output [6]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(66) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_week_seq#28] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(67) ColumnarToRow [codegen id : 24] +Input [2]: [d_date_sk#10, d_week_seq#28] + +(68) Filter [codegen id : 24] +Input [2]: [d_date_sk#10, d_week_seq#28] +Condition : ((isnotnull(d_week_seq#28) AND (d_week_seq#28 = Subquery scalar-subquery#29, [id=#30])) AND isnotnull(d_date_sk#10)) + +(69) Project [codegen id : 24] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_week_seq#28] + +(70) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#31] + +(71) 
BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(72) Project [codegen id : 25] +Output [5]: [ss_quantity#3, ss_list_price#4, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [7]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] + +(73) HashAggregate [codegen id : 25] +Input [5]: [ss_quantity#3, ss_list_price#4, i_brand_id#6, i_class_id#7, i_category_id#8] +Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Aggregate Attributes [3]: [sum#32, isEmpty#33, count#34] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#35, isEmpty#36, count#37] + +(74) Exchange +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#35, isEmpty#36, count#37] +Arguments: hashpartitioning(i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#38] + +(75) HashAggregate [codegen id : 52] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#35, isEmpty#36, count#37] +Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#39, count(1)#40] +Results [7]: [store AS channel#41, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as 
decimal(12,2)))), DecimalType(18,2), true))#39 AS sales#42, count(1)#40 AS number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#39 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44] + +(76) Filter [codegen id : 52] +Input [7]: [channel#41, i_brand_id#6, i_class_id#7, i_category_id#8, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44 as decimal(32,6)) > cast(Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) + +(77) Project [codegen id : 52] +Output [6]: [channel#41, i_brand_id#6, i_class_id#7, i_category_id#8, sales#42, number_sales#43] +Input [7]: [channel#41, i_brand_id#6, i_class_id#7, i_category_id#8, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44] + +(78) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_sales] 
+PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(79) ColumnarToRow [codegen id : 50] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] + +(80) Filter [codegen id : 50] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(81) ReusedExchange [Reuses operator id: 56] +Output [1]: [ss_item_sk#25] + +(82) BroadcastHashJoin [codegen id : 50] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [ss_item_sk#25] +Join condition: None + +(83) ReusedExchange [Reuses operator id: 63] +Output [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] + +(84) BroadcastHashJoin [codegen id : 50] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#47] +Join condition: None + +(85) Project [codegen id : 50] +Output [6]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, i_brand_id#48, i_class_id#49, i_category_id#50] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4, i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] + +(86) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_week_seq#28] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(87) ColumnarToRow [codegen id : 49] +Input [2]: [d_date_sk#10, d_week_seq#28] + +(88) Filter [codegen id : 49] +Input [2]: [d_date_sk#10, d_week_seq#28] +Condition : ((isnotnull(d_week_seq#28) AND (d_week_seq#28 = Subquery scalar-subquery#51, [id=#52])) AND isnotnull(d_date_sk#10)) + +(89) Project [codegen id : 49] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_week_seq#28] + +(90) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), 
[id=#53] + +(91) BroadcastHashJoin [codegen id : 50] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(92) Project [codegen id : 50] +Output [5]: [ss_quantity#3, ss_list_price#4, i_brand_id#48, i_class_id#49, i_category_id#50] +Input [7]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, i_brand_id#48, i_class_id#49, i_category_id#50, d_date_sk#10] + +(93) HashAggregate [codegen id : 50] +Input [5]: [ss_quantity#3, ss_list_price#4, i_brand_id#48, i_class_id#49, i_category_id#50] +Keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Aggregate Attributes [3]: [sum#54, isEmpty#55, count#56] +Results [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#57, isEmpty#58, count#59] + +(94) Exchange +Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#57, isEmpty#58, count#59] +Arguments: hashpartitioning(i_brand_id#48, i_class_id#49, i_category_id#50, 5), true, [id=#60] + +(95) HashAggregate [codegen id : 51] +Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#57, isEmpty#58, count#59] +Keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#61, count(1)#62] +Results [7]: [store AS channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#61 AS sales#64, count(1)#62 AS number_sales#65, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#61 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#66] + +(96) Filter [codegen id : 51] +Input [7]: [channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sales#64, number_sales#65, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#66] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#66) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#66 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) + +(97) Project [codegen id : 51] +Output [6]: [channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sales#64, number_sales#65] +Input [7]: [channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sales#64, number_sales#65, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#66] + +(98) BroadcastExchange +Input [6]: [channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sales#64, number_sales#65] +Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, 
int, true]),false), [id=#67] + +(99) BroadcastHashJoin [codegen id : 52] +Left keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Right keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] +Join condition: None + +(100) TakeOrderedAndProject +Input [12]: [channel#41, i_brand_id#6, i_class_id#7, i_category_id#8, sales#42, number_sales#43, channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sales#64, number_sales#65] +Arguments: 100, [channel#41 ASC NULLS FIRST, i_brand_id#6 ASC NULLS FIRST, i_class_id#7 ASC NULLS FIRST, i_category_id#8 ASC NULLS FIRST], [channel#41, i_brand_id#6, i_class_id#7, i_category_id#8, sales#42, number_sales#43, channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sales#64, number_sales#65] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 76 Hosting Expression = Subquery scalar-subquery#45, [id=#46] +* HashAggregate (126) ++- Exchange (125) + +- * HashAggregate (124) + +- Union (123) + :- * Project (110) + : +- * BroadcastHashJoin Inner BuildRight (109) + : :- * Filter (103) + : : +- * ColumnarToRow (102) + : : +- Scan parquet default.store_sales (101) + : +- BroadcastExchange (108) + : +- * Project (107) + : +- * Filter (106) + : +- * ColumnarToRow (105) + : +- Scan parquet default.date_dim (104) + :- * Project (116) + : +- * BroadcastHashJoin Inner BuildRight (115) + : :- * Filter (113) + : : +- * ColumnarToRow (112) + : : +- Scan parquet default.catalog_sales (111) + : +- ReusedExchange (114) + +- * Project (122) + +- * BroadcastHashJoin Inner BuildRight (121) + :- * Filter (119) + : +- * ColumnarToRow (118) + : +- Scan parquet default.web_sales (117) + +- ReusedExchange (120) + + +(101) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk)] 
+ReadSchema: struct + +(102) ColumnarToRow [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] + +(103) Filter [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] +Condition : isnotnull(ss_sold_date_sk#1) + +(104) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_year#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1998), LessThanOrEqual(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(105) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#10, d_year#11] + +(106) Filter [codegen id : 1] +Input [2]: [d_date_sk#10, d_year#11] +Condition : (((isnotnull(d_year#11) AND (d_year#11 >= 1998)) AND (d_year#11 <= 2000)) AND isnotnull(d_date_sk#10)) + +(107) Project [codegen id : 1] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_year#11] + +(108) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#68] + +(109) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(110) Project [codegen id : 2] +Output [2]: [ss_quantity#3 AS quantity#69, ss_list_price#4 AS list_price#70] +Input [4]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, d_date_sk#10] + +(111) Scan parquet default.catalog_sales +Output [3]: [cs_sold_date_sk#16, cs_quantity#71, cs_list_price#72] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(112) ColumnarToRow [codegen id : 4] +Input [3]: [cs_sold_date_sk#16, cs_quantity#71, cs_list_price#72] + +(113) Filter [codegen id : 4] +Input [3]: 
[cs_sold_date_sk#16, cs_quantity#71, cs_list_price#72] +Condition : isnotnull(cs_sold_date_sk#16) + +(114) ReusedExchange [Reuses operator id: 108] +Output [1]: [d_date_sk#10] + +(115) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#16] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(116) Project [codegen id : 4] +Output [2]: [cs_quantity#71 AS quantity#73, cs_list_price#72 AS list_price#74] +Input [4]: [cs_sold_date_sk#16, cs_quantity#71, cs_list_price#72, d_date_sk#10] + +(117) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#20, ws_quantity#75, ws_list_price#76] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(118) ColumnarToRow [codegen id : 6] +Input [3]: [ws_sold_date_sk#20, ws_quantity#75, ws_list_price#76] + +(119) Filter [codegen id : 6] +Input [3]: [ws_sold_date_sk#20, ws_quantity#75, ws_list_price#76] +Condition : isnotnull(ws_sold_date_sk#20) + +(120) ReusedExchange [Reuses operator id: 108] +Output [1]: [d_date_sk#10] + +(121) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#20] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(122) Project [codegen id : 6] +Output [2]: [ws_quantity#75 AS quantity#77, ws_list_price#76 AS list_price#78] +Input [4]: [ws_sold_date_sk#20, ws_quantity#75, ws_list_price#76, d_date_sk#10] + +(123) Union + +(124) HashAggregate [codegen id : 7] +Input [2]: [quantity#69, list_price#70] +Keys: [] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#69 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#70 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#79, count#80] +Results [2]: [sum#81, count#82] + +(125) Exchange +Input [2]: [sum#81, count#82] +Arguments: SinglePartition, true, [id=#83] + +(126) 
HashAggregate [codegen id : 8] +Input [2]: [sum#81, count#82] +Keys: [] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#69 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#70 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#69 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#70 as decimal(12,2)))), DecimalType(18,2), true))#84] +Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#69 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#70 as decimal(12,2)))), DecimalType(18,2), true))#84 AS average_sales#85] + +Subquery:2 Hosting operator id = 68 Hosting Expression = Subquery scalar-subquery#29, [id=#30] +* Project (130) ++- * Filter (129) + +- * ColumnarToRow (128) + +- Scan parquet default.date_dim (127) + + +(127) Scan parquet default.date_dim +Output [4]: [d_week_seq#28, d_year#11, d_moy#86, d_dom#87] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_dom), IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_year,1999), EqualTo(d_moy,12), EqualTo(d_dom,16)] +ReadSchema: struct + +(128) ColumnarToRow [codegen id : 1] +Input [4]: [d_week_seq#28, d_year#11, d_moy#86, d_dom#87] + +(129) Filter [codegen id : 1] +Input [4]: [d_week_seq#28, d_year#11, d_moy#86, d_dom#87] +Condition : (((((isnotnull(d_dom#87) AND isnotnull(d_moy#86)) AND isnotnull(d_year#11)) AND (d_year#11 = 1999)) AND (d_moy#86 = 12)) AND (d_dom#87 = 16)) + +(130) Project [codegen id : 1] +Output [1]: [d_week_seq#28] +Input [4]: [d_week_seq#28, d_year#11, d_moy#86, d_dom#87] + +Subquery:3 Hosting operator id = 96 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] + +Subquery:4 Hosting operator id = 88 Hosting Expression = Subquery scalar-subquery#51, 
[id=#52] +* Project (134) ++- * Filter (133) + +- * ColumnarToRow (132) + +- Scan parquet default.date_dim (131) + + +(131) Scan parquet default.date_dim +Output [4]: [d_week_seq#28, d_year#11, d_moy#86, d_dom#87] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_dom), IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_year,1998), EqualTo(d_moy,12), EqualTo(d_dom,16)] +ReadSchema: struct + +(132) ColumnarToRow [codegen id : 1] +Input [4]: [d_week_seq#28, d_year#11, d_moy#86, d_dom#87] + +(133) Filter [codegen id : 1] +Input [4]: [d_week_seq#28, d_year#11, d_moy#86, d_dom#87] +Condition : (((((isnotnull(d_dom#87) AND isnotnull(d_moy#86)) AND isnotnull(d_year#11)) AND (d_year#11 = 1998)) AND (d_moy#86 = 12)) AND (d_dom#87 = 16)) + +(134) Project [codegen id : 1] +Output [1]: [d_week_seq#28] +Input [4]: [d_week_seq#28, d_year#11, d_moy#86, d_dom#87] + + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/simplified.txt new file mode 100644 index 0000000000000..f1668ea399807 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/simplified.txt @@ -0,0 +1,204 @@ +TakeOrderedAndProject [channel,channel,i_brand_id,i_brand_id,i_category_id,i_category_id,i_class_id,i_class_id,number_sales,number_sales,sales,sales] + WholeStageCodegen (52) + BroadcastHashJoin [i_brand_id,i_brand_id,i_category_id,i_category_id,i_class_id,i_class_id] + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + Subquery #2 + WholeStageCodegen (8) + HashAggregate [count,sum] 
[average_sales,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),count,sum] + InputAdapter + Exchange #12 + WholeStageCodegen (7) + HashAggregate [list_price,quantity] [count,count,sum,sum] + InputAdapter + Union + WholeStageCodegen (2) + Project [ss_list_price,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_list_price,ss_quantity,ss_sold_date_sk] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen (4) + Project [cs_list_price,cs_quantity] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_list_price,cs_quantity,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #13 + WholeStageCodegen (6) + Project [ws_list_price,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_list_price,ws_quantity,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #13 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + Exchange [i_brand_id,i_category_id,i_class_id] #1 + WholeStageCodegen (25) + HashAggregate 
[i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity] [count,count,isEmpty,isEmpty,sum,sum] + Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + BroadcastHashJoin [ss_item_sk,ss_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (11) + Project [i_item_sk] + BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] + Filter [i_brand_id,i_category_id,i_class_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (10) + HashAggregate [brand_id,category_id,class_id] + HashAggregate [brand_id,category_id,class_id] + HashAggregate [brand_id,category_id,class_id] + InputAdapter + Exchange [brand_id,category_id,class_id] #4 + WholeStageCodegen (9) + HashAggregate [brand_id,category_id,class_id] + BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] + Project [i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Filter [i_brand_id,i_category_id,i_class_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] + InputAdapter + BroadcastExchange 
#6 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (5) + Project [i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_item_sk,cs_sold_date_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] + InputAdapter + ReusedExchange [d_date_sk] #6 + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (8) + Project [i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,ws_sold_date_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] #8 + InputAdapter + ReusedExchange [d_date_sk] #6 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (23) + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [i_brand_id,i_category_id,i_class_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] + InputAdapter + ReusedExchange [ss_item_sk] #2 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (24) + Project [d_date_sk] + Filter [d_date_sk,d_week_seq] + Subquery #1 + WholeStageCodegen (1) + Project [d_week_seq] + Filter [d_dom,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_dom,d_moy,d_week_seq,d_year] + ColumnarToRow + InputAdapter + Scan parquet 
default.date_dim [d_date_sk,d_week_seq] + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (51) + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #2 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + Exchange [i_brand_id,i_category_id,i_class_id] #15 + WholeStageCodegen (50) + HashAggregate [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity] [count,count,isEmpty,isEmpty,sum,sum] + Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + BroadcastHashJoin [ss_item_sk,ss_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] + InputAdapter + ReusedExchange [ss_item_sk] #2 + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] #10 + InputAdapter + BroadcastExchange #16 + WholeStageCodegen (49) + Project [d_date_sk] + Filter [d_date_sk,d_week_seq] + Subquery #3 + WholeStageCodegen (1) + Project [d_week_seq] + Filter [d_dom,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim 
[d_dom,d_moy,d_week_seq,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_week_seq] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/explain.txt new file mode 100644 index 0000000000000..705abacb4f572 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/explain.txt @@ -0,0 +1,1460 @@ +== Physical Plan == +TakeOrderedAndProject (222) ++- * HashAggregate (221) + +- Exchange (220) + +- * HashAggregate (219) + +- Union (218) + :- * HashAggregate (198) + : +- Exchange (197) + : +- * HashAggregate (196) + : +- Union (195) + : :- * HashAggregate (175) + : : +- Exchange (174) + : : +- * HashAggregate (173) + : : +- Union (172) + : : :- * HashAggregate (152) + : : : +- Exchange (151) + : : : +- * HashAggregate (150) + : : : +- Union (149) + : : : :- * HashAggregate (129) + : : : : +- Exchange (128) + : : : : +- * HashAggregate (127) + : : : : +- Union (126) + : : : : :- * Project (87) + : : : : : +- * Filter (86) + : : : : : +- * HashAggregate (85) + : : : : : +- Exchange (84) + : : : : : +- * HashAggregate (83) + : : : : : +- * Project (82) + : : : : : +- * BroadcastHashJoin Inner BuildRight (81) + : : : : : :- * Project (71) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (70) + : : : : : : :- SortMergeJoin LeftSemi (64) + : : : : : : : :- * Sort (5) + : : : : : : : : +- Exchange (4) + : : : : : : : : +- * Filter (3) + : : : : : : : : +- * ColumnarToRow (2) + : : : : : : : : +- Scan parquet default.store_sales (1) + : : : : : : : +- * Sort (63) + : : : : : : : +- Exchange (62) + : : : : : : : +- * Project (61) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (60) + : : : : : : : :- * Filter (8) + : : : : : : : : +- * ColumnarToRow (7) + : : : : : : : : +- Scan parquet default.item (6) + : : : : : : : +- BroadcastExchange (59) + : : : : : : : +- 
* HashAggregate (58) + : : : : : : : +- * HashAggregate (57) + : : : : : : : +- * HashAggregate (56) + : : : : : : : +- Exchange (55) + : : : : : : : +- * HashAggregate (54) + : : : : : : : +- SortMergeJoin LeftSemi (53) + : : : : : : : :- SortMergeJoin LeftSemi (41) + : : : : : : : : :- * Sort (26) + : : : : : : : : : +- Exchange (25) + : : : : : : : : : +- * Project (24) + : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (23) + : : : : : : : : : :- * Project (18) + : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (17) + : : : : : : : : : : :- * Filter (11) + : : : : : : : : : : : +- * ColumnarToRow (10) + : : : : : : : : : : : +- Scan parquet default.store_sales (9) + : : : : : : : : : : +- BroadcastExchange (16) + : : : : : : : : : : +- * Project (15) + : : : : : : : : : : +- * Filter (14) + : : : : : : : : : : +- * ColumnarToRow (13) + : : : : : : : : : : +- Scan parquet default.date_dim (12) + : : : : : : : : : +- BroadcastExchange (22) + : : : : : : : : : +- * Filter (21) + : : : : : : : : : +- * ColumnarToRow (20) + : : : : : : : : : +- Scan parquet default.item (19) + : : : : : : : : +- * Sort (40) + : : : : : : : : +- Exchange (39) + : : : : : : : : +- * Project (38) + : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : : : : : : :- * Project (32) + : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (31) + : : : : : : : : : :- * Filter (29) + : : : : : : : : : : +- * ColumnarToRow (28) + : : : : : : : : : : +- Scan parquet default.catalog_sales (27) + : : : : : : : : : +- ReusedExchange (30) + : : : : : : : : +- BroadcastExchange (36) + : : : : : : : : +- * Filter (35) + : : : : : : : : +- * ColumnarToRow (34) + : : : : : : : : +- Scan parquet default.item (33) + : : : : : : : +- * Sort (52) + : : : : : : : +- Exchange (51) + : : : : : : : +- * Project (50) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (49) + : : : : : : : :- * Project (47) + : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (46) 
+ : : : : : : : : :- * Filter (44) + : : : : : : : : : +- * ColumnarToRow (43) + : : : : : : : : : +- Scan parquet default.web_sales (42) + : : : : : : : : +- ReusedExchange (45) + : : : : : : : +- ReusedExchange (48) + : : : : : : +- BroadcastExchange (69) + : : : : : : +- * Project (68) + : : : : : : +- * Filter (67) + : : : : : : +- * ColumnarToRow (66) + : : : : : : +- Scan parquet default.date_dim (65) + : : : : : +- BroadcastExchange (80) + : : : : : +- SortMergeJoin LeftSemi (79) + : : : : : :- * Sort (76) + : : : : : : +- Exchange (75) + : : : : : : +- * Filter (74) + : : : : : : +- * ColumnarToRow (73) + : : : : : : +- Scan parquet default.item (72) + : : : : : +- * Sort (78) + : : : : : +- ReusedExchange (77) + : : : : :- * Project (106) + : : : : : +- * Filter (105) + : : : : : +- * HashAggregate (104) + : : : : : +- Exchange (103) + : : : : : +- * HashAggregate (102) + : : : : : +- * Project (101) + : : : : : +- * BroadcastHashJoin Inner BuildRight (100) + : : : : : :- * Project (98) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (97) + : : : : : : :- SortMergeJoin LeftSemi (95) + : : : : : : : :- * Sort (92) + : : : : : : : : +- Exchange (91) + : : : : : : : : +- * Filter (90) + : : : : : : : : +- * ColumnarToRow (89) + : : : : : : : : +- Scan parquet default.catalog_sales (88) + : : : : : : : +- * Sort (94) + : : : : : : : +- ReusedExchange (93) + : : : : : : +- ReusedExchange (96) + : : : : : +- ReusedExchange (99) + : : : : +- * Project (125) + : : : : +- * Filter (124) + : : : : +- * HashAggregate (123) + : : : : +- Exchange (122) + : : : : +- * HashAggregate (121) + : : : : +- * Project (120) + : : : : +- * BroadcastHashJoin Inner BuildRight (119) + : : : : :- * Project (117) + : : : : : +- * BroadcastHashJoin Inner BuildRight (116) + : : : : : :- SortMergeJoin LeftSemi (114) + : : : : : : :- * Sort (111) + : : : : : : : +- Exchange (110) + : : : : : : : +- * Filter (109) + : : : : : : : +- * ColumnarToRow (108) + : : : : : : : +- Scan 
parquet default.web_sales (107) + : : : : : : +- * Sort (113) + : : : : : : +- ReusedExchange (112) + : : : : : +- ReusedExchange (115) + : : : : +- ReusedExchange (118) + : : : +- * HashAggregate (148) + : : : +- Exchange (147) + : : : +- * HashAggregate (146) + : : : +- * HashAggregate (145) + : : : +- Exchange (144) + : : : +- * HashAggregate (143) + : : : +- Union (142) + : : : :- * Project (133) + : : : : +- * Filter (132) + : : : : +- * HashAggregate (131) + : : : : +- ReusedExchange (130) + : : : :- * Project (137) + : : : : +- * Filter (136) + : : : : +- * HashAggregate (135) + : : : : +- ReusedExchange (134) + : : : +- * Project (141) + : : : +- * Filter (140) + : : : +- * HashAggregate (139) + : : : +- ReusedExchange (138) + : : +- * HashAggregate (171) + : : +- Exchange (170) + : : +- * HashAggregate (169) + : : +- * HashAggregate (168) + : : +- Exchange (167) + : : +- * HashAggregate (166) + : : +- Union (165) + : : :- * Project (156) + : : : +- * Filter (155) + : : : +- * HashAggregate (154) + : : : +- ReusedExchange (153) + : : :- * Project (160) + : : : +- * Filter (159) + : : : +- * HashAggregate (158) + : : : +- ReusedExchange (157) + : : +- * Project (164) + : : +- * Filter (163) + : : +- * HashAggregate (162) + : : +- ReusedExchange (161) + : +- * HashAggregate (194) + : +- Exchange (193) + : +- * HashAggregate (192) + : +- * HashAggregate (191) + : +- Exchange (190) + : +- * HashAggregate (189) + : +- Union (188) + : :- * Project (179) + : : +- * Filter (178) + : : +- * HashAggregate (177) + : : +- ReusedExchange (176) + : :- * Project (183) + : : +- * Filter (182) + : : +- * HashAggregate (181) + : : +- ReusedExchange (180) + : +- * Project (187) + : +- * Filter (186) + : +- * HashAggregate (185) + : +- ReusedExchange (184) + +- * HashAggregate (217) + +- Exchange (216) + +- * HashAggregate (215) + +- * HashAggregate (214) + +- Exchange (213) + +- * HashAggregate (212) + +- Union (211) + :- * Project (202) + : +- * Filter (201) + : +- * 
HashAggregate (200) + : +- ReusedExchange (199) + :- * Project (206) + : +- * Filter (205) + : +- * HashAggregate (204) + : +- ReusedExchange (203) + +- * Project (210) + +- * Filter (209) + +- * HashAggregate (208) + +- ReusedExchange (207) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] + +(3) Filter [codegen id : 1] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(4) Exchange +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Arguments: hashpartitioning(ss_item_sk#2, 5), true, [id=#5] + +(5) Sort [codegen id : 2] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Arguments: [ss_item_sk#2 ASC NULLS FIRST], false, 0 + +(6) Scan parquet default.item +Output [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_brand_id), IsNotNull(i_category_id), IsNotNull(i_class_id)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 17] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(8) Filter [codegen id : 17] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Condition : ((isnotnull(i_brand_id#7) AND isnotnull(i_category_id#9)) AND isnotnull(i_class_id#8)) + +(9) Scan parquet default.store_sales +Output [2]: 
[ss_sold_date_sk#1, ss_item_sk#2] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(10) ColumnarToRow [codegen id : 5] +Input [2]: [ss_sold_date_sk#1, ss_item_sk#2] + +(11) Filter [codegen id : 5] +Input [2]: [ss_sold_date_sk#1, ss_item_sk#2] +Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(12) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_year#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(13) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#10, d_year#11] + +(14) Filter [codegen id : 3] +Input [2]: [d_date_sk#10, d_year#11] +Condition : (((isnotnull(d_year#11) AND (d_year#11 >= 1999)) AND (d_year#11 <= 2001)) AND isnotnull(d_date_sk#10)) + +(15) Project [codegen id : 3] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_year#11] + +(16) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(17) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(18) Project [codegen id : 5] +Output [1]: [ss_item_sk#2] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, d_date_sk#10] + +(19) Scan parquet default.item +Output [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/item] +PushedFilters: 
[IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_category_id), IsNotNull(i_class_id)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 4] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(21) Filter [codegen id : 4] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Condition : (((isnotnull(i_item_sk#6) AND isnotnull(i_brand_id#7)) AND isnotnull(i_category_id#9)) AND isnotnull(i_class_id#8)) + +(22) BroadcastExchange +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#13] + +(23) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(24) Project [codegen id : 5] +Output [3]: [i_brand_id#7 AS brand_id#14, i_class_id#8 AS class_id#15, i_category_id#9 AS category_id#16] +Input [5]: [ss_item_sk#2, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(25) Exchange +Input [3]: [brand_id#14, class_id#15, category_id#16] +Arguments: hashpartitioning(coalesce(brand_id#14, 0), isnull(brand_id#14), coalesce(class_id#15, 0), isnull(class_id#15), coalesce(category_id#16, 0), isnull(category_id#16), 5), true, [id=#17] + +(26) Sort [codegen id : 6] +Input [3]: [brand_id#14, class_id#15, category_id#16] +Arguments: [coalesce(brand_id#14, 0) ASC NULLS FIRST, isnull(brand_id#14) ASC NULLS FIRST, coalesce(class_id#15, 0) ASC NULLS FIRST, isnull(class_id#15) ASC NULLS FIRST, coalesce(category_id#16, 0) ASC NULLS FIRST, isnull(category_id#16) ASC NULLS FIRST], false, 0 + +(27) Scan parquet default.catalog_sales +Output [2]: [cs_sold_date_sk#18, cs_item_sk#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(28) 
ColumnarToRow [codegen id : 9] +Input [2]: [cs_sold_date_sk#18, cs_item_sk#19] + +(29) Filter [codegen id : 9] +Input [2]: [cs_sold_date_sk#18, cs_item_sk#19] +Condition : (isnotnull(cs_item_sk#19) AND isnotnull(cs_sold_date_sk#18)) + +(30) ReusedExchange [Reuses operator id: 16] +Output [1]: [d_date_sk#10] + +(31) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [cs_sold_date_sk#18] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(32) Project [codegen id : 9] +Output [1]: [cs_item_sk#19] +Input [3]: [cs_sold_date_sk#18, cs_item_sk#19, d_date_sk#10] + +(33) Scan parquet default.item +Output [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 8] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(35) Filter [codegen id : 8] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Condition : isnotnull(i_item_sk#6) + +(36) BroadcastExchange +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#20] + +(37) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [cs_item_sk#19] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(38) Project [codegen id : 9] +Output [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Input [5]: [cs_item_sk#19, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(39) Exchange +Input [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Arguments: hashpartitioning(coalesce(i_brand_id#7, 0), isnull(i_brand_id#7), coalesce(i_class_id#8, 0), isnull(i_class_id#8), coalesce(i_category_id#9, 0), isnull(i_category_id#9), 5), true, [id=#21] + +(40) Sort [codegen id : 10] +Input [3]: [i_brand_id#7, 
i_class_id#8, i_category_id#9] +Arguments: [coalesce(i_brand_id#7, 0) ASC NULLS FIRST, isnull(i_brand_id#7) ASC NULLS FIRST, coalesce(i_class_id#8, 0) ASC NULLS FIRST, isnull(i_class_id#8) ASC NULLS FIRST, coalesce(i_category_id#9, 0) ASC NULLS FIRST, isnull(i_category_id#9) ASC NULLS FIRST], false, 0 + +(41) SortMergeJoin +Left keys [6]: [coalesce(brand_id#14, 0), isnull(brand_id#14), coalesce(class_id#15, 0), isnull(class_id#15), coalesce(category_id#16, 0), isnull(category_id#16)] +Right keys [6]: [coalesce(i_brand_id#7, 0), isnull(i_brand_id#7), coalesce(i_class_id#8, 0), isnull(i_class_id#8), coalesce(i_category_id#9, 0), isnull(i_category_id#9)] +Join condition: None + +(42) Scan parquet default.web_sales +Output [2]: [ws_sold_date_sk#22, ws_item_sk#23] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(43) ColumnarToRow [codegen id : 13] +Input [2]: [ws_sold_date_sk#22, ws_item_sk#23] + +(44) Filter [codegen id : 13] +Input [2]: [ws_sold_date_sk#22, ws_item_sk#23] +Condition : (isnotnull(ws_item_sk#23) AND isnotnull(ws_sold_date_sk#22)) + +(45) ReusedExchange [Reuses operator id: 16] +Output [1]: [d_date_sk#10] + +(46) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ws_sold_date_sk#22] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(47) Project [codegen id : 13] +Output [1]: [ws_item_sk#23] +Input [3]: [ws_sold_date_sk#22, ws_item_sk#23, d_date_sk#10] + +(48) ReusedExchange [Reuses operator id: 36] +Output [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(49) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ws_item_sk#23] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(50) Project [codegen id : 13] +Output [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Input [5]: [ws_item_sk#23, i_item_sk#6, 
i_brand_id#7, i_class_id#8, i_category_id#9] + +(51) Exchange +Input [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Arguments: hashpartitioning(coalesce(i_brand_id#7, 0), isnull(i_brand_id#7), coalesce(i_class_id#8, 0), isnull(i_class_id#8), coalesce(i_category_id#9, 0), isnull(i_category_id#9), 5), true, [id=#24] + +(52) Sort [codegen id : 14] +Input [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Arguments: [coalesce(i_brand_id#7, 0) ASC NULLS FIRST, isnull(i_brand_id#7) ASC NULLS FIRST, coalesce(i_class_id#8, 0) ASC NULLS FIRST, isnull(i_class_id#8) ASC NULLS FIRST, coalesce(i_category_id#9, 0) ASC NULLS FIRST, isnull(i_category_id#9) ASC NULLS FIRST], false, 0 + +(53) SortMergeJoin +Left keys [6]: [coalesce(brand_id#14, 0), isnull(brand_id#14), coalesce(class_id#15, 0), isnull(class_id#15), coalesce(category_id#16, 0), isnull(category_id#16)] +Right keys [6]: [coalesce(i_brand_id#7, 0), isnull(i_brand_id#7), coalesce(i_class_id#8, 0), isnull(i_class_id#8), coalesce(i_category_id#9, 0), isnull(i_category_id#9)] +Join condition: None + +(54) HashAggregate [codegen id : 15] +Input [3]: [brand_id#14, class_id#15, category_id#16] +Keys [3]: [brand_id#14, class_id#15, category_id#16] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#14, class_id#15, category_id#16] + +(55) Exchange +Input [3]: [brand_id#14, class_id#15, category_id#16] +Arguments: hashpartitioning(brand_id#14, class_id#15, category_id#16, 5), true, [id=#25] + +(56) HashAggregate [codegen id : 16] +Input [3]: [brand_id#14, class_id#15, category_id#16] +Keys [3]: [brand_id#14, class_id#15, category_id#16] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#14, class_id#15, category_id#16] + +(57) HashAggregate [codegen id : 16] +Input [3]: [brand_id#14, class_id#15, category_id#16] +Keys [3]: [brand_id#14, class_id#15, category_id#16] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#14, class_id#15, category_id#16] + +(58) HashAggregate [codegen id : 
16] +Input [3]: [brand_id#14, class_id#15, category_id#16] +Keys [3]: [brand_id#14, class_id#15, category_id#16] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#14, class_id#15, category_id#16] + +(59) BroadcastExchange +Input [3]: [brand_id#14, class_id#15, category_id#16] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#26] + +(60) BroadcastHashJoin [codegen id : 17] +Left keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Right keys [3]: [brand_id#14, class_id#15, category_id#16] +Join condition: None + +(61) Project [codegen id : 17] +Output [1]: [i_item_sk#6 AS ss_item_sk#27] +Input [7]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, brand_id#14, class_id#15, category_id#16] + +(62) Exchange +Input [1]: [ss_item_sk#27] +Arguments: hashpartitioning(ss_item_sk#27, 5), true, [id=#28] + +(63) Sort [codegen id : 18] +Input [1]: [ss_item_sk#27] +Arguments: [ss_item_sk#27 ASC NULLS FIRST], false, 0 + +(64) SortMergeJoin +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [ss_item_sk#27] +Join condition: None + +(65) Scan parquet default.date_dim +Output [3]: [d_date_sk#10, d_year#11, d_moy#29] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,11), IsNotNull(d_date_sk)] +ReadSchema: struct + +(66) ColumnarToRow [codegen id : 19] +Input [3]: [d_date_sk#10, d_year#11, d_moy#29] + +(67) Filter [codegen id : 19] +Input [3]: [d_date_sk#10, d_year#11, d_moy#29] +Condition : ((((isnotnull(d_year#11) AND isnotnull(d_moy#29)) AND (d_year#11 = 2000)) AND (d_moy#29 = 11)) AND isnotnull(d_date_sk#10)) + +(68) Project [codegen id : 19] +Output [1]: [d_date_sk#10] +Input [3]: [d_date_sk#10, d_year#11, d_moy#29] + +(69) BroadcastExchange +Input [1]: [d_date_sk#10] 
+Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#30] + +(70) BroadcastHashJoin [codegen id : 38] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(71) Project [codegen id : 38] +Output [3]: [ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4, d_date_sk#10] + +(72) Scan parquet default.item +Output [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(73) ColumnarToRow [codegen id : 20] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(74) Filter [codegen id : 20] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Condition : isnotnull(i_item_sk#6) + +(75) Exchange +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Arguments: hashpartitioning(i_item_sk#6, 5), true, [id=#31] + +(76) Sort [codegen id : 21] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Arguments: [i_item_sk#6 ASC NULLS FIRST], false, 0 + +(77) ReusedExchange [Reuses operator id: 62] +Output [1]: [ss_item_sk#27] + +(78) Sort [codegen id : 37] +Input [1]: [ss_item_sk#27] +Arguments: [ss_item_sk#27 ASC NULLS FIRST], false, 0 + +(79) SortMergeJoin +Left keys [1]: [i_item_sk#6] +Right keys [1]: [ss_item_sk#27] +Join condition: None + +(80) BroadcastExchange +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#32] + +(81) BroadcastHashJoin [codegen id : 38] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(82) Project [codegen id : 38] +Output [5]: [ss_quantity#3, 
ss_list_price#4, i_brand_id#7, i_class_id#8, i_category_id#9] +Input [7]: [ss_item_sk#2, ss_quantity#3, ss_list_price#4, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(83) HashAggregate [codegen id : 38] +Input [5]: [ss_quantity#3, ss_list_price#4, i_brand_id#7, i_class_id#8, i_category_id#9] +Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Aggregate Attributes [3]: [sum#33, isEmpty#34, count#35] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#36, isEmpty#37, count#38] + +(84) Exchange +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#36, isEmpty#37, count#38] +Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#39] + +(85) HashAggregate [codegen id : 39] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#36, isEmpty#37, count#38] +Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#40, count(1)#41] +Results [7]: [store AS channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#40 AS sales#43, count(1)#41 AS number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#40 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#45] + +(86) Filter [codegen id : 39] +Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#45] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#45) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#45 as decimal(32,6)) > cast(Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) + +(87) Project [codegen id : 39] +Output [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44] +Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#45] + +(88) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#18, cs_item_sk#19, cs_quantity#48, cs_list_price#49] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(89) ColumnarToRow [codegen id : 40] +Input [4]: [cs_sold_date_sk#18, cs_item_sk#19, 
cs_quantity#48, cs_list_price#49] + +(90) Filter [codegen id : 40] +Input [4]: [cs_sold_date_sk#18, cs_item_sk#19, cs_quantity#48, cs_list_price#49] +Condition : (isnotnull(cs_item_sk#19) AND isnotnull(cs_sold_date_sk#18)) + +(91) Exchange +Input [4]: [cs_sold_date_sk#18, cs_item_sk#19, cs_quantity#48, cs_list_price#49] +Arguments: hashpartitioning(cs_item_sk#19, 5), true, [id=#50] + +(92) Sort [codegen id : 41] +Input [4]: [cs_sold_date_sk#18, cs_item_sk#19, cs_quantity#48, cs_list_price#49] +Arguments: [cs_item_sk#19 ASC NULLS FIRST], false, 0 + +(93) ReusedExchange [Reuses operator id: 62] +Output [1]: [ss_item_sk#27] + +(94) Sort [codegen id : 57] +Input [1]: [ss_item_sk#27] +Arguments: [ss_item_sk#27 ASC NULLS FIRST], false, 0 + +(95) SortMergeJoin +Left keys [1]: [cs_item_sk#19] +Right keys [1]: [ss_item_sk#27] +Join condition: None + +(96) ReusedExchange [Reuses operator id: 69] +Output [1]: [d_date_sk#10] + +(97) BroadcastHashJoin [codegen id : 77] +Left keys [1]: [cs_sold_date_sk#18] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(98) Project [codegen id : 77] +Output [3]: [cs_item_sk#19, cs_quantity#48, cs_list_price#49] +Input [5]: [cs_sold_date_sk#18, cs_item_sk#19, cs_quantity#48, cs_list_price#49, d_date_sk#10] + +(99) ReusedExchange [Reuses operator id: 80] +Output [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(100) BroadcastHashJoin [codegen id : 77] +Left keys [1]: [cs_item_sk#19] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(101) Project [codegen id : 77] +Output [5]: [cs_quantity#48, cs_list_price#49, i_brand_id#7, i_class_id#8, i_category_id#9] +Input [7]: [cs_item_sk#19, cs_quantity#48, cs_list_price#49, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(102) HashAggregate [codegen id : 77] +Input [5]: [cs_quantity#48, cs_list_price#49, i_brand_id#7, i_class_id#8, i_category_id#9] +Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: 
[partial_sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Aggregate Attributes [3]: [sum#51, isEmpty#52, count#53] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#54, isEmpty#55, count#56] + +(103) Exchange +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#54, isEmpty#55, count#56] +Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#57] + +(104) HashAggregate [codegen id : 78] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#54, isEmpty#55, count#56] +Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#58, count(1)#59] +Results [7]: [catalog AS channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#58 AS sales#61, count(1)#59 AS number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#58 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#63] + +(105) Filter [codegen id : 78] +Input [7]: [channel#60, i_brand_id#7, i_class_id#8, 
i_category_id#9, sales#61, number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#63] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#63) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#63 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) + +(106) Project [codegen id : 78] +Output [6]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62] +Input [7]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#63] + +(107) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#22, ws_item_sk#23, ws_quantity#64, ws_list_price#65] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(108) ColumnarToRow [codegen id : 79] +Input [4]: [ws_sold_date_sk#22, ws_item_sk#23, ws_quantity#64, ws_list_price#65] + +(109) Filter [codegen id : 79] +Input [4]: [ws_sold_date_sk#22, ws_item_sk#23, ws_quantity#64, ws_list_price#65] +Condition : (isnotnull(ws_item_sk#23) AND isnotnull(ws_sold_date_sk#22)) + +(110) Exchange +Input [4]: [ws_sold_date_sk#22, ws_item_sk#23, ws_quantity#64, ws_list_price#65] +Arguments: 
hashpartitioning(ws_item_sk#23, 5), true, [id=#66] + +(111) Sort [codegen id : 80] +Input [4]: [ws_sold_date_sk#22, ws_item_sk#23, ws_quantity#64, ws_list_price#65] +Arguments: [ws_item_sk#23 ASC NULLS FIRST], false, 0 + +(112) ReusedExchange [Reuses operator id: 62] +Output [1]: [ss_item_sk#27] + +(113) Sort [codegen id : 96] +Input [1]: [ss_item_sk#27] +Arguments: [ss_item_sk#27 ASC NULLS FIRST], false, 0 + +(114) SortMergeJoin +Left keys [1]: [ws_item_sk#23] +Right keys [1]: [ss_item_sk#27] +Join condition: None + +(115) ReusedExchange [Reuses operator id: 69] +Output [1]: [d_date_sk#10] + +(116) BroadcastHashJoin [codegen id : 116] +Left keys [1]: [ws_sold_date_sk#22] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(117) Project [codegen id : 116] +Output [3]: [ws_item_sk#23, ws_quantity#64, ws_list_price#65] +Input [5]: [ws_sold_date_sk#22, ws_item_sk#23, ws_quantity#64, ws_list_price#65, d_date_sk#10] + +(118) ReusedExchange [Reuses operator id: 80] +Output [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(119) BroadcastHashJoin [codegen id : 116] +Left keys [1]: [ws_item_sk#23] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(120) Project [codegen id : 116] +Output [5]: [ws_quantity#64, ws_list_price#65, i_brand_id#7, i_class_id#8, i_category_id#9] +Input [7]: [ws_item_sk#23, ws_quantity#64, ws_list_price#65, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] + +(121) HashAggregate [codegen id : 116] +Input [5]: [ws_quantity#64, ws_list_price#65, i_brand_id#7, i_class_id#8, i_category_id#9] +Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Aggregate Attributes [3]: [sum#67, isEmpty#68, count#69] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#70, isEmpty#71, 
count#72] + +(122) Exchange +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#70, isEmpty#71, count#72] +Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#73] + +(123) HashAggregate [codegen id : 117] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#70, isEmpty#71, count#72] +Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#74, count(1)#75] +Results [7]: [web AS channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#74 AS sales#77, count(1)#75 AS number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#74 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79] + +(124) Filter [codegen id : 117] +Input [7]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) + +(125) Project [codegen id : 117] +Output [6]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78] +Input [7]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79] + +(126) Union + +(127) HashAggregate [codegen id : 118] +Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44] +Keys [4]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [partial_sum(sales#43), partial_sum(number_sales#44)] +Aggregate Attributes [3]: [sum#80, isEmpty#81, sum#82] +Results [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#83, isEmpty#84, sum#85] + +(128) Exchange +Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#83, isEmpty#84, sum#85] +Arguments: hashpartitioning(channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#86] + +(129) HashAggregate [codegen id : 119] +Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#83, isEmpty#84, sum#85] +Keys [4]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [sum(sales#43), sum(number_sales#44)] +Aggregate Attributes [2]: [sum(sales#43)#87, sum(number_sales#44)#88] +Results [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum(sales#43)#87 AS sum_sales#89, sum(number_sales#44)#88 AS number_sales#90] + +(130) ReusedExchange [Reuses 
operator id: 84] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#91, isEmpty#92, count#93] + +(131) HashAggregate [codegen id : 158] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#91, isEmpty#92, count#93] +Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#94, count(1)#95] +Results [7]: [store AS channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#94 AS sales#43, count(1)#95 AS number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#94 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#96] + +(132) Filter [codegen id : 158] +Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#96] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#96) AND 
(cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#96 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) + +(133) Project [codegen id : 158] +Output [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44] +Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#96] + +(134) ReusedExchange [Reuses operator id: 103] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#97, isEmpty#98, count#99] + +(135) HashAggregate [codegen id : 197] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#97, isEmpty#98, count#99] +Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#100, count(1)#101] +Results [7]: [catalog AS channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#100 AS sales#61, count(1)#101 AS number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), 
true))#100 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#102] + +(136) Filter [codegen id : 197] +Input [7]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#102] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#102) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#102 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) + +(137) Project [codegen id : 197] +Output [6]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62] +Input [7]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#102] + +(138) ReusedExchange [Reuses operator id: 122] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#103, isEmpty#104, count#105] + +(139) HashAggregate [codegen id : 236] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#103, isEmpty#104, count#105] +Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), 
DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#106, count(1)#107] +Results [7]: [web AS channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#106 AS sales#77, count(1)#107 AS number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#106 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#108] + +(140) Filter [codegen id : 236] +Input [7]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#108] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#108) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#108 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) + +(141) Project [codegen id : 236] +Output [6]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78] +Input [7]: [channel#76, i_brand_id#7, 
i_class_id#8, i_category_id#9, sales#77, number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#108] + +(142) Union + +(143) HashAggregate [codegen id : 237] +Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44] +Keys [4]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [partial_sum(sales#43), partial_sum(number_sales#44)] +Aggregate Attributes [3]: [sum#109, isEmpty#110, sum#111] +Results [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#112, isEmpty#113, sum#114] + +(144) Exchange +Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#112, isEmpty#113, sum#114] +Arguments: hashpartitioning(channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#115] + +(145) HashAggregate [codegen id : 238] +Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#112, isEmpty#113, sum#114] +Keys [4]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [sum(sales#43), sum(number_sales#44)] +Aggregate Attributes [2]: [sum(sales#43)#116, sum(number_sales#44)#117] +Results [5]: [channel#42, i_brand_id#7, i_class_id#8, sum(sales#43)#116 AS sum_sales#89, sum(number_sales#44)#117 AS number_sales#90] + +(146) HashAggregate [codegen id : 238] +Input [5]: [channel#42, i_brand_id#7, i_class_id#8, sum_sales#89, number_sales#90] +Keys [3]: [channel#42, i_brand_id#7, i_class_id#8] +Functions [2]: [partial_sum(sum_sales#89), partial_sum(number_sales#90)] +Aggregate Attributes [3]: [sum#118, isEmpty#119, sum#120] +Results [6]: [channel#42, i_brand_id#7, i_class_id#8, sum#121, isEmpty#122, sum#123] + +(147) Exchange +Input [6]: [channel#42, i_brand_id#7, i_class_id#8, sum#121, isEmpty#122, sum#123] +Arguments: hashpartitioning(channel#42, i_brand_id#7, i_class_id#8, 5), true, [id=#124] + 
+(148) HashAggregate [codegen id : 239] +Input [6]: [channel#42, i_brand_id#7, i_class_id#8, sum#121, isEmpty#122, sum#123] +Keys [3]: [channel#42, i_brand_id#7, i_class_id#8] +Functions [2]: [sum(sum_sales#89), sum(number_sales#90)] +Aggregate Attributes [2]: [sum(sum_sales#89)#125, sum(number_sales#90)#126] +Results [6]: [channel#42, i_brand_id#7, i_class_id#8, null AS i_category_id#127, sum(sum_sales#89)#125 AS sum(sum_sales)#128, sum(number_sales#90)#126 AS sum(number_sales)#129] + +(149) Union + +(150) HashAggregate [codegen id : 240] +Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Keys [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Functions: [] +Aggregate Attributes: [] +Results [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] + +(151) Exchange +Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Arguments: hashpartitioning(channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90, 5), true, [id=#130] + +(152) HashAggregate [codegen id : 241] +Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Keys [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Functions: [] +Aggregate Attributes: [] +Results [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] + +(153) ReusedExchange [Reuses operator id: 84] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#131, isEmpty#132, count#133] + +(154) HashAggregate [codegen id : 280] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#131, isEmpty#132, count#133] +Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#134, count(1)#135] +Results [7]: [store AS channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#134 AS sales#43, count(1)#135 AS number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#134 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#136] + +(155) Filter [codegen id : 280] +Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#136] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#136) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#136 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) + +(156) Project [codegen id : 280] +Output [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, 
number_sales#44] +Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#136] + +(157) ReusedExchange [Reuses operator id: 103] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#137, isEmpty#138, count#139] + +(158) HashAggregate [codegen id : 319] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#137, isEmpty#138, count#139] +Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#140, count(1)#141] +Results [7]: [catalog AS channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#140 AS sales#61, count(1)#141 AS number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#140 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#142] + +(159) Filter [codegen id : 319] +Input [7]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as 
decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#142] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#142) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#142 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) + +(160) Project [codegen id : 319] +Output [6]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62] +Input [7]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#142] + +(161) ReusedExchange [Reuses operator id: 122] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#143, isEmpty#144, count#145] + +(162) HashAggregate [codegen id : 358] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#143, isEmpty#144, count#145] +Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#146, count(1)#147] +Results [7]: [web AS channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as 
decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#146 AS sales#77, count(1)#147 AS number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#146 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#148] + +(163) Filter [codegen id : 358] +Input [7]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#148] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#148) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#148 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) + +(164) Project [codegen id : 358] +Output [6]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78] +Input [7]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#148] + +(165) Union + +(166) HashAggregate [codegen id : 359] +Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44] +Keys [4]: 
[channel#42, i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [partial_sum(sales#43), partial_sum(number_sales#44)] +Aggregate Attributes [3]: [sum#149, isEmpty#150, sum#151] +Results [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#152, isEmpty#153, sum#154] + +(167) Exchange +Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#152, isEmpty#153, sum#154] +Arguments: hashpartitioning(channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#155] + +(168) HashAggregate [codegen id : 360] +Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#152, isEmpty#153, sum#154] +Keys [4]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [sum(sales#43), sum(number_sales#44)] +Aggregate Attributes [2]: [sum(sales#43)#156, sum(number_sales#44)#157] +Results [4]: [channel#42, i_brand_id#7, sum(sales#43)#156 AS sum_sales#89, sum(number_sales#44)#157 AS number_sales#90] + +(169) HashAggregate [codegen id : 360] +Input [4]: [channel#42, i_brand_id#7, sum_sales#89, number_sales#90] +Keys [2]: [channel#42, i_brand_id#7] +Functions [2]: [partial_sum(sum_sales#89), partial_sum(number_sales#90)] +Aggregate Attributes [3]: [sum#158, isEmpty#159, sum#160] +Results [5]: [channel#42, i_brand_id#7, sum#161, isEmpty#162, sum#163] + +(170) Exchange +Input [5]: [channel#42, i_brand_id#7, sum#161, isEmpty#162, sum#163] +Arguments: hashpartitioning(channel#42, i_brand_id#7, 5), true, [id=#164] + +(171) HashAggregate [codegen id : 361] +Input [5]: [channel#42, i_brand_id#7, sum#161, isEmpty#162, sum#163] +Keys [2]: [channel#42, i_brand_id#7] +Functions [2]: [sum(sum_sales#89), sum(number_sales#90)] +Aggregate Attributes [2]: [sum(sum_sales#89)#165, sum(number_sales#90)#166] +Results [6]: [channel#42, i_brand_id#7, null AS i_class_id#167, null AS i_category_id#168, sum(sum_sales#89)#165 AS sum(sum_sales)#169, sum(number_sales#90)#166 AS sum(number_sales)#170] + +(172) Union + +(173) 
HashAggregate [codegen id : 362] +Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Keys [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Functions: [] +Aggregate Attributes: [] +Results [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] + +(174) Exchange +Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Arguments: hashpartitioning(channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90, 5), true, [id=#171] + +(175) HashAggregate [codegen id : 363] +Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Keys [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Functions: [] +Aggregate Attributes: [] +Results [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] + +(176) ReusedExchange [Reuses operator id: 84] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#172, isEmpty#173, count#174] + +(177) HashAggregate [codegen id : 402] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#172, isEmpty#173, count#174] +Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#175, count(1)#176] +Results [7]: [store AS channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#175 AS sales#43, count(1)#176 AS number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#175 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#177] + +(178) Filter [codegen id : 402] +Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#177] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#177) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#177 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) + +(179) Project [codegen id : 402] +Output [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44] +Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#177] + +(180) ReusedExchange [Reuses operator id: 103] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#178, isEmpty#179, count#180] + +(181) HashAggregate [codegen id : 441] +Input [6]: [i_brand_id#7, 
i_class_id#8, i_category_id#9, sum#178, isEmpty#179, count#180] +Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#181, count(1)#182] +Results [7]: [catalog AS channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#181 AS sales#61, count(1)#182 AS number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#181 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#183] + +(182) Filter [codegen id : 441] +Input [7]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#183] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#183) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), 
DecimalType(18,2), true))#183 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) + +(183) Project [codegen id : 441] +Output [6]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62] +Input [7]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#183] + +(184) ReusedExchange [Reuses operator id: 122] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#184, isEmpty#185, count#186] + +(185) HashAggregate [codegen id : 480] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#184, isEmpty#185, count#186] +Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#187, count(1)#188] +Results [7]: [web AS channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#187 AS sales#77, count(1)#188 AS number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#187 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as 
decimal(12,2)))), DecimalType(18,2), true))#189] + +(186) Filter [codegen id : 480] +Input [7]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#189] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#189) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#189 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) + +(187) Project [codegen id : 480] +Output [6]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78] +Input [7]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#189] + +(188) Union + +(189) HashAggregate [codegen id : 481] +Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44] +Keys [4]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [partial_sum(sales#43), partial_sum(number_sales#44)] +Aggregate Attributes [3]: [sum#190, isEmpty#191, sum#192] +Results [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#193, isEmpty#194, sum#195] + +(190) Exchange +Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#193, isEmpty#194, sum#195] +Arguments: hashpartitioning(channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#196] + 
+(191) HashAggregate [codegen id : 482] +Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#193, isEmpty#194, sum#195] +Keys [4]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [sum(sales#43), sum(number_sales#44)] +Aggregate Attributes [2]: [sum(sales#43)#197, sum(number_sales#44)#198] +Results [3]: [channel#42, sum(sales#43)#197 AS sum_sales#89, sum(number_sales#44)#198 AS number_sales#90] + +(192) HashAggregate [codegen id : 482] +Input [3]: [channel#42, sum_sales#89, number_sales#90] +Keys [1]: [channel#42] +Functions [2]: [partial_sum(sum_sales#89), partial_sum(number_sales#90)] +Aggregate Attributes [3]: [sum#199, isEmpty#200, sum#201] +Results [4]: [channel#42, sum#202, isEmpty#203, sum#204] + +(193) Exchange +Input [4]: [channel#42, sum#202, isEmpty#203, sum#204] +Arguments: hashpartitioning(channel#42, 5), true, [id=#205] + +(194) HashAggregate [codegen id : 483] +Input [4]: [channel#42, sum#202, isEmpty#203, sum#204] +Keys [1]: [channel#42] +Functions [2]: [sum(sum_sales#89), sum(number_sales#90)] +Aggregate Attributes [2]: [sum(sum_sales#89)#206, sum(number_sales#90)#207] +Results [6]: [channel#42, null AS i_brand_id#208, null AS i_class_id#209, null AS i_category_id#210, sum(sum_sales#89)#206 AS sum(sum_sales)#211, sum(number_sales#90)#207 AS sum(number_sales)#212] + +(195) Union + +(196) HashAggregate [codegen id : 484] +Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Keys [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Functions: [] +Aggregate Attributes: [] +Results [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] + +(197) Exchange +Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Arguments: hashpartitioning(channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90, 5), true, 
[id=#213] + +(198) HashAggregate [codegen id : 485] +Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Keys [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Functions: [] +Aggregate Attributes: [] +Results [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] + +(199) ReusedExchange [Reuses operator id: 84] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#214, isEmpty#215, count#216] + +(200) HashAggregate [codegen id : 524] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#214, isEmpty#215, count#216] +Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#217, count(1)#218] +Results [7]: [store AS channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#217 AS sales#43, count(1)#218 AS number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#217 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#219] + +(201) Filter [codegen id : 524] +Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, 
number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#219] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#219) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#219 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) + +(202) Project [codegen id : 524] +Output [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44] +Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#219] + +(203) ReusedExchange [Reuses operator id: 103] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#220, isEmpty#221, count#222] + +(204) HashAggregate [codegen id : 563] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#220, isEmpty#221, count#222] +Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#223, count(1)#224] +Results [7]: [catalog AS channel#60, i_brand_id#7, i_class_id#8, 
i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#223 AS sales#61, count(1)#224 AS number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#223 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#225] + +(205) Filter [codegen id : 563] +Input [7]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#225] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#225) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#225 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) + +(206) Project [codegen id : 563] +Output [6]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62] +Input [7]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#225] + +(207) ReusedExchange [Reuses operator id: 122] +Output [6]: [i_brand_id#7, 
i_class_id#8, i_category_id#9, sum#226, isEmpty#227, count#228] + +(208) HashAggregate [codegen id : 602] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#226, isEmpty#227, count#228] +Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#229, count(1)#230] +Results [7]: [web AS channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#229 AS sales#77, count(1)#230 AS number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#229 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#231] + +(209) Filter [codegen id : 602] +Input [7]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#231] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#231) AND 
(cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#231 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) + +(210) Project [codegen id : 602] +Output [6]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78] +Input [7]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#231] + +(211) Union + +(212) HashAggregate [codegen id : 603] +Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44] +Keys [4]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [partial_sum(sales#43), partial_sum(number_sales#44)] +Aggregate Attributes [3]: [sum#232, isEmpty#233, sum#234] +Results [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#235, isEmpty#236, sum#237] + +(213) Exchange +Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#235, isEmpty#236, sum#237] +Arguments: hashpartitioning(channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#238] + +(214) HashAggregate [codegen id : 604] +Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#235, isEmpty#236, sum#237] +Keys [4]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [sum(sales#43), sum(number_sales#44)] +Aggregate Attributes [2]: [sum(sales#43)#239, sum(number_sales#44)#240] +Results [2]: [sum(sales#43)#239 AS sum_sales#89, sum(number_sales#44)#240 AS number_sales#90] + +(215) HashAggregate [codegen id : 604] +Input [2]: [sum_sales#89, number_sales#90] +Keys: [] +Functions [2]: [partial_sum(sum_sales#89), 
partial_sum(number_sales#90)] +Aggregate Attributes [3]: [sum#241, isEmpty#242, sum#243] +Results [3]: [sum#244, isEmpty#245, sum#246] + +(216) Exchange +Input [3]: [sum#244, isEmpty#245, sum#246] +Arguments: SinglePartition, true, [id=#247] + +(217) HashAggregate [codegen id : 605] +Input [3]: [sum#244, isEmpty#245, sum#246] +Keys: [] +Functions [2]: [sum(sum_sales#89), sum(number_sales#90)] +Aggregate Attributes [2]: [sum(sum_sales#89)#248, sum(number_sales#90)#249] +Results [6]: [null AS channel#250, null AS i_brand_id#251, null AS i_class_id#252, null AS i_category_id#253, sum(sum_sales#89)#248 AS sum(sum_sales)#254, sum(number_sales#90)#249 AS sum(number_sales)#255] + +(218) Union + +(219) HashAggregate [codegen id : 606] +Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Keys [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Functions: [] +Aggregate Attributes: [] +Results [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] + +(220) Exchange +Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Arguments: hashpartitioning(channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90, 5), true, [id=#256] + +(221) HashAggregate [codegen id : 607] +Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Keys [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Functions: [] +Aggregate Attributes: [] +Results [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] + +(222) TakeOrderedAndProject +Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Arguments: 100, [channel#42 ASC NULLS FIRST, i_brand_id#7 ASC NULLS FIRST, i_class_id#8 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST], 
[channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 86 Hosting Expression = Subquery scalar-subquery#46, [id=#47] +* HashAggregate (252) ++- Exchange (251) + +- * HashAggregate (250) + +- Union (249) + :- * Project (232) + : +- * BroadcastHashJoin Inner BuildRight (231) + : :- * Filter (225) + : : +- * ColumnarToRow (224) + : : +- Scan parquet default.store_sales (223) + : +- BroadcastExchange (230) + : +- * Project (229) + : +- * Filter (228) + : +- * ColumnarToRow (227) + : +- Scan parquet default.date_dim (226) + :- * Project (242) + : +- * BroadcastHashJoin Inner BuildRight (241) + : :- * Filter (235) + : : +- * ColumnarToRow (234) + : : +- Scan parquet default.catalog_sales (233) + : +- BroadcastExchange (240) + : +- * Project (239) + : +- * Filter (238) + : +- * ColumnarToRow (237) + : +- Scan parquet default.date_dim (236) + +- * Project (248) + +- * BroadcastHashJoin Inner BuildRight (247) + :- * Filter (245) + : +- * ColumnarToRow (244) + : +- Scan parquet default.web_sales (243) + +- ReusedExchange (246) + + +(223) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(224) ColumnarToRow [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] + +(225) Filter [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] +Condition : isnotnull(ss_sold_date_sk#1) + +(226) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_year#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: 
[IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(227) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#10, d_year#11] + +(228) Filter [codegen id : 1] +Input [2]: [d_date_sk#10, d_year#11] +Condition : (((isnotnull(d_year#11) AND (d_year#11 >= 1999)) AND (d_year#11 <= 2001)) AND isnotnull(d_date_sk#10)) + +(229) Project [codegen id : 1] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_year#11] + +(230) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#257] + +(231) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(232) Project [codegen id : 2] +Output [2]: [ss_quantity#3 AS quantity#258, ss_list_price#4 AS list_price#259] +Input [4]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, d_date_sk#10] + +(233) Scan parquet default.catalog_sales +Output [3]: [cs_sold_date_sk#18, cs_quantity#48, cs_list_price#49] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(234) ColumnarToRow [codegen id : 4] +Input [3]: [cs_sold_date_sk#18, cs_quantity#48, cs_list_price#49] + +(235) Filter [codegen id : 4] +Input [3]: [cs_sold_date_sk#18, cs_quantity#48, cs_list_price#49] +Condition : isnotnull(cs_sold_date_sk#18) + +(236) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_year#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1998), LessThanOrEqual(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(237) ColumnarToRow 
[codegen id : 3] +Input [2]: [d_date_sk#10, d_year#11] + +(238) Filter [codegen id : 3] +Input [2]: [d_date_sk#10, d_year#11] +Condition : (((isnotnull(d_year#11) AND (d_year#11 >= 1998)) AND (d_year#11 <= 2000)) AND isnotnull(d_date_sk#10)) + +(239) Project [codegen id : 3] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_year#11] + +(240) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#260] + +(241) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#18] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(242) Project [codegen id : 4] +Output [2]: [cs_quantity#48 AS quantity#261, cs_list_price#49 AS list_price#262] +Input [4]: [cs_sold_date_sk#18, cs_quantity#48, cs_list_price#49, d_date_sk#10] + +(243) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#22, ws_quantity#64, ws_list_price#65] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(244) ColumnarToRow [codegen id : 6] +Input [3]: [ws_sold_date_sk#22, ws_quantity#64, ws_list_price#65] + +(245) Filter [codegen id : 6] +Input [3]: [ws_sold_date_sk#22, ws_quantity#64, ws_list_price#65] +Condition : isnotnull(ws_sold_date_sk#22) + +(246) ReusedExchange [Reuses operator id: 240] +Output [1]: [d_date_sk#10] + +(247) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#22] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(248) Project [codegen id : 6] +Output [2]: [ws_quantity#64 AS quantity#263, ws_list_price#65 AS list_price#264] +Input [4]: [ws_sold_date_sk#22, ws_quantity#64, ws_list_price#65, d_date_sk#10] + +(249) Union + +(250) HashAggregate [codegen id : 7] +Input [2]: [quantity#258, list_price#259] +Keys: [] +Functions [1]: 
[partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#258 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#259 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#265, count#266] +Results [2]: [sum#267, count#268] + +(251) Exchange +Input [2]: [sum#267, count#268] +Arguments: SinglePartition, true, [id=#269] + +(252) HashAggregate [codegen id : 8] +Input [2]: [sum#267, count#268] +Keys: [] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#258 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#259 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#258 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#259 as decimal(12,2)))), DecimalType(18,2), true))#270] +Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#258 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#259 as decimal(12,2)))), DecimalType(18,2), true))#270 AS average_sales#271] + +Subquery:2 Hosting operator id = 105 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] + +Subquery:3 Hosting operator id = 124 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] + +Subquery:4 Hosting operator id = 132 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] + +Subquery:5 Hosting operator id = 136 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] + +Subquery:6 Hosting operator id = 140 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] + +Subquery:7 Hosting operator id = 155 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] + +Subquery:8 Hosting operator id = 159 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] + +Subquery:9 Hosting operator id = 163 Hosting Expression = ReusedSubquery Subquery 
scalar-subquery#46, [id=#47] + +Subquery:10 Hosting operator id = 178 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] + +Subquery:11 Hosting operator id = 182 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] + +Subquery:12 Hosting operator id = 186 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] + +Subquery:13 Hosting operator id = 201 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] + +Subquery:14 Hosting operator id = 205 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] + +Subquery:15 Hosting operator id = 209 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] + + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/simplified.txt new file mode 100644 index 0000000000000..7a78f19e5cc99 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/simplified.txt @@ -0,0 +1,427 @@ +TakeOrderedAndProject [channel,i_brand_id,i_category_id,i_class_id,number_sales,sum_sales] + WholeStageCodegen (607) + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,number_sales,sum_sales] + InputAdapter + Exchange [channel,i_brand_id,i_category_id,i_class_id,number_sales,sum_sales] #1 + WholeStageCodegen (606) + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,number_sales,sum_sales] + InputAdapter + Union + WholeStageCodegen (485) + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,number_sales,sum_sales] + InputAdapter + Exchange [channel,i_brand_id,i_category_id,i_class_id,number_sales,sum_sales] #2 + WholeStageCodegen (484) + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,number_sales,sum_sales] + InputAdapter + Union + WholeStageCodegen (363) + HashAggregate 
[channel,i_brand_id,i_category_id,i_class_id,number_sales,sum_sales] + InputAdapter + Exchange [channel,i_brand_id,i_category_id,i_class_id,number_sales,sum_sales] #3 + WholeStageCodegen (362) + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,number_sales,sum_sales] + InputAdapter + Union + WholeStageCodegen (241) + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,number_sales,sum_sales] + InputAdapter + Exchange [channel,i_brand_id,i_category_id,i_class_id,number_sales,sum_sales] #4 + WholeStageCodegen (240) + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,number_sales,sum_sales] + InputAdapter + Union + WholeStageCodegen (119) + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,isEmpty,sum,sum] [isEmpty,number_sales,sum,sum,sum(number_salesL),sum(sales),sum_sales] + InputAdapter + Exchange [channel,i_brand_id,i_category_id,i_class_id] #5 + WholeStageCodegen (118) + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] [isEmpty,isEmpty,sum,sum,sum,sum] + InputAdapter + Union + WholeStageCodegen (39) + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + Subquery #1 + WholeStageCodegen (8) + HashAggregate [count,sum] [average_sales,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),count,sum] + InputAdapter + Exchange #20 + WholeStageCodegen (7) + HashAggregate [list_price,quantity] [count,count,sum,sum] + InputAdapter + Union + WholeStageCodegen (2) + Project [ss_list_price,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales 
[ss_list_price,ss_quantity,ss_sold_date_sk] + InputAdapter + BroadcastExchange #21 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen (4) + Project [cs_list_price,cs_quantity] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_list_price,cs_quantity,cs_sold_date_sk] + InputAdapter + BroadcastExchange #22 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen (6) + Project [ws_list_price,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_list_price,ws_quantity,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #22 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + Exchange [i_brand_id,i_category_id,i_class_id] #6 + WholeStageCodegen (38) + HashAggregate [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity] [count,count,isEmpty,isEmpty,sum,sum] + Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_item_sk,ss_list_price,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + InputAdapter + SortMergeJoin [ss_item_sk,ss_item_sk] + WholeStageCodegen (2) + Sort [ss_item_sk] + 
InputAdapter + Exchange [ss_item_sk] #7 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] + WholeStageCodegen (18) + Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #8 + WholeStageCodegen (17) + Project [i_item_sk] + BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] + Filter [i_brand_id,i_category_id,i_class_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (16) + HashAggregate [brand_id,category_id,class_id] + HashAggregate [brand_id,category_id,class_id] + HashAggregate [brand_id,category_id,class_id] + InputAdapter + Exchange [brand_id,category_id,class_id] #10 + WholeStageCodegen (15) + HashAggregate [brand_id,category_id,class_id] + InputAdapter + SortMergeJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] + SortMergeJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] + WholeStageCodegen (6) + Sort [brand_id,category_id,class_id] + InputAdapter + Exchange [brand_id,category_id,class_id] #11 + WholeStageCodegen (5) + Project [i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (4) + Filter [i_brand_id,i_category_id,i_class_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] + 
WholeStageCodegen (10) + Sort [i_brand_id,i_category_id,i_class_id] + InputAdapter + Exchange [i_brand_id,i_category_id,i_class_id] #14 + WholeStageCodegen (9) + Project [i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_item_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #12 + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (8) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] + WholeStageCodegen (14) + Sort [i_brand_id,i_category_id,i_class_id] + InputAdapter + Exchange [i_brand_id,i_category_id,i_class_id] #16 + WholeStageCodegen (13) + Project [i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_item_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #12 + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] #15 + InputAdapter + BroadcastExchange #17 + WholeStageCodegen (19) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #18 + SortMergeJoin [i_item_sk,ss_item_sk] + WholeStageCodegen (21) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #19 + WholeStageCodegen (20) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] + WholeStageCodegen (37) + Sort [ss_item_sk] + InputAdapter + ReusedExchange [ss_item_sk] #8 + WholeStageCodegen (78) + Project 
[channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + Exchange [i_brand_id,i_category_id,i_class_id] #23 + WholeStageCodegen (77) + HashAggregate [cs_list_price,cs_quantity,i_brand_id,i_category_id,i_class_id] [count,count,isEmpty,isEmpty,sum,sum] + Project [cs_list_price,cs_quantity,i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_list_price,cs_quantity] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + InputAdapter + SortMergeJoin [cs_item_sk,ss_item_sk] + WholeStageCodegen (41) + Sort [cs_item_sk] + InputAdapter + Exchange [cs_item_sk] #24 + WholeStageCodegen (40) + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_item_sk,cs_list_price,cs_quantity,cs_sold_date_sk] + WholeStageCodegen (57) + Sort [ss_item_sk] + InputAdapter + ReusedExchange [ss_item_sk] #8 + InputAdapter + ReusedExchange [d_date_sk] #17 + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] #18 + WholeStageCodegen (117) + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + Exchange [i_brand_id,i_category_id,i_class_id] #25 + WholeStageCodegen (116) + HashAggregate [i_brand_id,i_category_id,i_class_id,ws_list_price,ws_quantity] [count,count,isEmpty,isEmpty,sum,sum] + Project [i_brand_id,i_category_id,i_class_id,ws_list_price,ws_quantity] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_item_sk,ws_list_price,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + InputAdapter + SortMergeJoin [ss_item_sk,ws_item_sk] + WholeStageCodegen (80) + Sort [ws_item_sk] + InputAdapter + Exchange [ws_item_sk] #26 + WholeStageCodegen (79) + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_list_price,ws_quantity,ws_sold_date_sk] + WholeStageCodegen (96) + Sort [ss_item_sk] + InputAdapter + ReusedExchange [ss_item_sk] #8 + InputAdapter + ReusedExchange [d_date_sk] #17 + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] #18 + WholeStageCodegen (239) + HashAggregate [channel,i_brand_id,i_class_id,isEmpty,sum,sum] [i_category_id,isEmpty,sum,sum,sum(number_salesL),sum(number_sales),sum(sum_sales),sum(sum_sales)] + InputAdapter + Exchange [channel,i_brand_id,i_class_id] #27 + WholeStageCodegen (238) + HashAggregate [channel,i_brand_id,i_class_id,number_sales,sum_sales] [isEmpty,isEmpty,sum,sum,sum,sum] + HashAggregate 
[channel,i_brand_id,i_category_id,i_class_id,isEmpty,sum,sum] [isEmpty,number_sales,sum,sum,sum(number_salesL),sum(sales),sum_sales] + InputAdapter + Exchange [channel,i_brand_id,i_category_id,i_class_id] #28 + WholeStageCodegen (237) + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] [isEmpty,isEmpty,sum,sum,sum,sum] + InputAdapter + Union + WholeStageCodegen (158) + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + ReusedExchange [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] #6 + WholeStageCodegen (197) + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as 
decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + ReusedExchange [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] #23 + WholeStageCodegen (236) + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + ReusedExchange [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] #25 + WholeStageCodegen (361) + HashAggregate [channel,i_brand_id,isEmpty,sum,sum] [i_category_id,i_class_id,isEmpty,sum,sum,sum(number_salesL),sum(number_sales),sum(sum_sales),sum(sum_sales)] + InputAdapter + Exchange [channel,i_brand_id] #29 + WholeStageCodegen (360) + HashAggregate [channel,i_brand_id,number_sales,sum_sales] [isEmpty,isEmpty,sum,sum,sum,sum] + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,isEmpty,sum,sum] [isEmpty,number_sales,sum,sum,sum(number_salesL),sum(sales),sum_sales] + InputAdapter + Exchange [channel,i_brand_id,i_category_id,i_class_id] #30 + WholeStageCodegen (359) + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] [isEmpty,isEmpty,sum,sum,sum,sum] + InputAdapter + Union + WholeStageCodegen (280) + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter 
[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + ReusedExchange [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] #6 + WholeStageCodegen (319) + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + ReusedExchange [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] #23 + WholeStageCodegen (358) + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), 
DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + ReusedExchange [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] #25 + WholeStageCodegen (483) + HashAggregate [channel,isEmpty,sum,sum] [i_brand_id,i_category_id,i_class_id,isEmpty,sum,sum,sum(number_salesL),sum(number_sales),sum(sum_sales),sum(sum_sales)] + InputAdapter + Exchange [channel] #31 + WholeStageCodegen (482) + HashAggregate [channel,number_sales,sum_sales] [isEmpty,isEmpty,sum,sum,sum,sum] + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,isEmpty,sum,sum] [isEmpty,number_sales,sum,sum,sum(number_salesL),sum(sales),sum_sales] + InputAdapter + Exchange [channel,i_brand_id,i_category_id,i_class_id] #32 + WholeStageCodegen (481) + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] [isEmpty,isEmpty,sum,sum,sum,sum] + InputAdapter + Union + WholeStageCodegen (402) + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), 
DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + ReusedExchange [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] #6 + WholeStageCodegen (441) + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + ReusedExchange [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] #23 + WholeStageCodegen (480) + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as 
decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + ReusedExchange [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] #25 + WholeStageCodegen (605) + HashAggregate [isEmpty,sum,sum] [channel,i_brand_id,i_category_id,i_class_id,isEmpty,sum,sum,sum(number_salesL),sum(number_sales),sum(sum_sales),sum(sum_sales)] + InputAdapter + Exchange #33 + WholeStageCodegen (604) + HashAggregate [number_sales,sum_sales] [isEmpty,isEmpty,sum,sum,sum,sum] + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,isEmpty,sum,sum] [isEmpty,number_sales,sum,sum,sum(number_salesL),sum(sales),sum_sales] + InputAdapter + Exchange [channel,i_brand_id,i_category_id,i_class_id] #34 + WholeStageCodegen (603) + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] [isEmpty,isEmpty,sum,sum,sum,sum] + InputAdapter + Union + WholeStageCodegen (524) + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + ReusedExchange [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] #6 + WholeStageCodegen (563) + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + ReusedExchange [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] #23 + WholeStageCodegen (602) + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + ReusedExchange [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] #25 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/explain.txt new file mode 100644 index 0000000000000..24f9a69287dc9 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/explain.txt @@ -0,0 +1,1380 @@ +== Physical 
Plan == +TakeOrderedAndProject (206) ++- * HashAggregate (205) + +- Exchange (204) + +- * HashAggregate (203) + +- Union (202) + :- * HashAggregate (182) + : +- Exchange (181) + : +- * HashAggregate (180) + : +- Union (179) + : :- * HashAggregate (159) + : : +- Exchange (158) + : : +- * HashAggregate (157) + : : +- Union (156) + : : :- * HashAggregate (136) + : : : +- Exchange (135) + : : : +- * HashAggregate (134) + : : : +- Union (133) + : : : :- * HashAggregate (113) + : : : : +- Exchange (112) + : : : : +- * HashAggregate (111) + : : : : +- Union (110) + : : : : :- * Project (77) + : : : : : +- * Filter (76) + : : : : : +- * HashAggregate (75) + : : : : : +- Exchange (74) + : : : : : +- * HashAggregate (73) + : : : : : +- * Project (72) + : : : : : +- * BroadcastHashJoin Inner BuildRight (71) + : : : : : :- * Project (65) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (64) + : : : : : : :- * BroadcastHashJoin LeftSemi BuildRight (57) + : : : : : : : :- * Filter (3) + : : : : : : : : +- * ColumnarToRow (2) + : : : : : : : : +- Scan parquet default.store_sales (1) + : : : : : : : +- BroadcastExchange (56) + : : : : : : : +- * Project (55) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (54) + : : : : : : : :- * Filter (6) + : : : : : : : : +- * ColumnarToRow (5) + : : : : : : : : +- Scan parquet default.item (4) + : : : : : : : +- BroadcastExchange (53) + : : : : : : : +- * HashAggregate (52) + : : : : : : : +- * HashAggregate (51) + : : : : : : : +- * HashAggregate (50) + : : : : : : : +- Exchange (49) + : : : : : : : +- * HashAggregate (48) + : : : : : : : +- * BroadcastHashJoin LeftSemi BuildRight (47) + : : : : : : : :- * BroadcastHashJoin LeftSemi BuildRight (36) + : : : : : : : : :- * Project (22) + : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (21) + : : : : : : : : : :- * Project (15) + : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : : : : : : :- * Filter (9) + : : : : : : : : : : : +- * 
ColumnarToRow (8) + : : : : : : : : : : : +- Scan parquet default.store_sales (7) + : : : : : : : : : : +- BroadcastExchange (13) + : : : : : : : : : : +- * Filter (12) + : : : : : : : : : : +- * ColumnarToRow (11) + : : : : : : : : : : +- Scan parquet default.item (10) + : : : : : : : : : +- BroadcastExchange (20) + : : : : : : : : : +- * Project (19) + : : : : : : : : : +- * Filter (18) + : : : : : : : : : +- * ColumnarToRow (17) + : : : : : : : : : +- Scan parquet default.date_dim (16) + : : : : : : : : +- BroadcastExchange (35) + : : : : : : : : +- * Project (34) + : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (33) + : : : : : : : : :- * Project (31) + : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (30) + : : : : : : : : : :- * Filter (25) + : : : : : : : : : : +- * ColumnarToRow (24) + : : : : : : : : : : +- Scan parquet default.catalog_sales (23) + : : : : : : : : : +- BroadcastExchange (29) + : : : : : : : : : +- * Filter (28) + : : : : : : : : : +- * ColumnarToRow (27) + : : : : : : : : : +- Scan parquet default.item (26) + : : : : : : : : +- ReusedExchange (32) + : : : : : : : +- BroadcastExchange (46) + : : : : : : : +- * Project (45) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (44) + : : : : : : : :- * Project (42) + : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (41) + : : : : : : : : :- * Filter (39) + : : : : : : : : : +- * ColumnarToRow (38) + : : : : : : : : : +- Scan parquet default.web_sales (37) + : : : : : : : : +- ReusedExchange (40) + : : : : : : : +- ReusedExchange (43) + : : : : : : +- BroadcastExchange (63) + : : : : : : +- * BroadcastHashJoin LeftSemi BuildRight (62) + : : : : : : :- * Filter (60) + : : : : : : : +- * ColumnarToRow (59) + : : : : : : : +- Scan parquet default.item (58) + : : : : : : +- ReusedExchange (61) + : : : : : +- BroadcastExchange (70) + : : : : : +- * Project (69) + : : : : : +- * Filter (68) + : : : : : +- * ColumnarToRow (67) + : : : : : +- Scan parquet default.date_dim 
(66) + : : : : :- * Project (93) + : : : : : +- * Filter (92) + : : : : : +- * HashAggregate (91) + : : : : : +- Exchange (90) + : : : : : +- * HashAggregate (89) + : : : : : +- * Project (88) + : : : : : +- * BroadcastHashJoin Inner BuildRight (87) + : : : : : :- * Project (85) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (84) + : : : : : : :- * BroadcastHashJoin LeftSemi BuildRight (82) + : : : : : : : :- * Filter (80) + : : : : : : : : +- * ColumnarToRow (79) + : : : : : : : : +- Scan parquet default.catalog_sales (78) + : : : : : : : +- ReusedExchange (81) + : : : : : : +- ReusedExchange (83) + : : : : : +- ReusedExchange (86) + : : : : +- * Project (109) + : : : : +- * Filter (108) + : : : : +- * HashAggregate (107) + : : : : +- Exchange (106) + : : : : +- * HashAggregate (105) + : : : : +- * Project (104) + : : : : +- * BroadcastHashJoin Inner BuildRight (103) + : : : : :- * Project (101) + : : : : : +- * BroadcastHashJoin Inner BuildRight (100) + : : : : : :- * BroadcastHashJoin LeftSemi BuildRight (98) + : : : : : : :- * Filter (96) + : : : : : : : +- * ColumnarToRow (95) + : : : : : : : +- Scan parquet default.web_sales (94) + : : : : : : +- ReusedExchange (97) + : : : : : +- ReusedExchange (99) + : : : : +- ReusedExchange (102) + : : : +- * HashAggregate (132) + : : : +- Exchange (131) + : : : +- * HashAggregate (130) + : : : +- * HashAggregate (129) + : : : +- Exchange (128) + : : : +- * HashAggregate (127) + : : : +- Union (126) + : : : :- * Project (117) + : : : : +- * Filter (116) + : : : : +- * HashAggregate (115) + : : : : +- ReusedExchange (114) + : : : :- * Project (121) + : : : : +- * Filter (120) + : : : : +- * HashAggregate (119) + : : : : +- ReusedExchange (118) + : : : +- * Project (125) + : : : +- * Filter (124) + : : : +- * HashAggregate (123) + : : : +- ReusedExchange (122) + : : +- * HashAggregate (155) + : : +- Exchange (154) + : : +- * HashAggregate (153) + : : +- * HashAggregate (152) + : : +- Exchange (151) + : : +- * 
HashAggregate (150) + : : +- Union (149) + : : :- * Project (140) + : : : +- * Filter (139) + : : : +- * HashAggregate (138) + : : : +- ReusedExchange (137) + : : :- * Project (144) + : : : +- * Filter (143) + : : : +- * HashAggregate (142) + : : : +- ReusedExchange (141) + : : +- * Project (148) + : : +- * Filter (147) + : : +- * HashAggregate (146) + : : +- ReusedExchange (145) + : +- * HashAggregate (178) + : +- Exchange (177) + : +- * HashAggregate (176) + : +- * HashAggregate (175) + : +- Exchange (174) + : +- * HashAggregate (173) + : +- Union (172) + : :- * Project (163) + : : +- * Filter (162) + : : +- * HashAggregate (161) + : : +- ReusedExchange (160) + : :- * Project (167) + : : +- * Filter (166) + : : +- * HashAggregate (165) + : : +- ReusedExchange (164) + : +- * Project (171) + : +- * Filter (170) + : +- * HashAggregate (169) + : +- ReusedExchange (168) + +- * HashAggregate (201) + +- Exchange (200) + +- * HashAggregate (199) + +- * HashAggregate (198) + +- Exchange (197) + +- * HashAggregate (196) + +- Union (195) + :- * Project (186) + : +- * Filter (185) + : +- * HashAggregate (184) + : +- ReusedExchange (183) + :- * Project (190) + : +- * Filter (189) + : +- * HashAggregate (188) + : +- ReusedExchange (187) + +- * Project (194) + +- * Filter (193) + +- * HashAggregate (192) + +- ReusedExchange (191) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 25] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] + +(3) Filter [codegen id : 25] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Condition : (isnotnull(ss_item_sk#2) AND 
isnotnull(ss_sold_date_sk#1)) + +(4) Scan parquet default.item +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_class_id), IsNotNull(i_category_id), IsNotNull(i_brand_id)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 11] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(6) Filter [codegen id : 11] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : ((isnotnull(i_class_id#7) AND isnotnull(i_category_id#8)) AND isnotnull(i_brand_id#6)) + +(7) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#1, ss_item_sk#2] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 9] +Input [2]: [ss_sold_date_sk#1, ss_item_sk#2] + +(9) Filter [codegen id : 9] +Input [2]: [ss_sold_date_sk#1, ss_item_sk#2] +Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(10) Scan parquet default.item +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 1] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(12) Filter [codegen id : 1] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : (((isnotnull(i_item_sk#5) AND isnotnull(i_brand_id#6)) AND 
isnotnull(i_class_id#7)) AND isnotnull(i_category_id#8)) + +(13) BroadcastExchange +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] + +(14) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(15) Project [codegen id : 9] +Output [4]: [ss_sold_date_sk#1, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(16) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_year#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#10, d_year#11] + +(18) Filter [codegen id : 2] +Input [2]: [d_date_sk#10, d_year#11] +Condition : (((isnotnull(d_year#11) AND (d_year#11 >= 1999)) AND (d_year#11 <= 2001)) AND isnotnull(d_date_sk#10)) + +(19) Project [codegen id : 2] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_year#11] + +(20) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(21) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(22) Project [codegen id : 9] +Output [3]: [i_brand_id#6 AS brand_id#13, i_class_id#7 AS class_id#14, i_category_id#8 AS category_id#15] +Input [5]: [ss_sold_date_sk#1, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] + +(23) Scan parquet default.catalog_sales +Output [2]: [cs_sold_date_sk#16, cs_item_sk#17] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 5] +Input [2]: [cs_sold_date_sk#16, cs_item_sk#17] + +(25) Filter [codegen id : 5] +Input [2]: [cs_sold_date_sk#16, cs_item_sk#17] +Condition : (isnotnull(cs_item_sk#17) AND isnotnull(cs_sold_date_sk#16)) + +(26) Scan parquet default.item +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 3] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(28) Filter [codegen id : 3] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : isnotnull(i_item_sk#5) + +(29) BroadcastExchange +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#18] + +(30) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_item_sk#17] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(31) Project [codegen id : 5] +Output [4]: [cs_sold_date_sk#16, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [6]: [cs_sold_date_sk#16, cs_item_sk#17, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(32) ReusedExchange [Reuses operator id: 20] +Output [1]: [d_date_sk#10] + +(33) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_sold_date_sk#16] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(34) Project [codegen id : 5] +Output [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Input [5]: [cs_sold_date_sk#16, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] + +(35) BroadcastExchange 
+Input [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [id=#19] + +(36) BroadcastHashJoin [codegen id : 9] +Left keys [6]: [coalesce(brand_id#13, 0), isnull(brand_id#13), coalesce(class_id#14, 0), isnull(class_id#14), coalesce(category_id#15, 0), isnull(category_id#15)] +Right keys [6]: [coalesce(i_brand_id#6, 0), isnull(i_brand_id#6), coalesce(i_class_id#7, 0), isnull(i_class_id#7), coalesce(i_category_id#8, 0), isnull(i_category_id#8)] +Join condition: None + +(37) Scan parquet default.web_sales +Output [2]: [ws_sold_date_sk#20, ws_item_sk#21] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 8] +Input [2]: [ws_sold_date_sk#20, ws_item_sk#21] + +(39) Filter [codegen id : 8] +Input [2]: [ws_sold_date_sk#20, ws_item_sk#21] +Condition : (isnotnull(ws_item_sk#21) AND isnotnull(ws_sold_date_sk#20)) + +(40) ReusedExchange [Reuses operator id: 29] +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(41) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ws_item_sk#21] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(42) Project [codegen id : 8] +Output [4]: [ws_sold_date_sk#20, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [6]: [ws_sold_date_sk#20, ws_item_sk#21, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(43) ReusedExchange [Reuses operator id: 20] +Output [1]: [d_date_sk#10] + +(44) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ws_sold_date_sk#20] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(45) Project [codegen id : 
8] +Output [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Input [5]: [ws_sold_date_sk#20, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] + +(46) BroadcastExchange +Input [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [id=#22] + +(47) BroadcastHashJoin [codegen id : 9] +Left keys [6]: [coalesce(brand_id#13, 0), isnull(brand_id#13), coalesce(class_id#14, 0), isnull(class_id#14), coalesce(category_id#15, 0), isnull(category_id#15)] +Right keys [6]: [coalesce(i_brand_id#6, 0), isnull(i_brand_id#6), coalesce(i_class_id#7, 0), isnull(i_class_id#7), coalesce(i_category_id#8, 0), isnull(i_category_id#8)] +Join condition: None + +(48) HashAggregate [codegen id : 9] +Input [3]: [brand_id#13, class_id#14, category_id#15] +Keys [3]: [brand_id#13, class_id#14, category_id#15] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#13, class_id#14, category_id#15] + +(49) Exchange +Input [3]: [brand_id#13, class_id#14, category_id#15] +Arguments: hashpartitioning(brand_id#13, class_id#14, category_id#15, 5), true, [id=#23] + +(50) HashAggregate [codegen id : 10] +Input [3]: [brand_id#13, class_id#14, category_id#15] +Keys [3]: [brand_id#13, class_id#14, category_id#15] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#13, class_id#14, category_id#15] + +(51) HashAggregate [codegen id : 10] +Input [3]: [brand_id#13, class_id#14, category_id#15] +Keys [3]: [brand_id#13, class_id#14, category_id#15] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#13, class_id#14, category_id#15] + +(52) HashAggregate [codegen id : 10] +Input [3]: [brand_id#13, class_id#14, category_id#15] +Keys [3]: [brand_id#13, class_id#14, category_id#15] +Functions: [] +Aggregate Attributes: [] +Results [3]: 
[brand_id#13, class_id#14, category_id#15] + +(53) BroadcastExchange +Input [3]: [brand_id#13, class_id#14, category_id#15] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#24] + +(54) BroadcastHashJoin [codegen id : 11] +Left keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Right keys [3]: [brand_id#13, class_id#14, category_id#15] +Join condition: None + +(55) Project [codegen id : 11] +Output [1]: [i_item_sk#5 AS ss_item_sk#25] +Input [7]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8, brand_id#13, class_id#14, category_id#15] + +(56) BroadcastExchange +Input [1]: [ss_item_sk#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] + +(57) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [ss_item_sk#25] +Join condition: None + +(58) Scan parquet default.item +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(59) ColumnarToRow [codegen id : 23] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(60) Filter [codegen id : 23] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : isnotnull(i_item_sk#5) + +(61) ReusedExchange [Reuses operator id: 56] +Output [1]: [ss_item_sk#25] + +(62) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [i_item_sk#5] +Right keys [1]: [ss_item_sk#25] +Join condition: None + +(63) BroadcastExchange +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#27] + +(64) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#5] 
+Join condition: None + +(65) Project [codegen id : 25] +Output [6]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(66) Scan parquet default.date_dim +Output [3]: [d_date_sk#10, d_year#11, d_moy#28] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,11), IsNotNull(d_date_sk)] +ReadSchema: struct + +(67) ColumnarToRow [codegen id : 24] +Input [3]: [d_date_sk#10, d_year#11, d_moy#28] + +(68) Filter [codegen id : 24] +Input [3]: [d_date_sk#10, d_year#11, d_moy#28] +Condition : ((((isnotnull(d_year#11) AND isnotnull(d_moy#28)) AND (d_year#11 = 2000)) AND (d_moy#28 = 11)) AND isnotnull(d_date_sk#10)) + +(69) Project [codegen id : 24] +Output [1]: [d_date_sk#10] +Input [3]: [d_date_sk#10, d_year#11, d_moy#28] + +(70) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#29] + +(71) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(72) Project [codegen id : 25] +Output [5]: [ss_quantity#3, ss_list_price#4, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [7]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] + +(73) HashAggregate [codegen id : 25] +Input [5]: [ss_quantity#3, ss_list_price#4, i_brand_id#6, i_class_id#7, i_category_id#8] +Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as 
decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Aggregate Attributes [3]: [sum#30, isEmpty#31, count#32] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#33, isEmpty#34, count#35] + +(74) Exchange +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#33, isEmpty#34, count#35] +Arguments: hashpartitioning(i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#36] + +(75) HashAggregate [codegen id : 26] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#33, isEmpty#34, count#35] +Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#37, count(1)#38] +Results [7]: [store AS channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#37 AS sales#40, count(1)#38 AS number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#37 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42] + +(76) Filter [codegen id : 26] +Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42 as decimal(32,6)) > cast(Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) + +(77) Project [codegen id : 26] +Output [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41] +Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42] + +(78) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#16, cs_item_sk#17, cs_quantity#45, cs_list_price#46] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(79) ColumnarToRow [codegen id : 51] +Input [4]: [cs_sold_date_sk#16, cs_item_sk#17, cs_quantity#45, cs_list_price#46] + +(80) Filter [codegen id : 51] +Input [4]: [cs_sold_date_sk#16, cs_item_sk#17, cs_quantity#45, cs_list_price#46] +Condition : (isnotnull(cs_item_sk#17) AND isnotnull(cs_sold_date_sk#16)) + +(81) ReusedExchange [Reuses operator id: 56] +Output [1]: [ss_item_sk#25] + +(82) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [cs_item_sk#17] +Right keys [1]: [ss_item_sk#25] +Join condition: None + +(83) ReusedExchange [Reuses operator id: 63] +Output [4]: [i_item_sk#5, i_brand_id#6, 
i_class_id#7, i_category_id#8] + +(84) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [cs_item_sk#17] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(85) Project [codegen id : 51] +Output [6]: [cs_sold_date_sk#16, cs_quantity#45, cs_list_price#46, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [8]: [cs_sold_date_sk#16, cs_item_sk#17, cs_quantity#45, cs_list_price#46, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(86) ReusedExchange [Reuses operator id: 70] +Output [1]: [d_date_sk#10] + +(87) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [cs_sold_date_sk#16] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(88) Project [codegen id : 51] +Output [5]: [cs_quantity#45, cs_list_price#46, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [7]: [cs_sold_date_sk#16, cs_quantity#45, cs_list_price#46, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] + +(89) HashAggregate [codegen id : 51] +Input [5]: [cs_quantity#45, cs_list_price#46, i_brand_id#6, i_class_id#7, i_category_id#8] +Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Aggregate Attributes [3]: [sum#47, isEmpty#48, count#49] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#50, isEmpty#51, count#52] + +(90) Exchange +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#50, isEmpty#51, count#52] +Arguments: hashpartitioning(i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#53] + +(91) HashAggregate [codegen id : 52] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#50, isEmpty#51, count#52] +Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#54, count(1)#55] +Results [7]: [catalog AS channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#54 AS sales#57, count(1)#55 AS number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#54 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#59] + +(92) Filter [codegen id : 52] +Input [7]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#59] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#59) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#59 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) + +(93) Project [codegen id : 52] +Output [6]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, 
number_sales#58] +Input [7]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#59] + +(94) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#20, ws_item_sk#21, ws_quantity#60, ws_list_price#61] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(95) ColumnarToRow [codegen id : 77] +Input [4]: [ws_sold_date_sk#20, ws_item_sk#21, ws_quantity#60, ws_list_price#61] + +(96) Filter [codegen id : 77] +Input [4]: [ws_sold_date_sk#20, ws_item_sk#21, ws_quantity#60, ws_list_price#61] +Condition : (isnotnull(ws_item_sk#21) AND isnotnull(ws_sold_date_sk#20)) + +(97) ReusedExchange [Reuses operator id: 56] +Output [1]: [ss_item_sk#25] + +(98) BroadcastHashJoin [codegen id : 77] +Left keys [1]: [ws_item_sk#21] +Right keys [1]: [ss_item_sk#25] +Join condition: None + +(99) ReusedExchange [Reuses operator id: 63] +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(100) BroadcastHashJoin [codegen id : 77] +Left keys [1]: [ws_item_sk#21] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(101) Project [codegen id : 77] +Output [6]: [ws_sold_date_sk#20, ws_quantity#60, ws_list_price#61, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [8]: [ws_sold_date_sk#20, ws_item_sk#21, ws_quantity#60, ws_list_price#61, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(102) ReusedExchange [Reuses operator id: 70] +Output [1]: [d_date_sk#10] + +(103) BroadcastHashJoin [codegen id : 77] +Left keys [1]: [ws_sold_date_sk#20] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(104) Project [codegen id : 77] +Output 
[5]: [ws_quantity#60, ws_list_price#61, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [7]: [ws_sold_date_sk#20, ws_quantity#60, ws_list_price#61, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] + +(105) HashAggregate [codegen id : 77] +Input [5]: [ws_quantity#60, ws_list_price#61, i_brand_id#6, i_class_id#7, i_category_id#8] +Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Aggregate Attributes [3]: [sum#62, isEmpty#63, count#64] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#65, isEmpty#66, count#67] + +(106) Exchange +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#65, isEmpty#66, count#67] +Arguments: hashpartitioning(i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#68] + +(107) HashAggregate [codegen id : 78] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#65, isEmpty#66, count#67] +Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#69, count(1)#70] +Results [7]: [web AS channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#69 AS sales#72, count(1)#70 AS number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as 
decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#69 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#74] + +(108) Filter [codegen id : 78] +Input [7]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#74] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#74) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#74 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) + +(109) Project [codegen id : 78] +Output [6]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73] +Input [7]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#74] + +(110) Union + +(111) HashAggregate [codegen id : 79] +Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41] +Keys [4]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [partial_sum(sales#40), partial_sum(number_sales#41)] +Aggregate Attributes [3]: [sum#75, isEmpty#76, sum#77] +Results [7]: [channel#39, i_brand_id#6, i_class_id#7, 
i_category_id#8, sum#78, isEmpty#79, sum#80] + +(112) Exchange +Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#78, isEmpty#79, sum#80] +Arguments: hashpartitioning(channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#81] + +(113) HashAggregate [codegen id : 80] +Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#78, isEmpty#79, sum#80] +Keys [4]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(sales#40), sum(number_sales#41)] +Aggregate Attributes [2]: [sum(sales#40)#82, sum(number_sales#41)#83] +Results [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum(sales#40)#82 AS sum_sales#84, sum(number_sales#41)#83 AS number_sales#85] + +(114) ReusedExchange [Reuses operator id: 74] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#86, isEmpty#87, count#88] + +(115) HashAggregate [codegen id : 106] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#86, isEmpty#87, count#88] +Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#89, count(1)#90] +Results [7]: [store AS channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#89 AS sales#40, count(1)#90 AS number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), 
DecimalType(18,2), true))#89 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#91] + +(116) Filter [codegen id : 106] +Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#91] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#91) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#91 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) + +(117) Project [codegen id : 106] +Output [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41] +Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#91] + +(118) ReusedExchange [Reuses operator id: 90] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#92, isEmpty#93, count#94] + +(119) HashAggregate [codegen id : 132] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#92, isEmpty#93, count#94] +Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), 
true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#95, count(1)#96] +Results [7]: [catalog AS channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#95 AS sales#57, count(1)#96 AS number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#95 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#97] + +(120) Filter [codegen id : 132] +Input [7]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#97] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#97) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#97 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) + +(121) Project [codegen id : 132] +Output [6]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58] +Input [7]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, 
sales#57, number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#97] + +(122) ReusedExchange [Reuses operator id: 106] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#98, isEmpty#99, count#100] + +(123) HashAggregate [codegen id : 158] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#98, isEmpty#99, count#100] +Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#101, count(1)#102] +Results [7]: [web AS channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#101 AS sales#72, count(1)#102 AS number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#101 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#103] + +(124) Filter [codegen id : 158] +Input [7]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), 
DecimalType(18,2), true))#103] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#103) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#103 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) + +(125) Project [codegen id : 158] +Output [6]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73] +Input [7]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#103] + +(126) Union + +(127) HashAggregate [codegen id : 159] +Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41] +Keys [4]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [partial_sum(sales#40), partial_sum(number_sales#41)] +Aggregate Attributes [3]: [sum#104, isEmpty#105, sum#106] +Results [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#107, isEmpty#108, sum#109] + +(128) Exchange +Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#107, isEmpty#108, sum#109] +Arguments: hashpartitioning(channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#110] + +(129) HashAggregate [codegen id : 160] +Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#107, isEmpty#108, sum#109] +Keys [4]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(sales#40), sum(number_sales#41)] +Aggregate Attributes [2]: [sum(sales#40)#111, sum(number_sales#41)#112] +Results 
[5]: [channel#39, i_brand_id#6, i_class_id#7, sum(sales#40)#111 AS sum_sales#84, sum(number_sales#41)#112 AS number_sales#85] + +(130) HashAggregate [codegen id : 160] +Input [5]: [channel#39, i_brand_id#6, i_class_id#7, sum_sales#84, number_sales#85] +Keys [3]: [channel#39, i_brand_id#6, i_class_id#7] +Functions [2]: [partial_sum(sum_sales#84), partial_sum(number_sales#85)] +Aggregate Attributes [3]: [sum#113, isEmpty#114, sum#115] +Results [6]: [channel#39, i_brand_id#6, i_class_id#7, sum#116, isEmpty#117, sum#118] + +(131) Exchange +Input [6]: [channel#39, i_brand_id#6, i_class_id#7, sum#116, isEmpty#117, sum#118] +Arguments: hashpartitioning(channel#39, i_brand_id#6, i_class_id#7, 5), true, [id=#119] + +(132) HashAggregate [codegen id : 161] +Input [6]: [channel#39, i_brand_id#6, i_class_id#7, sum#116, isEmpty#117, sum#118] +Keys [3]: [channel#39, i_brand_id#6, i_class_id#7] +Functions [2]: [sum(sum_sales#84), sum(number_sales#85)] +Aggregate Attributes [2]: [sum(sum_sales#84)#120, sum(number_sales#85)#121] +Results [6]: [channel#39, i_brand_id#6, i_class_id#7, null AS i_category_id#122, sum(sum_sales#84)#120 AS sum(sum_sales)#123, sum(number_sales#85)#121 AS sum(number_sales)#124] + +(133) Union + +(134) HashAggregate [codegen id : 162] +Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Keys [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Functions: [] +Aggregate Attributes: [] +Results [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] + +(135) Exchange +Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Arguments: hashpartitioning(channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85, 5), true, [id=#125] + +(136) HashAggregate [codegen id : 163] +Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, 
number_sales#85] +Keys [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Functions: [] +Aggregate Attributes: [] +Results [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] + +(137) ReusedExchange [Reuses operator id: 74] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#126, isEmpty#127, count#128] + +(138) HashAggregate [codegen id : 189] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#126, isEmpty#127, count#128] +Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#129, count(1)#130] +Results [7]: [store AS channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#129 AS sales#40, count(1)#130 AS number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#129 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#131] + +(139) Filter [codegen id : 189] +Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#131] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#131) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#131 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) + +(140) Project [codegen id : 189] +Output [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41] +Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#131] + +(141) ReusedExchange [Reuses operator id: 90] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#132, isEmpty#133, count#134] + +(142) HashAggregate [codegen id : 215] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#132, isEmpty#133, count#134] +Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#135, count(1)#136] +Results [7]: [catalog AS channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#135 AS sales#57, count(1)#136 AS number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#135 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#137] + +(143) Filter [codegen id : 215] +Input [7]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#137] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#137) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#137 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) + +(144) Project [codegen id : 215] +Output [6]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58] +Input [7]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#137] + +(145) ReusedExchange [Reuses operator id: 106] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#138, isEmpty#139, count#140] + +(146) HashAggregate [codegen id : 241] +Input [6]: 
[i_brand_id#6, i_class_id#7, i_category_id#8, sum#138, isEmpty#139, count#140] +Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#141, count(1)#142] +Results [7]: [web AS channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#141 AS sales#72, count(1)#142 AS number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#141 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#143] + +(147) Filter [codegen id : 241] +Input [7]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#143] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#143) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as 
decimal(12,2)))), DecimalType(18,2), true))#143 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) + +(148) Project [codegen id : 241] +Output [6]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73] +Input [7]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#143] + +(149) Union + +(150) HashAggregate [codegen id : 242] +Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41] +Keys [4]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [partial_sum(sales#40), partial_sum(number_sales#41)] +Aggregate Attributes [3]: [sum#144, isEmpty#145, sum#146] +Results [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#147, isEmpty#148, sum#149] + +(151) Exchange +Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#147, isEmpty#148, sum#149] +Arguments: hashpartitioning(channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#150] + +(152) HashAggregate [codegen id : 243] +Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#147, isEmpty#148, sum#149] +Keys [4]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(sales#40), sum(number_sales#41)] +Aggregate Attributes [2]: [sum(sales#40)#151, sum(number_sales#41)#152] +Results [4]: [channel#39, i_brand_id#6, sum(sales#40)#151 AS sum_sales#84, sum(number_sales#41)#152 AS number_sales#85] + +(153) HashAggregate [codegen id : 243] +Input [4]: [channel#39, i_brand_id#6, sum_sales#84, number_sales#85] +Keys [2]: [channel#39, i_brand_id#6] +Functions [2]: [partial_sum(sum_sales#84), partial_sum(number_sales#85)] +Aggregate Attributes [3]: [sum#153, isEmpty#154, sum#155] 
+Results [5]: [channel#39, i_brand_id#6, sum#156, isEmpty#157, sum#158] + +(154) Exchange +Input [5]: [channel#39, i_brand_id#6, sum#156, isEmpty#157, sum#158] +Arguments: hashpartitioning(channel#39, i_brand_id#6, 5), true, [id=#159] + +(155) HashAggregate [codegen id : 244] +Input [5]: [channel#39, i_brand_id#6, sum#156, isEmpty#157, sum#158] +Keys [2]: [channel#39, i_brand_id#6] +Functions [2]: [sum(sum_sales#84), sum(number_sales#85)] +Aggregate Attributes [2]: [sum(sum_sales#84)#160, sum(number_sales#85)#161] +Results [6]: [channel#39, i_brand_id#6, null AS i_class_id#162, null AS i_category_id#163, sum(sum_sales#84)#160 AS sum(sum_sales)#164, sum(number_sales#85)#161 AS sum(number_sales)#165] + +(156) Union + +(157) HashAggregate [codegen id : 245] +Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Keys [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Functions: [] +Aggregate Attributes: [] +Results [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] + +(158) Exchange +Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Arguments: hashpartitioning(channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85, 5), true, [id=#166] + +(159) HashAggregate [codegen id : 246] +Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Keys [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Functions: [] +Aggregate Attributes: [] +Results [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] + +(160) ReusedExchange [Reuses operator id: 74] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#167, isEmpty#168, count#169] + +(161) HashAggregate [codegen id : 272] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, 
sum#167, isEmpty#168, count#169] +Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#170, count(1)#171] +Results [7]: [store AS channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#170 AS sales#40, count(1)#171 AS number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#170 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#172] + +(162) Filter [codegen id : 272] +Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#172] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#172) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#172 as decimal(32,6)) > 
cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) + +(163) Project [codegen id : 272] +Output [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41] +Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#172] + +(164) ReusedExchange [Reuses operator id: 90] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#173, isEmpty#174, count#175] + +(165) HashAggregate [codegen id : 298] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#173, isEmpty#174, count#175] +Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#176, count(1)#177] +Results [7]: [catalog AS channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#176 AS sales#57, count(1)#177 AS number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#176 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#178] + +(166) 
Filter [codegen id : 298] +Input [7]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#178] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#178) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#178 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) + +(167) Project [codegen id : 298] +Output [6]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58] +Input [7]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#178] + +(168) ReusedExchange [Reuses operator id: 106] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#179, isEmpty#180, count#181] + +(169) HashAggregate [codegen id : 324] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#179, isEmpty#180, count#181] +Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), 
DecimalType(18,2), true))#182, count(1)#183] +Results [7]: [web AS channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#182 AS sales#72, count(1)#183 AS number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#182 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#184] + +(170) Filter [codegen id : 324] +Input [7]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#184] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#184) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#184 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) + +(171) Project [codegen id : 324] +Output [6]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73] +Input [7]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), 
DecimalType(18,2), true))#184] + +(172) Union + +(173) HashAggregate [codegen id : 325] +Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41] +Keys [4]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [partial_sum(sales#40), partial_sum(number_sales#41)] +Aggregate Attributes [3]: [sum#185, isEmpty#186, sum#187] +Results [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#188, isEmpty#189, sum#190] + +(174) Exchange +Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#188, isEmpty#189, sum#190] +Arguments: hashpartitioning(channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#191] + +(175) HashAggregate [codegen id : 326] +Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#188, isEmpty#189, sum#190] +Keys [4]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(sales#40), sum(number_sales#41)] +Aggregate Attributes [2]: [sum(sales#40)#192, sum(number_sales#41)#193] +Results [3]: [channel#39, sum(sales#40)#192 AS sum_sales#84, sum(number_sales#41)#193 AS number_sales#85] + +(176) HashAggregate [codegen id : 326] +Input [3]: [channel#39, sum_sales#84, number_sales#85] +Keys [1]: [channel#39] +Functions [2]: [partial_sum(sum_sales#84), partial_sum(number_sales#85)] +Aggregate Attributes [3]: [sum#194, isEmpty#195, sum#196] +Results [4]: [channel#39, sum#197, isEmpty#198, sum#199] + +(177) Exchange +Input [4]: [channel#39, sum#197, isEmpty#198, sum#199] +Arguments: hashpartitioning(channel#39, 5), true, [id=#200] + +(178) HashAggregate [codegen id : 327] +Input [4]: [channel#39, sum#197, isEmpty#198, sum#199] +Keys [1]: [channel#39] +Functions [2]: [sum(sum_sales#84), sum(number_sales#85)] +Aggregate Attributes [2]: [sum(sum_sales#84)#201, sum(number_sales#85)#202] +Results [6]: [channel#39, null AS i_brand_id#203, null AS i_class_id#204, null AS i_category_id#205, sum(sum_sales#84)#201 AS 
sum(sum_sales)#206, sum(number_sales#85)#202 AS sum(number_sales)#207] + +(179) Union + +(180) HashAggregate [codegen id : 328] +Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Keys [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Functions: [] +Aggregate Attributes: [] +Results [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] + +(181) Exchange +Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Arguments: hashpartitioning(channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85, 5), true, [id=#208] + +(182) HashAggregate [codegen id : 329] +Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Keys [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Functions: [] +Aggregate Attributes: [] +Results [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] + +(183) ReusedExchange [Reuses operator id: 74] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#209, isEmpty#210, count#211] + +(184) HashAggregate [codegen id : 355] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#209, isEmpty#210, count#211] +Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#212, count(1)#213] +Results [7]: [store AS channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, 
sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#212 AS sales#40, count(1)#213 AS number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#212 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#214] + +(185) Filter [codegen id : 355] +Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#214] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#214) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#214 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) + +(186) Project [codegen id : 355] +Output [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41] +Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#214] + +(187) ReusedExchange [Reuses operator id: 90] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#215, 
isEmpty#216, count#217] + +(188) HashAggregate [codegen id : 381] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#215, isEmpty#216, count#217] +Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#218, count(1)#219] +Results [7]: [catalog AS channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#218 AS sales#57, count(1)#219 AS number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#218 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#220] + +(189) Filter [codegen id : 381] +Input [7]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#220] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#220) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as 
decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#220 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) + +(190) Project [codegen id : 381] +Output [6]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58] +Input [7]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#220] + +(191) ReusedExchange [Reuses operator id: 106] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#221, isEmpty#222, count#223] + +(192) HashAggregate [codegen id : 407] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#221, isEmpty#222, count#223] +Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#224, count(1)#225] +Results [7]: [web AS channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#224 AS sales#72, count(1)#225 AS number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#224 AS 
sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#226] + +(193) Filter [codegen id : 407] +Input [7]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#226] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#226) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#226 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) + +(194) Project [codegen id : 407] +Output [6]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73] +Input [7]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#226] + +(195) Union + +(196) HashAggregate [codegen id : 408] +Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41] +Keys [4]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [partial_sum(sales#40), partial_sum(number_sales#41)] +Aggregate Attributes [3]: [sum#227, isEmpty#228, sum#229] +Results [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#230, isEmpty#231, sum#232] + +(197) Exchange +Input [7]: [channel#39, i_brand_id#6, i_class_id#7, 
i_category_id#8, sum#230, isEmpty#231, sum#232] +Arguments: hashpartitioning(channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#233] + +(198) HashAggregate [codegen id : 409] +Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#230, isEmpty#231, sum#232] +Keys [4]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(sales#40), sum(number_sales#41)] +Aggregate Attributes [2]: [sum(sales#40)#234, sum(number_sales#41)#235] +Results [2]: [sum(sales#40)#234 AS sum_sales#84, sum(number_sales#41)#235 AS number_sales#85] + +(199) HashAggregate [codegen id : 409] +Input [2]: [sum_sales#84, number_sales#85] +Keys: [] +Functions [2]: [partial_sum(sum_sales#84), partial_sum(number_sales#85)] +Aggregate Attributes [3]: [sum#236, isEmpty#237, sum#238] +Results [3]: [sum#239, isEmpty#240, sum#241] + +(200) Exchange +Input [3]: [sum#239, isEmpty#240, sum#241] +Arguments: SinglePartition, true, [id=#242] + +(201) HashAggregate [codegen id : 410] +Input [3]: [sum#239, isEmpty#240, sum#241] +Keys: [] +Functions [2]: [sum(sum_sales#84), sum(number_sales#85)] +Aggregate Attributes [2]: [sum(sum_sales#84)#243, sum(number_sales#85)#244] +Results [6]: [null AS channel#245, null AS i_brand_id#246, null AS i_class_id#247, null AS i_category_id#248, sum(sum_sales#84)#243 AS sum(sum_sales)#249, sum(number_sales#85)#244 AS sum(number_sales)#250] + +(202) Union + +(203) HashAggregate [codegen id : 411] +Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Keys [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Functions: [] +Aggregate Attributes: [] +Results [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] + +(204) Exchange +Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Arguments: hashpartitioning(channel#39, i_brand_id#6, i_class_id#7, 
i_category_id#8, sum_sales#84, number_sales#85, 5), true, [id=#251] + +(205) HashAggregate [codegen id : 412] +Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Keys [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Functions: [] +Aggregate Attributes: [] +Results [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] + +(206) TakeOrderedAndProject +Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Arguments: 100, [channel#39 ASC NULLS FIRST, i_brand_id#6 ASC NULLS FIRST, i_class_id#7 ASC NULLS FIRST, i_category_id#8 ASC NULLS FIRST], [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 76 Hosting Expression = Subquery scalar-subquery#43, [id=#44] +* HashAggregate (236) ++- Exchange (235) + +- * HashAggregate (234) + +- Union (233) + :- * Project (216) + : +- * BroadcastHashJoin Inner BuildRight (215) + : :- * Filter (209) + : : +- * ColumnarToRow (208) + : : +- Scan parquet default.store_sales (207) + : +- BroadcastExchange (214) + : +- * Project (213) + : +- * Filter (212) + : +- * ColumnarToRow (211) + : +- Scan parquet default.date_dim (210) + :- * Project (226) + : +- * BroadcastHashJoin Inner BuildRight (225) + : :- * Filter (219) + : : +- * ColumnarToRow (218) + : : +- Scan parquet default.catalog_sales (217) + : +- BroadcastExchange (224) + : +- * Project (223) + : +- * Filter (222) + : +- * ColumnarToRow (221) + : +- Scan parquet default.date_dim (220) + +- * Project (232) + +- * BroadcastHashJoin Inner BuildRight (231) + :- * Filter (229) + : +- * ColumnarToRow (228) + : +- Scan parquet default.web_sales (227) + +- ReusedExchange (230) + + +(207) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] +Batched: true +Location: 
InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(208) ColumnarToRow [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] + +(209) Filter [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] +Condition : isnotnull(ss_sold_date_sk#1) + +(210) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_year#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(211) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#10, d_year#11] + +(212) Filter [codegen id : 1] +Input [2]: [d_date_sk#10, d_year#11] +Condition : (((isnotnull(d_year#11) AND (d_year#11 >= 1999)) AND (d_year#11 <= 2001)) AND isnotnull(d_date_sk#10)) + +(213) Project [codegen id : 1] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_year#11] + +(214) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#252] + +(215) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(216) Project [codegen id : 2] +Output [2]: [ss_quantity#3 AS quantity#253, ss_list_price#4 AS list_price#254] +Input [4]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, d_date_sk#10] + +(217) Scan parquet default.catalog_sales +Output [3]: [cs_sold_date_sk#16, cs_quantity#45, cs_list_price#46] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/catalog_sales] +PushedFilters: 
[IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(218) ColumnarToRow [codegen id : 4] +Input [3]: [cs_sold_date_sk#16, cs_quantity#45, cs_list_price#46] + +(219) Filter [codegen id : 4] +Input [3]: [cs_sold_date_sk#16, cs_quantity#45, cs_list_price#46] +Condition : isnotnull(cs_sold_date_sk#16) + +(220) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_year#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1998), LessThanOrEqual(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(221) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#10, d_year#11] + +(222) Filter [codegen id : 3] +Input [2]: [d_date_sk#10, d_year#11] +Condition : (((isnotnull(d_year#11) AND (d_year#11 >= 1998)) AND (d_year#11 <= 2000)) AND isnotnull(d_date_sk#10)) + +(223) Project [codegen id : 3] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_year#11] + +(224) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#255] + +(225) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#16] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(226) Project [codegen id : 4] +Output [2]: [cs_quantity#45 AS quantity#256, cs_list_price#46 AS list_price#257] +Input [4]: [cs_sold_date_sk#16, cs_quantity#45, cs_list_price#46, d_date_sk#10] + +(227) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#20, ws_quantity#60, ws_list_price#61] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(228) ColumnarToRow [codegen id : 6] +Input [3]: [ws_sold_date_sk#20, ws_quantity#60, ws_list_price#61] + +(229) Filter 
[codegen id : 6] +Input [3]: [ws_sold_date_sk#20, ws_quantity#60, ws_list_price#61] +Condition : isnotnull(ws_sold_date_sk#20) + +(230) ReusedExchange [Reuses operator id: 224] +Output [1]: [d_date_sk#10] + +(231) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#20] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(232) Project [codegen id : 6] +Output [2]: [ws_quantity#60 AS quantity#258, ws_list_price#61 AS list_price#259] +Input [4]: [ws_sold_date_sk#20, ws_quantity#60, ws_list_price#61, d_date_sk#10] + +(233) Union + +(234) HashAggregate [codegen id : 7] +Input [2]: [quantity#253, list_price#254] +Keys: [] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#253 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#254 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#260, count#261] +Results [2]: [sum#262, count#263] + +(235) Exchange +Input [2]: [sum#262, count#263] +Arguments: SinglePartition, true, [id=#264] + +(236) HashAggregate [codegen id : 8] +Input [2]: [sum#262, count#263] +Keys: [] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#253 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#254 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#253 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#254 as decimal(12,2)))), DecimalType(18,2), true))#265] +Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#253 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#254 as decimal(12,2)))), DecimalType(18,2), true))#265 AS average_sales#266] + +Subquery:2 Hosting operator id = 92 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] + +Subquery:3 Hosting operator id = 108 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] + 
+Subquery:4 Hosting operator id = 116 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] + +Subquery:5 Hosting operator id = 120 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] + +Subquery:6 Hosting operator id = 124 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] + +Subquery:7 Hosting operator id = 139 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] + +Subquery:8 Hosting operator id = 143 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] + +Subquery:9 Hosting operator id = 147 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] + +Subquery:10 Hosting operator id = 162 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] + +Subquery:11 Hosting operator id = 166 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] + +Subquery:12 Hosting operator id = 170 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] + +Subquery:13 Hosting operator id = 185 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] + +Subquery:14 Hosting operator id = 189 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] + +Subquery:15 Hosting operator id = 193 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] + + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/simplified.txt new file mode 100644 index 0000000000000..a329e40a70bbe --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/simplified.txt @@ -0,0 +1,387 @@ +TakeOrderedAndProject [channel,i_brand_id,i_category_id,i_class_id,number_sales,sum_sales] + WholeStageCodegen (412) + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,number_sales,sum_sales] + InputAdapter + Exchange 
[channel,i_brand_id,i_category_id,i_class_id,number_sales,sum_sales] #1 + WholeStageCodegen (411) + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,number_sales,sum_sales] + InputAdapter + Union + WholeStageCodegen (329) + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,number_sales,sum_sales] + InputAdapter + Exchange [channel,i_brand_id,i_category_id,i_class_id,number_sales,sum_sales] #2 + WholeStageCodegen (328) + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,number_sales,sum_sales] + InputAdapter + Union + WholeStageCodegen (246) + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,number_sales,sum_sales] + InputAdapter + Exchange [channel,i_brand_id,i_category_id,i_class_id,number_sales,sum_sales] #3 + WholeStageCodegen (245) + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,number_sales,sum_sales] + InputAdapter + Union + WholeStageCodegen (163) + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,number_sales,sum_sales] + InputAdapter + Exchange [channel,i_brand_id,i_category_id,i_class_id,number_sales,sum_sales] #4 + WholeStageCodegen (162) + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,number_sales,sum_sales] + InputAdapter + Union + WholeStageCodegen (80) + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,isEmpty,sum,sum] [isEmpty,number_sales,sum,sum,sum(number_salesL),sum(sales),sum_sales] + InputAdapter + Exchange [channel,i_brand_id,i_category_id,i_class_id] #5 + WholeStageCodegen (79) + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] [isEmpty,isEmpty,sum,sum,sum,sum] + InputAdapter + Union + WholeStageCodegen (26) + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + Subquery #1 + WholeStageCodegen (8) + HashAggregate 
[count,sum] [average_sales,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),count,sum] + InputAdapter + Exchange #17 + WholeStageCodegen (7) + HashAggregate [list_price,quantity] [count,count,sum,sum] + InputAdapter + Union + WholeStageCodegen (2) + Project [ss_list_price,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_list_price,ss_quantity,ss_sold_date_sk] + InputAdapter + BroadcastExchange #18 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen (4) + Project [cs_list_price,cs_quantity] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_list_price,cs_quantity,cs_sold_date_sk] + InputAdapter + BroadcastExchange #19 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen (6) + Project [ws_list_price,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_list_price,ws_quantity,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #19 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), 
DecimalType(18,2), true))] + InputAdapter + Exchange [i_brand_id,i_category_id,i_class_id] #6 + WholeStageCodegen (25) + HashAggregate [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity] [count,count,isEmpty,isEmpty,sum,sum] + Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + BroadcastHashJoin [ss_item_sk,ss_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (11) + Project [i_item_sk] + BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] + Filter [i_brand_id,i_category_id,i_class_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (10) + HashAggregate [brand_id,category_id,class_id] + HashAggregate [brand_id,category_id,class_id] + HashAggregate [brand_id,category_id,class_id] + InputAdapter + Exchange [brand_id,category_id,class_id] #9 + WholeStageCodegen (9) + HashAggregate [brand_id,category_id,class_id] + BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] + Project [i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (1) + Filter [i_brand_id,i_category_id,i_class_id,i_item_sk] + 
ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (5) + Project [i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_item_sk,cs_sold_date_sk] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] + InputAdapter + ReusedExchange [d_date_sk] #11 + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (8) + Project [i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,ws_sold_date_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] #13 + InputAdapter + ReusedExchange [d_date_sk] #11 + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (23) + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] + InputAdapter + ReusedExchange [ss_item_sk] #7 + InputAdapter + BroadcastExchange #16 + WholeStageCodegen (24) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + WholeStageCodegen (52) + Project 
[channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + Exchange [i_brand_id,i_category_id,i_class_id] #20 + WholeStageCodegen (51) + HashAggregate [cs_list_price,cs_quantity,i_brand_id,i_category_id,i_class_id] [count,count,isEmpty,isEmpty,sum,sum] + Project [cs_list_price,cs_quantity,i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_list_price,cs_quantity,cs_sold_date_sk,i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + BroadcastHashJoin [cs_item_sk,ss_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_item_sk,cs_list_price,cs_quantity,cs_sold_date_sk] + InputAdapter + ReusedExchange [ss_item_sk] #7 + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] #15 + InputAdapter + ReusedExchange [d_date_sk] #16 + WholeStageCodegen (78) + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate 
[count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + Exchange [i_brand_id,i_category_id,i_class_id] #21 + WholeStageCodegen (77) + HashAggregate [i_brand_id,i_category_id,i_class_id,ws_list_price,ws_quantity] [count,count,isEmpty,isEmpty,sum,sum] + Project [i_brand_id,i_category_id,i_class_id,ws_list_price,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,ws_list_price,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + BroadcastHashJoin [ss_item_sk,ws_item_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_list_price,ws_quantity,ws_sold_date_sk] + InputAdapter + ReusedExchange [ss_item_sk] #7 + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] #15 + InputAdapter + ReusedExchange [d_date_sk] #16 + WholeStageCodegen (161) + HashAggregate [channel,i_brand_id,i_class_id,isEmpty,sum,sum] [i_category_id,isEmpty,sum,sum,sum(number_salesL),sum(number_sales),sum(sum_sales),sum(sum_sales)] + InputAdapter + Exchange [channel,i_brand_id,i_class_id] #22 + WholeStageCodegen (160) + HashAggregate [channel,i_brand_id,i_class_id,number_sales,sum_sales] [isEmpty,isEmpty,sum,sum,sum,sum] + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,isEmpty,sum,sum] [isEmpty,number_sales,sum,sum,sum(number_salesL),sum(sales),sum_sales] + InputAdapter + Exchange [channel,i_brand_id,i_category_id,i_class_id] #23 + WholeStageCodegen (159) + HashAggregate 
[channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] [isEmpty,isEmpty,sum,sum,sum,sum] + InputAdapter + Union + WholeStageCodegen (106) + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + ReusedExchange [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] #6 + WholeStageCodegen (132) + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + ReusedExchange [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] #20 + WholeStageCodegen (158) + Project 
[channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + ReusedExchange [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] #21 + WholeStageCodegen (244) + HashAggregate [channel,i_brand_id,isEmpty,sum,sum] [i_category_id,i_class_id,isEmpty,sum,sum,sum(number_salesL),sum(number_sales),sum(sum_sales),sum(sum_sales)] + InputAdapter + Exchange [channel,i_brand_id] #24 + WholeStageCodegen (243) + HashAggregate [channel,i_brand_id,number_sales,sum_sales] [isEmpty,isEmpty,sum,sum,sum,sum] + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,isEmpty,sum,sum] [isEmpty,number_sales,sum,sum,sum(number_salesL),sum(sales),sum_sales] + InputAdapter + Exchange [channel,i_brand_id,i_category_id,i_class_id] #25 + WholeStageCodegen (242) + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] [isEmpty,isEmpty,sum,sum,sum,sum] + InputAdapter + Union + WholeStageCodegen (189) + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate 
[count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + ReusedExchange [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] #6 + WholeStageCodegen (215) + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + ReusedExchange [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] #20 + WholeStageCodegen (241) + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as 
decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + ReusedExchange [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] #21 + WholeStageCodegen (327) + HashAggregate [channel,isEmpty,sum,sum] [i_brand_id,i_category_id,i_class_id,isEmpty,sum,sum,sum(number_salesL),sum(number_sales),sum(sum_sales),sum(sum_sales)] + InputAdapter + Exchange [channel] #26 + WholeStageCodegen (326) + HashAggregate [channel,number_sales,sum_sales] [isEmpty,isEmpty,sum,sum,sum,sum] + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,isEmpty,sum,sum] [isEmpty,number_sales,sum,sum,sum(number_salesL),sum(sales),sum_sales] + InputAdapter + Exchange [channel,i_brand_id,i_category_id,i_class_id] #27 + WholeStageCodegen (325) + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] [isEmpty,isEmpty,sum,sum,sum,sum] + InputAdapter + Union + WholeStageCodegen (272) + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + ReusedExchange 
[count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] #6 + WholeStageCodegen (298) + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + ReusedExchange [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] #20 + WholeStageCodegen (324) + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + ReusedExchange [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] #21 + WholeStageCodegen (410) + HashAggregate [isEmpty,sum,sum] 
[channel,i_brand_id,i_category_id,i_class_id,isEmpty,sum,sum,sum(number_salesL),sum(number_sales),sum(sum_sales),sum(sum_sales)] + InputAdapter + Exchange #28 + WholeStageCodegen (409) + HashAggregate [number_sales,sum_sales] [isEmpty,isEmpty,sum,sum,sum,sum] + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,isEmpty,sum,sum] [isEmpty,number_sales,sum,sum,sum(number_salesL),sum(sales),sum_sales] + InputAdapter + Exchange [channel,i_brand_id,i_category_id,i_class_id] #29 + WholeStageCodegen (408) + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] [isEmpty,isEmpty,sum,sum,sum,sum] + InputAdapter + Union + WholeStageCodegen (355) + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + ReusedExchange [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] #6 + WholeStageCodegen (381) + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] 
[channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + ReusedExchange [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] #20 + WholeStageCodegen (407) + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] [channel,count,count(1),isEmpty,number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + InputAdapter + ReusedExchange [count,i_brand_id,i_category_id,i_class_id,isEmpty,sum] #21 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.sf100/explain.txt new file mode 100644 index 0000000000000..ef8c5ccae050f --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.sf100/explain.txt @@ -0,0 +1,877 @@ +== Physical Plan == +TakeOrderedAndProject (160) ++- Union (159) + :- * HashAggregate (51) + : +- Exchange (50) + : +- * HashAggregate (49) + : +- * Project (48) + : +- * SortMergeJoin 
Inner (47) + : :- * Sort (25) + : : +- Exchange (24) + : : +- * Project (23) + : : +- * BroadcastHashJoin Inner BuildRight (22) + : : :- * Project (17) + : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.catalog_sales (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.customer_demographics (4) + : : : +- BroadcastExchange (15) + : : : +- * Project (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet default.date_dim (11) + : : +- BroadcastExchange (21) + : : +- * Filter (20) + : : +- * ColumnarToRow (19) + : : +- Scan parquet default.item (18) + : +- * Sort (46) + : +- Exchange (45) + : +- * Project (44) + : +- * SortMergeJoin Inner (43) + : :- * Sort (37) + : : +- Exchange (36) + : : +- * Project (35) + : : +- * BroadcastHashJoin Inner BuildRight (34) + : : :- * Project (29) + : : : +- * Filter (28) + : : : +- * ColumnarToRow (27) + : : : +- Scan parquet default.customer (26) + : : +- BroadcastExchange (33) + : : +- * Filter (32) + : : +- * ColumnarToRow (31) + : : +- Scan parquet default.customer_address (30) + : +- * Sort (42) + : +- Exchange (41) + : +- * Filter (40) + : +- * ColumnarToRow (39) + : +- Scan parquet default.customer_demographics (38) + :- * HashAggregate (76) + : +- Exchange (75) + : +- * HashAggregate (74) + : +- * Project (73) + : +- * SortMergeJoin Inner (72) + : :- * Sort (53) + : : +- ReusedExchange (52) + : +- * Sort (71) + : +- Exchange (70) + : +- * Project (69) + : +- * SortMergeJoin Inner (68) + : :- * Sort (65) + : : +- Exchange (64) + : : +- * Project (63) + : : +- * BroadcastHashJoin Inner BuildRight (62) + : : :- * Project (57) + : : : +- * Filter (56) + : : : +- * ColumnarToRow (55) + : : : +- Scan parquet 
default.customer (54) + : : +- BroadcastExchange (61) + : : +- * Filter (60) + : : +- * ColumnarToRow (59) + : : +- Scan parquet default.customer_address (58) + : +- * Sort (67) + : +- ReusedExchange (66) + :- * HashAggregate (102) + : +- Exchange (101) + : +- * HashAggregate (100) + : +- * Project (99) + : +- * SortMergeJoin Inner (98) + : :- * Sort (78) + : : +- ReusedExchange (77) + : +- * Sort (97) + : +- Exchange (96) + : +- * Project (95) + : +- * SortMergeJoin Inner (94) + : :- * Sort (91) + : : +- Exchange (90) + : : +- * Project (89) + : : +- * BroadcastHashJoin Inner BuildRight (88) + : : :- * Project (82) + : : : +- * Filter (81) + : : : +- * ColumnarToRow (80) + : : : +- Scan parquet default.customer (79) + : : +- BroadcastExchange (87) + : : +- * Project (86) + : : +- * Filter (85) + : : +- * ColumnarToRow (84) + : : +- Scan parquet default.customer_address (83) + : +- * Sort (93) + : +- ReusedExchange (92) + :- * HashAggregate (137) + : +- Exchange (136) + : +- * HashAggregate (135) + : +- * Project (134) + : +- * BroadcastHashJoin Inner BuildRight (133) + : :- * Project (131) + : : +- * BroadcastHashJoin Inner BuildRight (130) + : : :- * Project (111) + : : : +- * BroadcastHashJoin Inner BuildRight (110) + : : : :- * Project (108) + : : : : +- * BroadcastHashJoin Inner BuildRight (107) + : : : : :- * Filter (105) + : : : : : +- * ColumnarToRow (104) + : : : : : +- Scan parquet default.catalog_sales (103) + : : : : +- ReusedExchange (106) + : : : +- ReusedExchange (109) + : : +- BroadcastExchange (129) + : : +- * Project (128) + : : +- * BroadcastHashJoin Inner BuildLeft (127) + : : :- BroadcastExchange (123) + : : : +- * Project (122) + : : : +- * BroadcastHashJoin Inner BuildRight (121) + : : : :- * Project (115) + : : : : +- * Filter (114) + : : : : +- * ColumnarToRow (113) + : : : : +- Scan parquet default.customer (112) + : : : +- BroadcastExchange (120) + : : : +- * Project (119) + : : : +- * Filter (118) + : : : +- * ColumnarToRow (117) + : : : 
+- Scan parquet default.customer_address (116) + : : +- * Filter (126) + : : +- * ColumnarToRow (125) + : : +- Scan parquet default.customer_demographics (124) + : +- ReusedExchange (132) + +- * HashAggregate (158) + +- Exchange (157) + +- * HashAggregate (156) + +- * Project (155) + +- * BroadcastHashJoin Inner BuildRight (154) + :- * Project (152) + : +- * BroadcastHashJoin Inner BuildRight (151) + : :- * Project (146) + : : +- * BroadcastHashJoin Inner BuildRight (145) + : : :- * Project (143) + : : : +- * BroadcastHashJoin Inner BuildRight (142) + : : : :- * Filter (140) + : : : : +- * ColumnarToRow (139) + : : : : +- Scan parquet default.catalog_sales (138) + : : : +- ReusedExchange (141) + : : +- ReusedExchange (144) + : +- BroadcastExchange (150) + : +- * Filter (149) + : +- * ColumnarToRow (148) + : +- Scan parquet default.item (147) + +- ReusedExchange (153) + + +(1) Scan parquet default.catalog_sales +Output [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9] + +(3) Filter [codegen id : 4] +Input [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9] +Condition : (((isnotnull(cs_bill_cdemo_sk#3) AND isnotnull(cs_bill_customer_sk#2)) AND isnotnull(cs_sold_date_sk#1)) AND isnotnull(cs_item_sk#4)) + +(4) 
Scan parquet default.customer_demographics +Output [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/customer_demographics] +PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_education_status), EqualTo(cd_gender,M), EqualTo(cd_education_status,College), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] + +(6) Filter [codegen id : 1] +Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] +Condition : ((((isnotnull(cd_gender#11) AND isnotnull(cd_education_status#12)) AND (cd_gender#11 = M)) AND (cd_education_status#12 = College)) AND isnotnull(cd_demo_sk#10)) + +(7) Project [codegen id : 1] +Output [2]: [cd_demo_sk#10, cd_dep_count#13] +Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] + +(8) BroadcastExchange +Input [2]: [cd_demo_sk#10, cd_dep_count#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_bill_cdemo_sk#3] +Right keys [1]: [cd_demo_sk#10] +Join condition: None + +(10) Project [codegen id : 4] +Output [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13] +Input [11]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_demo_sk#10, cd_dep_count#13] + +(11) Scan parquet default.date_dim +Output [2]: [d_date_sk#15, d_year#16] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#15, d_year#16] + +(13) Filter [codegen id : 2] +Input [2]: [d_date_sk#15, d_year#16] +Condition : ((isnotnull(d_year#16) AND (d_year#16 = 2001)) AND isnotnull(d_date_sk#15)) + +(14) Project [codegen id : 2] +Output [1]: [d_date_sk#15] +Input [2]: [d_date_sk#15, d_year#16] + +(15) BroadcastExchange +Input [1]: [d_date_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#15] +Join condition: None + +(17) Project [codegen id : 4] +Output [8]: [cs_bill_customer_sk#2, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13] +Input [10]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, d_date_sk#15] + +(18) Scan parquet default.item +Output [2]: [i_item_sk#18, i_item_id#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [i_item_sk#18, i_item_id#19] + +(20) Filter [codegen id : 3] +Input [2]: [i_item_sk#18, i_item_id#19] +Condition : isnotnull(i_item_sk#18) + +(21) BroadcastExchange +Input [2]: [i_item_sk#18, i_item_id#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#20] + +(22) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_item_sk#4] +Right keys [1]: [i_item_sk#18] +Join 
condition: None + +(23) Project [codegen id : 4] +Output [8]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19] +Input [10]: [cs_bill_customer_sk#2, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_sk#18, i_item_id#19] + +(24) Exchange +Input [8]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19] +Arguments: hashpartitioning(cs_bill_customer_sk#2, 5), true, [id=#21] + +(25) Sort [codegen id : 5] +Input [8]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19] +Arguments: [cs_bill_customer_sk#2 ASC NULLS FIRST], false, 0 + +(26) Scan parquet default.customer +Output [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_month#25, c_birth_year#26] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/customer] +PushedFilters: [In(c_birth_month, [9,5,12,4,1,10]), IsNotNull(c_customer_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 7] +Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_month#25, c_birth_year#26] + +(28) Filter [codegen id : 7] +Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_month#25, c_birth_year#26] +Condition : (((c_birth_month#25 IN (9,5,12,4,1,10) AND isnotnull(c_customer_sk#22)) AND isnotnull(c_current_cdemo_sk#23)) AND isnotnull(c_current_addr_sk#24)) + +(29) Project [codegen id : 7] +Output [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26] +Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, 
c_current_addr_sk#24, c_birth_month#25, c_birth_year#26] + +(30) Scan parquet default.customer_address +Output [4]: [ca_address_sk#27, ca_county#28, ca_state#29, ca_country#30] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [In(ca_state, [ND,WI,AL,NC,OK,MS,TN]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 6] +Input [4]: [ca_address_sk#27, ca_county#28, ca_state#29, ca_country#30] + +(32) Filter [codegen id : 6] +Input [4]: [ca_address_sk#27, ca_county#28, ca_state#29, ca_country#30] +Condition : (ca_state#29 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#27)) + +(33) BroadcastExchange +Input [4]: [ca_address_sk#27, ca_county#28, ca_state#29, ca_country#30] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#31] + +(34) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_current_addr_sk#24] +Right keys [1]: [ca_address_sk#27] +Join condition: None + +(35) Project [codegen id : 7] +Output [6]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] +Input [8]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26, ca_address_sk#27, ca_county#28, ca_state#29, ca_country#30] + +(36) Exchange +Input [6]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] +Arguments: hashpartitioning(c_current_cdemo_sk#23, 5), true, [id=#32] + +(37) Sort [codegen id : 8] +Input [6]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] +Arguments: [c_current_cdemo_sk#23 ASC NULLS FIRST], false, 0 + +(38) Scan parquet default.customer_demographics +Output [1]: [cd_demo_sk#33] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(39) ColumnarToRow [codegen id : 9] +Input [1]: [cd_demo_sk#33] + +(40) Filter [codegen id : 9] +Input [1]: [cd_demo_sk#33] +Condition : isnotnull(cd_demo_sk#33) + +(41) Exchange +Input [1]: [cd_demo_sk#33] +Arguments: hashpartitioning(cd_demo_sk#33, 5), true, [id=#34] + +(42) Sort [codegen id : 10] +Input [1]: [cd_demo_sk#33] +Arguments: [cd_demo_sk#33 ASC NULLS FIRST], false, 0 + +(43) SortMergeJoin [codegen id : 11] +Left keys [1]: [c_current_cdemo_sk#23] +Right keys [1]: [cd_demo_sk#33] +Join condition: None + +(44) Project [codegen id : 11] +Output [5]: [c_customer_sk#22, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] +Input [7]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30, cd_demo_sk#33] + +(45) Exchange +Input [5]: [c_customer_sk#22, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] +Arguments: hashpartitioning(c_customer_sk#22, 5), true, [id=#35] + +(46) Sort [codegen id : 12] +Input [5]: [c_customer_sk#22, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] +Arguments: [c_customer_sk#22 ASC NULLS FIRST], false, 0 + +(47) SortMergeJoin [codegen id : 13] +Left keys [1]: [cs_bill_customer_sk#2] +Right keys [1]: [c_customer_sk#22] +Join condition: None + +(48) Project [codegen id : 13] +Output [11]: [i_item_id#19, ca_country#30, ca_state#29, ca_county#28, cast(cs_quantity#5 as decimal(12,2)) AS agg1#36, cast(cs_list_price#6 as decimal(12,2)) AS agg2#37, cast(cs_coupon_amt#8 as decimal(12,2)) AS agg3#38, cast(cs_sales_price#7 as decimal(12,2)) AS agg4#39, cast(cs_net_profit#9 as decimal(12,2)) AS agg5#40, cast(c_birth_year#26 as decimal(12,2)) AS agg6#41, cast(cd_dep_count#13 as decimal(12,2)) AS agg7#42] +Input [13]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, 
cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19, c_customer_sk#22, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] + +(49) HashAggregate [codegen id : 13] +Input [11]: [i_item_id#19, ca_country#30, ca_state#29, ca_county#28, agg1#36, agg2#37, agg3#38, agg4#39, agg5#40, agg6#41, agg7#42] +Keys [4]: [i_item_id#19, ca_country#30, ca_state#29, ca_county#28] +Functions [7]: [partial_avg(agg1#36), partial_avg(agg2#37), partial_avg(agg3#38), partial_avg(agg4#39), partial_avg(agg5#40), partial_avg(agg6#41), partial_avg(agg7#42)] +Aggregate Attributes [14]: [sum#43, count#44, sum#45, count#46, sum#47, count#48, sum#49, count#50, sum#51, count#52, sum#53, count#54, sum#55, count#56] +Results [18]: [i_item_id#19, ca_country#30, ca_state#29, ca_county#28, sum#57, count#58, sum#59, count#60, sum#61, count#62, sum#63, count#64, sum#65, count#66, sum#67, count#68, sum#69, count#70] + +(50) Exchange +Input [18]: [i_item_id#19, ca_country#30, ca_state#29, ca_county#28, sum#57, count#58, sum#59, count#60, sum#61, count#62, sum#63, count#64, sum#65, count#66, sum#67, count#68, sum#69, count#70] +Arguments: hashpartitioning(i_item_id#19, ca_country#30, ca_state#29, ca_county#28, 5), true, [id=#71] + +(51) HashAggregate [codegen id : 14] +Input [18]: [i_item_id#19, ca_country#30, ca_state#29, ca_county#28, sum#57, count#58, sum#59, count#60, sum#61, count#62, sum#63, count#64, sum#65, count#66, sum#67, count#68, sum#69, count#70] +Keys [4]: [i_item_id#19, ca_country#30, ca_state#29, ca_county#28] +Functions [7]: [avg(agg1#36), avg(agg2#37), avg(agg3#38), avg(agg4#39), avg(agg5#40), avg(agg6#41), avg(agg7#42)] +Aggregate Attributes [7]: [avg(agg1#36)#72, avg(agg2#37)#73, avg(agg3#38)#74, avg(agg4#39)#75, avg(agg5#40)#76, avg(agg6#41)#77, avg(agg7#42)#78] +Results [11]: [i_item_id#19, ca_country#30, ca_state#29, ca_county#28, avg(agg1#36)#72 AS agg1#79, avg(agg2#37)#73 AS agg2#80, avg(agg3#38)#74 AS agg3#81, avg(agg4#39)#75 AS agg4#82, 
avg(agg5#40)#76 AS agg5#83, avg(agg6#41)#77 AS agg6#84, avg(agg7#42)#78 AS agg7#85] + +(52) ReusedExchange [Reuses operator id: 24] +Output [8]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19] + +(53) Sort [codegen id : 19] +Input [8]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19] +Arguments: [cs_bill_customer_sk#2 ASC NULLS FIRST], false, 0 + +(54) Scan parquet default.customer +Output [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_month#25, c_birth_year#26] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/customer] +PushedFilters: [In(c_birth_month, [9,5,12,4,1,10]), IsNotNull(c_customer_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(55) ColumnarToRow [codegen id : 21] +Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_month#25, c_birth_year#26] + +(56) Filter [codegen id : 21] +Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_month#25, c_birth_year#26] +Condition : (((c_birth_month#25 IN (9,5,12,4,1,10) AND isnotnull(c_customer_sk#22)) AND isnotnull(c_current_cdemo_sk#23)) AND isnotnull(c_current_addr_sk#24)) + +(57) Project [codegen id : 21] +Output [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26] +Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_month#25, c_birth_year#26] + +(58) Scan parquet default.customer_address +Output [3]: [ca_address_sk#27, ca_state#29, ca_country#30] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/customer_address] 
+PushedFilters: [In(ca_state, [ND,WI,AL,NC,OK,MS,TN]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(59) ColumnarToRow [codegen id : 20] +Input [3]: [ca_address_sk#27, ca_state#29, ca_country#30] + +(60) Filter [codegen id : 20] +Input [3]: [ca_address_sk#27, ca_state#29, ca_country#30] +Condition : (ca_state#29 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#27)) + +(61) BroadcastExchange +Input [3]: [ca_address_sk#27, ca_state#29, ca_country#30] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#86] + +(62) BroadcastHashJoin [codegen id : 21] +Left keys [1]: [c_current_addr_sk#24] +Right keys [1]: [ca_address_sk#27] +Join condition: None + +(63) Project [codegen id : 21] +Output [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_state#29, ca_country#30] +Input [7]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26, ca_address_sk#27, ca_state#29, ca_country#30] + +(64) Exchange +Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_state#29, ca_country#30] +Arguments: hashpartitioning(c_current_cdemo_sk#23, 5), true, [id=#87] + +(65) Sort [codegen id : 22] +Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_state#29, ca_country#30] +Arguments: [c_current_cdemo_sk#23 ASC NULLS FIRST], false, 0 + +(66) ReusedExchange [Reuses operator id: 41] +Output [1]: [cd_demo_sk#88] + +(67) Sort [codegen id : 24] +Input [1]: [cd_demo_sk#88] +Arguments: [cd_demo_sk#88 ASC NULLS FIRST], false, 0 + +(68) SortMergeJoin [codegen id : 25] +Left keys [1]: [c_current_cdemo_sk#23] +Right keys [1]: [cd_demo_sk#88] +Join condition: None + +(69) Project [codegen id : 25] +Output [4]: [c_customer_sk#22, c_birth_year#26, ca_state#29, ca_country#30] +Input [6]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_state#29, ca_country#30, cd_demo_sk#88] + +(70) Exchange +Input [4]: [c_customer_sk#22, c_birth_year#26, ca_state#29, ca_country#30] 
+Arguments: hashpartitioning(c_customer_sk#22, 5), true, [id=#89] + +(71) Sort [codegen id : 26] +Input [4]: [c_customer_sk#22, c_birth_year#26, ca_state#29, ca_country#30] +Arguments: [c_customer_sk#22 ASC NULLS FIRST], false, 0 + +(72) SortMergeJoin [codegen id : 27] +Left keys [1]: [cs_bill_customer_sk#2] +Right keys [1]: [c_customer_sk#22] +Join condition: None + +(73) Project [codegen id : 27] +Output [10]: [i_item_id#19, ca_country#30, ca_state#29, cast(cs_quantity#5 as decimal(12,2)) AS agg1#36, cast(cs_list_price#6 as decimal(12,2)) AS agg2#37, cast(cs_coupon_amt#8 as decimal(12,2)) AS agg3#38, cast(cs_sales_price#7 as decimal(12,2)) AS agg4#39, cast(cs_net_profit#9 as decimal(12,2)) AS agg5#40, cast(c_birth_year#26 as decimal(12,2)) AS agg6#41, cast(cd_dep_count#13 as decimal(12,2)) AS agg7#42] +Input [12]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19, c_customer_sk#22, c_birth_year#26, ca_state#29, ca_country#30] + +(74) HashAggregate [codegen id : 27] +Input [10]: [i_item_id#19, ca_country#30, ca_state#29, agg1#36, agg2#37, agg3#38, agg4#39, agg5#40, agg6#41, agg7#42] +Keys [3]: [i_item_id#19, ca_country#30, ca_state#29] +Functions [7]: [partial_avg(agg1#36), partial_avg(agg2#37), partial_avg(agg3#38), partial_avg(agg4#39), partial_avg(agg5#40), partial_avg(agg6#41), partial_avg(agg7#42)] +Aggregate Attributes [14]: [sum#90, count#91, sum#92, count#93, sum#94, count#95, sum#96, count#97, sum#98, count#99, sum#100, count#101, sum#102, count#103] +Results [17]: [i_item_id#19, ca_country#30, ca_state#29, sum#104, count#105, sum#106, count#107, sum#108, count#109, sum#110, count#111, sum#112, count#113, sum#114, count#115, sum#116, count#117] + +(75) Exchange +Input [17]: [i_item_id#19, ca_country#30, ca_state#29, sum#104, count#105, sum#106, count#107, sum#108, count#109, sum#110, count#111, sum#112, count#113, sum#114, count#115, sum#116, count#117] +Arguments: 
hashpartitioning(i_item_id#19, ca_country#30, ca_state#29, 5), true, [id=#118] + +(76) HashAggregate [codegen id : 28] +Input [17]: [i_item_id#19, ca_country#30, ca_state#29, sum#104, count#105, sum#106, count#107, sum#108, count#109, sum#110, count#111, sum#112, count#113, sum#114, count#115, sum#116, count#117] +Keys [3]: [i_item_id#19, ca_country#30, ca_state#29] +Functions [7]: [avg(agg1#36), avg(agg2#37), avg(agg3#38), avg(agg4#39), avg(agg5#40), avg(agg6#41), avg(agg7#42)] +Aggregate Attributes [7]: [avg(agg1#36)#119, avg(agg2#37)#120, avg(agg3#38)#121, avg(agg4#39)#122, avg(agg5#40)#123, avg(agg6#41)#124, avg(agg7#42)#125] +Results [11]: [i_item_id#19, ca_country#30, ca_state#29, null AS county#126, avg(agg1#36)#119 AS agg1#127, avg(agg2#37)#120 AS agg2#128, avg(agg3#38)#121 AS agg3#129, avg(agg4#39)#122 AS agg4#130, avg(agg5#40)#123 AS agg5#131, avg(agg6#41)#124 AS agg6#132, avg(agg7#42)#125 AS agg7#133] + +(77) ReusedExchange [Reuses operator id: 24] +Output [8]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19] + +(78) Sort [codegen id : 33] +Input [8]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19] +Arguments: [cs_bill_customer_sk#2 ASC NULLS FIRST], false, 0 + +(79) Scan parquet default.customer +Output [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_month#25, c_birth_year#26] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/customer] +PushedFilters: [In(c_birth_month, [9,5,12,4,1,10]), IsNotNull(c_customer_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(80) ColumnarToRow [codegen id : 35] +Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_month#25, 
c_birth_year#26] + +(81) Filter [codegen id : 35] +Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_month#25, c_birth_year#26] +Condition : (((c_birth_month#25 IN (9,5,12,4,1,10) AND isnotnull(c_customer_sk#22)) AND isnotnull(c_current_cdemo_sk#23)) AND isnotnull(c_current_addr_sk#24)) + +(82) Project [codegen id : 35] +Output [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26] +Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_month#25, c_birth_year#26] + +(83) Scan parquet default.customer_address +Output [3]: [ca_address_sk#27, ca_state#29, ca_country#30] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [In(ca_state, [ND,WI,AL,NC,OK,MS,TN]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(84) ColumnarToRow [codegen id : 34] +Input [3]: [ca_address_sk#27, ca_state#29, ca_country#30] + +(85) Filter [codegen id : 34] +Input [3]: [ca_address_sk#27, ca_state#29, ca_country#30] +Condition : (ca_state#29 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#27)) + +(86) Project [codegen id : 34] +Output [2]: [ca_address_sk#27, ca_country#30] +Input [3]: [ca_address_sk#27, ca_state#29, ca_country#30] + +(87) BroadcastExchange +Input [2]: [ca_address_sk#27, ca_country#30] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#134] + +(88) BroadcastHashJoin [codegen id : 35] +Left keys [1]: [c_current_addr_sk#24] +Right keys [1]: [ca_address_sk#27] +Join condition: None + +(89) Project [codegen id : 35] +Output [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_country#30] +Input [6]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26, ca_address_sk#27, ca_country#30] + +(90) Exchange +Input [4]: [c_customer_sk#22, c_current_cdemo_sk#23, 
c_birth_year#26, ca_country#30] +Arguments: hashpartitioning(c_current_cdemo_sk#23, 5), true, [id=#135] + +(91) Sort [codegen id : 36] +Input [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_country#30] +Arguments: [c_current_cdemo_sk#23 ASC NULLS FIRST], false, 0 + +(92) ReusedExchange [Reuses operator id: 41] +Output [1]: [cd_demo_sk#136] + +(93) Sort [codegen id : 38] +Input [1]: [cd_demo_sk#136] +Arguments: [cd_demo_sk#136 ASC NULLS FIRST], false, 0 + +(94) SortMergeJoin [codegen id : 39] +Left keys [1]: [c_current_cdemo_sk#23] +Right keys [1]: [cd_demo_sk#136] +Join condition: None + +(95) Project [codegen id : 39] +Output [3]: [c_customer_sk#22, c_birth_year#26, ca_country#30] +Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_country#30, cd_demo_sk#136] + +(96) Exchange +Input [3]: [c_customer_sk#22, c_birth_year#26, ca_country#30] +Arguments: hashpartitioning(c_customer_sk#22, 5), true, [id=#137] + +(97) Sort [codegen id : 40] +Input [3]: [c_customer_sk#22, c_birth_year#26, ca_country#30] +Arguments: [c_customer_sk#22 ASC NULLS FIRST], false, 0 + +(98) SortMergeJoin [codegen id : 41] +Left keys [1]: [cs_bill_customer_sk#2] +Right keys [1]: [c_customer_sk#22] +Join condition: None + +(99) Project [codegen id : 41] +Output [9]: [i_item_id#19, ca_country#30, cast(cs_quantity#5 as decimal(12,2)) AS agg1#36, cast(cs_list_price#6 as decimal(12,2)) AS agg2#37, cast(cs_coupon_amt#8 as decimal(12,2)) AS agg3#38, cast(cs_sales_price#7 as decimal(12,2)) AS agg4#39, cast(cs_net_profit#9 as decimal(12,2)) AS agg5#40, cast(c_birth_year#26 as decimal(12,2)) AS agg6#41, cast(cd_dep_count#13 as decimal(12,2)) AS agg7#42] +Input [11]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19, c_customer_sk#22, c_birth_year#26, ca_country#30] + +(100) HashAggregate [codegen id : 41] +Input [9]: [i_item_id#19, ca_country#30, agg1#36, agg2#37, agg3#38, 
agg4#39, agg5#40, agg6#41, agg7#42] +Keys [2]: [i_item_id#19, ca_country#30] +Functions [7]: [partial_avg(agg1#36), partial_avg(agg2#37), partial_avg(agg3#38), partial_avg(agg4#39), partial_avg(agg5#40), partial_avg(agg6#41), partial_avg(agg7#42)] +Aggregate Attributes [14]: [sum#138, count#139, sum#140, count#141, sum#142, count#143, sum#144, count#145, sum#146, count#147, sum#148, count#149, sum#150, count#151] +Results [16]: [i_item_id#19, ca_country#30, sum#152, count#153, sum#154, count#155, sum#156, count#157, sum#158, count#159, sum#160, count#161, sum#162, count#163, sum#164, count#165] + +(101) Exchange +Input [16]: [i_item_id#19, ca_country#30, sum#152, count#153, sum#154, count#155, sum#156, count#157, sum#158, count#159, sum#160, count#161, sum#162, count#163, sum#164, count#165] +Arguments: hashpartitioning(i_item_id#19, ca_country#30, 5), true, [id=#166] + +(102) HashAggregate [codegen id : 42] +Input [16]: [i_item_id#19, ca_country#30, sum#152, count#153, sum#154, count#155, sum#156, count#157, sum#158, count#159, sum#160, count#161, sum#162, count#163, sum#164, count#165] +Keys [2]: [i_item_id#19, ca_country#30] +Functions [7]: [avg(agg1#36), avg(agg2#37), avg(agg3#38), avg(agg4#39), avg(agg5#40), avg(agg6#41), avg(agg7#42)] +Aggregate Attributes [7]: [avg(agg1#36)#167, avg(agg2#37)#168, avg(agg3#38)#169, avg(agg4#39)#170, avg(agg5#40)#171, avg(agg6#41)#172, avg(agg7#42)#173] +Results [11]: [i_item_id#19, ca_country#30, null AS ca_state#174, null AS county#175, avg(agg1#36)#167 AS agg1#176, avg(agg2#37)#168 AS agg2#177, avg(agg3#38)#169 AS agg3#178, avg(agg4#39)#170 AS agg4#179, avg(agg5#40)#171 AS agg5#180, avg(agg6#41)#172 AS agg6#181, avg(agg7#42)#173 AS agg7#182] + +(103) Scan parquet default.catalog_sales +Output [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(104) ColumnarToRow [codegen id : 49] +Input [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9] + +(105) Filter [codegen id : 49] +Input [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9] +Condition : (((isnotnull(cs_bill_cdemo_sk#3) AND isnotnull(cs_bill_customer_sk#2)) AND isnotnull(cs_sold_date_sk#1)) AND isnotnull(cs_item_sk#4)) + +(106) ReusedExchange [Reuses operator id: 8] +Output [2]: [cd_demo_sk#10, cd_dep_count#13] + +(107) BroadcastHashJoin [codegen id : 49] +Left keys [1]: [cs_bill_cdemo_sk#3] +Right keys [1]: [cd_demo_sk#10] +Join condition: None + +(108) Project [codegen id : 49] +Output [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13] +Input [11]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_demo_sk#10, cd_dep_count#13] + +(109) ReusedExchange [Reuses operator id: 15] +Output [1]: [d_date_sk#15] + +(110) BroadcastHashJoin [codegen id : 49] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#15] +Join condition: None + +(111) Project [codegen id : 49] +Output [8]: [cs_bill_customer_sk#2, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13] +Input [10]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#4, cs_quantity#5, cs_list_price#6, 
cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, d_date_sk#15] + +(112) Scan parquet default.customer +Output [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_month#25, c_birth_year#26] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/customer] +PushedFilters: [In(c_birth_month, [9,5,12,4,1,10]), IsNotNull(c_customer_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(113) ColumnarToRow [codegen id : 46] +Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_month#25, c_birth_year#26] + +(114) Filter [codegen id : 46] +Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_month#25, c_birth_year#26] +Condition : (((c_birth_month#25 IN (9,5,12,4,1,10) AND isnotnull(c_customer_sk#22)) AND isnotnull(c_current_cdemo_sk#23)) AND isnotnull(c_current_addr_sk#24)) + +(115) Project [codegen id : 46] +Output [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26] +Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_month#25, c_birth_year#26] + +(116) Scan parquet default.customer_address +Output [2]: [ca_address_sk#27, ca_state#29] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [In(ca_state, [ND,WI,AL,NC,OK,MS,TN]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(117) ColumnarToRow [codegen id : 45] +Input [2]: [ca_address_sk#27, ca_state#29] + +(118) Filter [codegen id : 45] +Input [2]: [ca_address_sk#27, ca_state#29] +Condition : (ca_state#29 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#27)) + +(119) Project [codegen id : 45] +Output [1]: [ca_address_sk#27] +Input [2]: [ca_address_sk#27, 
ca_state#29] + +(120) BroadcastExchange +Input [1]: [ca_address_sk#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#183] + +(121) BroadcastHashJoin [codegen id : 46] +Left keys [1]: [c_current_addr_sk#24] +Right keys [1]: [ca_address_sk#27] +Join condition: None + +(122) Project [codegen id : 46] +Output [3]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26] +Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26, ca_address_sk#27] + +(123) BroadcastExchange +Input [3]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#184] + +(124) Scan parquet default.customer_demographics +Output [1]: [cd_demo_sk#185] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(125) ColumnarToRow +Input [1]: [cd_demo_sk#185] + +(126) Filter +Input [1]: [cd_demo_sk#185] +Condition : isnotnull(cd_demo_sk#185) + +(127) BroadcastHashJoin [codegen id : 47] +Left keys [1]: [c_current_cdemo_sk#23] +Right keys [1]: [cd_demo_sk#185] +Join condition: None + +(128) Project [codegen id : 47] +Output [2]: [c_customer_sk#22, c_birth_year#26] +Input [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, cd_demo_sk#185] + +(129) BroadcastExchange +Input [2]: [c_customer_sk#22, c_birth_year#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#186] + +(130) BroadcastHashJoin [codegen id : 49] +Left keys [1]: [cs_bill_customer_sk#2] +Right keys [1]: [c_customer_sk#22] +Join condition: None + +(131) Project [codegen id : 49] +Output [8]: [cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, 
c_birth_year#26] +Input [10]: [cs_bill_customer_sk#2, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_customer_sk#22, c_birth_year#26] + +(132) ReusedExchange [Reuses operator id: 21] +Output [2]: [i_item_sk#18, i_item_id#19] + +(133) BroadcastHashJoin [codegen id : 49] +Left keys [1]: [cs_item_sk#4] +Right keys [1]: [i_item_sk#18] +Join condition: None + +(134) Project [codegen id : 49] +Output [8]: [i_item_id#19, cast(cs_quantity#5 as decimal(12,2)) AS agg1#36, cast(cs_list_price#6 as decimal(12,2)) AS agg2#37, cast(cs_coupon_amt#8 as decimal(12,2)) AS agg3#38, cast(cs_sales_price#7 as decimal(12,2)) AS agg4#39, cast(cs_net_profit#9 as decimal(12,2)) AS agg5#40, cast(c_birth_year#26 as decimal(12,2)) AS agg6#41, cast(cd_dep_count#13 as decimal(12,2)) AS agg7#42] +Input [10]: [cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, i_item_sk#18, i_item_id#19] + +(135) HashAggregate [codegen id : 49] +Input [8]: [i_item_id#19, agg1#36, agg2#37, agg3#38, agg4#39, agg5#40, agg6#41, agg7#42] +Keys [1]: [i_item_id#19] +Functions [7]: [partial_avg(agg1#36), partial_avg(agg2#37), partial_avg(agg3#38), partial_avg(agg4#39), partial_avg(agg5#40), partial_avg(agg6#41), partial_avg(agg7#42)] +Aggregate Attributes [14]: [sum#187, count#188, sum#189, count#190, sum#191, count#192, sum#193, count#194, sum#195, count#196, sum#197, count#198, sum#199, count#200] +Results [15]: [i_item_id#19, sum#201, count#202, sum#203, count#204, sum#205, count#206, sum#207, count#208, sum#209, count#210, sum#211, count#212, sum#213, count#214] + +(136) Exchange +Input [15]: [i_item_id#19, sum#201, count#202, sum#203, count#204, sum#205, count#206, sum#207, count#208, sum#209, count#210, sum#211, count#212, sum#213, count#214] +Arguments: hashpartitioning(i_item_id#19, 5), true, [id=#215] + +(137) HashAggregate [codegen id : 50] +Input [15]: 
[i_item_id#19, sum#201, count#202, sum#203, count#204, sum#205, count#206, sum#207, count#208, sum#209, count#210, sum#211, count#212, sum#213, count#214] +Keys [1]: [i_item_id#19] +Functions [7]: [avg(agg1#36), avg(agg2#37), avg(agg3#38), avg(agg4#39), avg(agg5#40), avg(agg6#41), avg(agg7#42)] +Aggregate Attributes [7]: [avg(agg1#36)#216, avg(agg2#37)#217, avg(agg3#38)#218, avg(agg4#39)#219, avg(agg5#40)#220, avg(agg6#41)#221, avg(agg7#42)#222] +Results [11]: [i_item_id#19, null AS ca_country#223, null AS ca_state#224, null AS county#225, avg(agg1#36)#216 AS agg1#226, avg(agg2#37)#217 AS agg2#227, avg(agg3#38)#218 AS agg3#228, avg(agg4#39)#219 AS agg4#229, avg(agg5#40)#220 AS agg5#230, avg(agg6#41)#221 AS agg6#231, avg(agg7#42)#222 AS agg7#232] + +(138) Scan parquet default.catalog_sales +Output [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(139) ColumnarToRow [codegen id : 57] +Input [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9] + +(140) Filter [codegen id : 57] +Input [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9] +Condition : (((isnotnull(cs_bill_cdemo_sk#3) AND isnotnull(cs_bill_customer_sk#2)) AND isnotnull(cs_sold_date_sk#1)) AND isnotnull(cs_item_sk#4)) + +(141) ReusedExchange [Reuses operator id: 8] +Output [2]: [cd_demo_sk#10, cd_dep_count#13] + +(142) BroadcastHashJoin 
[codegen id : 57] +Left keys [1]: [cs_bill_cdemo_sk#3] +Right keys [1]: [cd_demo_sk#10] +Join condition: None + +(143) Project [codegen id : 57] +Output [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13] +Input [11]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_demo_sk#10, cd_dep_count#13] + +(144) ReusedExchange [Reuses operator id: 15] +Output [1]: [d_date_sk#15] + +(145) BroadcastHashJoin [codegen id : 57] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#15] +Join condition: None + +(146) Project [codegen id : 57] +Output [8]: [cs_bill_customer_sk#2, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13] +Input [10]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, d_date_sk#15] + +(147) Scan parquet default.item +Output [1]: [i_item_sk#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(148) ColumnarToRow [codegen id : 53] +Input [1]: [i_item_sk#18] + +(149) Filter [codegen id : 53] +Input [1]: [i_item_sk#18] +Condition : isnotnull(i_item_sk#18) + +(150) BroadcastExchange +Input [1]: [i_item_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#233] + +(151) BroadcastHashJoin [codegen id : 57] +Left keys [1]: [cs_item_sk#4] +Right keys [1]: [i_item_sk#18] +Join condition: None + +(152) Project [codegen id : 57] +Output [7]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, 
cs_net_profit#9, cd_dep_count#13] +Input [9]: [cs_bill_customer_sk#2, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_sk#18] + +(153) ReusedExchange [Reuses operator id: 129] +Output [2]: [c_customer_sk#22, c_birth_year#26] + +(154) BroadcastHashJoin [codegen id : 57] +Left keys [1]: [cs_bill_customer_sk#2] +Right keys [1]: [c_customer_sk#22] +Join condition: None + +(155) Project [codegen id : 57] +Output [7]: [cast(cs_quantity#5 as decimal(12,2)) AS agg1#36, cast(cs_list_price#6 as decimal(12,2)) AS agg2#37, cast(cs_coupon_amt#8 as decimal(12,2)) AS agg3#38, cast(cs_sales_price#7 as decimal(12,2)) AS agg4#39, cast(cs_net_profit#9 as decimal(12,2)) AS agg5#40, cast(c_birth_year#26 as decimal(12,2)) AS agg6#41, cast(cd_dep_count#13 as decimal(12,2)) AS agg7#42] +Input [9]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_customer_sk#22, c_birth_year#26] + +(156) HashAggregate [codegen id : 57] +Input [7]: [agg1#36, agg2#37, agg3#38, agg4#39, agg5#40, agg6#41, agg7#42] +Keys: [] +Functions [7]: [partial_avg(agg1#36), partial_avg(agg2#37), partial_avg(agg3#38), partial_avg(agg4#39), partial_avg(agg5#40), partial_avg(agg6#41), partial_avg(agg7#42)] +Aggregate Attributes [14]: [sum#234, count#235, sum#236, count#237, sum#238, count#239, sum#240, count#241, sum#242, count#243, sum#244, count#245, sum#246, count#247] +Results [14]: [sum#248, count#249, sum#250, count#251, sum#252, count#253, sum#254, count#255, sum#256, count#257, sum#258, count#259, sum#260, count#261] + +(157) Exchange +Input [14]: [sum#248, count#249, sum#250, count#251, sum#252, count#253, sum#254, count#255, sum#256, count#257, sum#258, count#259, sum#260, count#261] +Arguments: SinglePartition, true, [id=#262] + +(158) HashAggregate [codegen id : 58] +Input [14]: [sum#248, count#249, sum#250, count#251, sum#252, count#253, sum#254, count#255, 
sum#256, count#257, sum#258, count#259, sum#260, count#261] +Keys: [] +Functions [7]: [avg(agg1#36), avg(agg2#37), avg(agg3#38), avg(agg4#39), avg(agg5#40), avg(agg6#41), avg(agg7#42)] +Aggregate Attributes [7]: [avg(agg1#36)#263, avg(agg2#37)#264, avg(agg3#38)#265, avg(agg4#39)#266, avg(agg5#40)#267, avg(agg6#41)#268, avg(agg7#42)#269] +Results [11]: [null AS i_item_id#270, null AS ca_country#271, null AS ca_state#272, null AS county#273, avg(agg1#36)#263 AS agg1#274, avg(agg2#37)#264 AS agg2#275, avg(agg3#38)#265 AS agg3#276, avg(agg4#39)#266 AS agg4#277, avg(agg5#40)#267 AS agg5#278, avg(agg6#41)#268 AS agg6#279, avg(agg7#42)#269 AS agg7#280] + +(159) Union + +(160) TakeOrderedAndProject +Input [11]: [i_item_id#19, ca_country#30, ca_state#29, ca_county#28, agg1#79, agg2#80, agg3#81, agg4#82, agg5#83, agg6#84, agg7#85] +Arguments: 100, [ca_country#30 ASC NULLS FIRST, ca_state#29 ASC NULLS FIRST, ca_county#28 ASC NULLS FIRST, i_item_id#19 ASC NULLS FIRST], [i_item_id#19, ca_country#30, ca_state#29, ca_county#28, agg1#79, agg2#80, agg3#81, agg4#82, agg5#83, agg6#84, agg7#85] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.sf100/simplified.txt new file mode 100644 index 0000000000000..9eb82fd1c6222 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.sf100/simplified.txt @@ -0,0 +1,262 @@ +TakeOrderedAndProject [agg1,agg2,agg3,agg4,agg5,agg6,agg7,ca_country,ca_county,ca_state,i_item_id] + Union + WholeStageCodegen (14) + HashAggregate [ca_country,ca_county,ca_state,count,count,count,count,count,count,count,i_item_id,sum,sum,sum,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,agg5,agg6,agg7,avg(agg1),avg(agg2),avg(agg3),avg(agg4),avg(agg5),avg(agg6),avg(agg7),count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum] + InputAdapter + Exchange [ca_country,ca_county,ca_state,i_item_id] #1 + 
WholeStageCodegen (13) + HashAggregate [agg1,agg2,agg3,agg4,agg5,agg6,agg7,ca_country,ca_county,ca_state,i_item_id] [count,count,count,count,count,count,count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [c_birth_year,ca_country,ca_county,ca_state,cd_dep_count,cs_coupon_amt,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,i_item_id] + SortMergeJoin [c_customer_sk,cs_bill_customer_sk] + InputAdapter + WholeStageCodegen (5) + Sort [cs_bill_customer_sk] + InputAdapter + Exchange [cs_bill_customer_sk] #2 + WholeStageCodegen (4) + Project [cd_dep_count,cs_bill_customer_sk,cs_coupon_amt,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cd_dep_count,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cd_dep_count,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [cd_demo_sk,cs_bill_cdemo_sk] + Filter [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [cd_demo_sk,cd_dep_count] + Filter [cd_demo_sk,cd_education_status,cd_gender] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_dep_count,cd_education_status,cd_gender] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter 
+ Scan parquet default.item [i_item_id,i_item_sk] + InputAdapter + WholeStageCodegen (12) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #6 + WholeStageCodegen (11) + Project [c_birth_year,c_customer_sk,ca_country,ca_county,ca_state] + SortMergeJoin [c_current_cdemo_sk,cd_demo_sk] + InputAdapter + WholeStageCodegen (8) + Sort [c_current_cdemo_sk] + InputAdapter + Exchange [c_current_cdemo_sk] #7 + WholeStageCodegen (7) + Project [c_birth_year,c_current_cdemo_sk,c_customer_sk,ca_country,ca_county,ca_state] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_birth_year,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + Filter [c_birth_month,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_birth_month,c_birth_year,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (6) + Filter [ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_country,ca_county,ca_state] + InputAdapter + WholeStageCodegen (10) + Sort [cd_demo_sk] + InputAdapter + Exchange [cd_demo_sk] #9 + WholeStageCodegen (9) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk] + WholeStageCodegen (28) + HashAggregate [ca_country,ca_state,count,count,count,count,count,count,count,i_item_id,sum,sum,sum,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,agg5,agg6,agg7,avg(agg1),avg(agg2),avg(agg3),avg(agg4),avg(agg5),avg(agg6),avg(agg7),count,count,count,count,count,count,count,county,sum,sum,sum,sum,sum,sum,sum] + InputAdapter + Exchange [ca_country,ca_state,i_item_id] #10 + WholeStageCodegen (27) + HashAggregate [agg1,agg2,agg3,agg4,agg5,agg6,agg7,ca_country,ca_state,i_item_id] [count,count,count,count,count,count,count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project 
[c_birth_year,ca_country,ca_state,cd_dep_count,cs_coupon_amt,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,i_item_id] + SortMergeJoin [c_customer_sk,cs_bill_customer_sk] + InputAdapter + WholeStageCodegen (19) + Sort [cs_bill_customer_sk] + InputAdapter + ReusedExchange [cd_dep_count,cs_bill_customer_sk,cs_coupon_amt,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,i_item_id] #2 + InputAdapter + WholeStageCodegen (26) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #11 + WholeStageCodegen (25) + Project [c_birth_year,c_customer_sk,ca_country,ca_state] + SortMergeJoin [c_current_cdemo_sk,cd_demo_sk] + InputAdapter + WholeStageCodegen (22) + Sort [c_current_cdemo_sk] + InputAdapter + Exchange [c_current_cdemo_sk] #12 + WholeStageCodegen (21) + Project [c_birth_year,c_current_cdemo_sk,c_customer_sk,ca_country,ca_state] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_birth_year,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + Filter [c_birth_month,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_birth_month,c_birth_year,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (20) + Filter [ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_country,ca_state] + InputAdapter + WholeStageCodegen (24) + Sort [cd_demo_sk] + InputAdapter + ReusedExchange [cd_demo_sk] #9 + WholeStageCodegen (42) + HashAggregate [ca_country,count,count,count,count,count,count,count,i_item_id,sum,sum,sum,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,agg5,agg6,agg7,avg(agg1),avg(agg2),avg(agg3),avg(agg4),avg(agg5),avg(agg6),avg(agg7),ca_state,count,count,count,count,count,count,count,county,sum,sum,sum,sum,sum,sum,sum] + InputAdapter + Exchange [ca_country,i_item_id] #14 + WholeStageCodegen (41) + HashAggregate 
[agg1,agg2,agg3,agg4,agg5,agg6,agg7,ca_country,i_item_id] [count,count,count,count,count,count,count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [c_birth_year,ca_country,cd_dep_count,cs_coupon_amt,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,i_item_id] + SortMergeJoin [c_customer_sk,cs_bill_customer_sk] + InputAdapter + WholeStageCodegen (33) + Sort [cs_bill_customer_sk] + InputAdapter + ReusedExchange [cd_dep_count,cs_bill_customer_sk,cs_coupon_amt,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,i_item_id] #2 + InputAdapter + WholeStageCodegen (40) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #15 + WholeStageCodegen (39) + Project [c_birth_year,c_customer_sk,ca_country] + SortMergeJoin [c_current_cdemo_sk,cd_demo_sk] + InputAdapter + WholeStageCodegen (36) + Sort [c_current_cdemo_sk] + InputAdapter + Exchange [c_current_cdemo_sk] #16 + WholeStageCodegen (35) + Project [c_birth_year,c_current_cdemo_sk,c_customer_sk,ca_country] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_birth_year,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + Filter [c_birth_month,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_birth_month,c_birth_year,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + InputAdapter + BroadcastExchange #17 + WholeStageCodegen (34) + Project [ca_address_sk,ca_country] + Filter [ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_country,ca_state] + InputAdapter + WholeStageCodegen (38) + Sort [cd_demo_sk] + InputAdapter + ReusedExchange [cd_demo_sk] #9 + WholeStageCodegen (50) + HashAggregate [count,count,count,count,count,count,count,i_item_id,sum,sum,sum,sum,sum,sum,sum] 
[agg1,agg2,agg3,agg4,agg5,agg6,agg7,avg(agg1),avg(agg2),avg(agg3),avg(agg4),avg(agg5),avg(agg6),avg(agg7),ca_country,ca_state,count,count,count,count,count,count,count,county,sum,sum,sum,sum,sum,sum,sum] + InputAdapter + Exchange [i_item_id] #18 + WholeStageCodegen (49) + HashAggregate [agg1,agg2,agg3,agg4,agg5,agg6,agg7,i_item_id] [count,count,count,count,count,count,count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [c_birth_year,cd_dep_count,cs_coupon_amt,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [c_birth_year,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price] + BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] + Project [cd_dep_count,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cd_dep_count,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [cd_demo_sk,cs_bill_cdemo_sk] + Filter [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + InputAdapter + ReusedExchange [cd_demo_sk,cd_dep_count] #3 + InputAdapter + ReusedExchange [d_date_sk] #4 + InputAdapter + BroadcastExchange #19 + WholeStageCodegen (47) + Project [c_birth_year,c_customer_sk] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + InputAdapter + BroadcastExchange #20 + WholeStageCodegen (46) + Project [c_birth_year,c_current_cdemo_sk,c_customer_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_birth_year,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + Filter 
[c_birth_month,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_birth_month,c_birth_year,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + InputAdapter + BroadcastExchange #21 + WholeStageCodegen (45) + Project [ca_address_sk] + Filter [ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state] + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk] + InputAdapter + ReusedExchange [i_item_id,i_item_sk] #5 + WholeStageCodegen (58) + HashAggregate [count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,agg5,agg6,agg7,avg(agg1),avg(agg2),avg(agg3),avg(agg4),avg(agg5),avg(agg6),avg(agg7),ca_country,ca_state,count,count,count,count,count,count,count,county,i_item_id,sum,sum,sum,sum,sum,sum,sum] + InputAdapter + Exchange #22 + WholeStageCodegen (57) + HashAggregate [agg1,agg2,agg3,agg4,agg5,agg6,agg7] [count,count,count,count,count,count,count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [c_birth_year,cd_dep_count,cs_coupon_amt,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price] + BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] + Project [cd_dep_count,cs_bill_customer_sk,cs_coupon_amt,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cd_dep_count,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cd_dep_count,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [cd_demo_sk,cs_bill_cdemo_sk] + Filter [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet 
default.catalog_sales [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + InputAdapter + ReusedExchange [cd_demo_sk,cd_dep_count] #3 + InputAdapter + ReusedExchange [d_date_sk] #4 + InputAdapter + BroadcastExchange #23 + WholeStageCodegen (53) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk] + InputAdapter + ReusedExchange [c_birth_year,c_customer_sk] #19 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a/explain.txt new file mode 100644 index 0000000000000..b7e9b4857929e --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a/explain.txt @@ -0,0 +1,856 @@ +== Physical Plan == +TakeOrderedAndProject (157) ++- Union (156) + :- * HashAggregate (45) + : +- Exchange (44) + : +- * HashAggregate (43) + : +- * Project (42) + : +- * BroadcastHashJoin Inner BuildRight (41) + : :- * Project (36) + : : +- * BroadcastHashJoin Inner BuildRight (35) + : : :- * Project (29) + : : : +- * BroadcastHashJoin Inner BuildRight (28) + : : : :- * Project (23) + : : : : +- * BroadcastHashJoin Inner BuildRight (22) + : : : : :- * Project (17) + : : : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : : : :- * Project (10) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : : :- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet default.catalog_sales (1) + : : : : : : +- BroadcastExchange (8) + : : : : : : +- * Project (7) + : : : : : : +- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : +- Scan parquet default.customer_demographics (4) + : : : : : +- BroadcastExchange (15) + : : : : : +- * Project (14) + : : : : : +- * Filter (13) + : : : : : +- * ColumnarToRow (12) + : : : : : +- Scan parquet default.customer (11) + : : : : +- 
BroadcastExchange (21) + : : : : +- * Filter (20) + : : : : +- * ColumnarToRow (19) + : : : : +- Scan parquet default.customer_demographics (18) + : : : +- BroadcastExchange (27) + : : : +- * Filter (26) + : : : +- * ColumnarToRow (25) + : : : +- Scan parquet default.customer_address (24) + : : +- BroadcastExchange (34) + : : +- * Project (33) + : : +- * Filter (32) + : : +- * ColumnarToRow (31) + : : +- Scan parquet default.date_dim (30) + : +- BroadcastExchange (40) + : +- * Filter (39) + : +- * ColumnarToRow (38) + : +- Scan parquet default.item (37) + :- * HashAggregate (72) + : +- Exchange (71) + : +- * HashAggregate (70) + : +- * Project (69) + : +- * BroadcastHashJoin Inner BuildRight (68) + : :- * Project (66) + : : +- * BroadcastHashJoin Inner BuildRight (65) + : : :- * Project (63) + : : : +- * BroadcastHashJoin Inner BuildRight (62) + : : : :- * Project (57) + : : : : +- * BroadcastHashJoin Inner BuildRight (56) + : : : : :- * Project (54) + : : : : : +- * BroadcastHashJoin Inner BuildRight (53) + : : : : : :- * Project (51) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (50) + : : : : : : :- * Filter (48) + : : : : : : : +- * ColumnarToRow (47) + : : : : : : : +- Scan parquet default.catalog_sales (46) + : : : : : : +- ReusedExchange (49) + : : : : : +- ReusedExchange (52) + : : : : +- ReusedExchange (55) + : : : +- BroadcastExchange (61) + : : : +- * Filter (60) + : : : +- * ColumnarToRow (59) + : : : +- Scan parquet default.customer_address (58) + : : +- ReusedExchange (64) + : +- ReusedExchange (67) + :- * HashAggregate (100) + : +- Exchange (99) + : +- * HashAggregate (98) + : +- * Project (97) + : +- * BroadcastHashJoin Inner BuildRight (96) + : :- * Project (94) + : : +- * BroadcastHashJoin Inner BuildRight (93) + : : :- * Project (91) + : : : +- * BroadcastHashJoin Inner BuildRight (90) + : : : :- * Project (84) + : : : : +- * BroadcastHashJoin Inner BuildRight (83) + : : : : :- * Project (81) + : : : : : +- * BroadcastHashJoin Inner 
BuildRight (80) + : : : : : :- * Project (78) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (77) + : : : : : : :- * Filter (75) + : : : : : : : +- * ColumnarToRow (74) + : : : : : : : +- Scan parquet default.catalog_sales (73) + : : : : : : +- ReusedExchange (76) + : : : : : +- ReusedExchange (79) + : : : : +- ReusedExchange (82) + : : : +- BroadcastExchange (89) + : : : +- * Project (88) + : : : +- * Filter (87) + : : : +- * ColumnarToRow (86) + : : : +- Scan parquet default.customer_address (85) + : : +- ReusedExchange (92) + : +- ReusedExchange (95) + :- * HashAggregate (128) + : +- Exchange (127) + : +- * HashAggregate (126) + : +- * Project (125) + : +- * BroadcastHashJoin Inner BuildRight (124) + : :- * Project (122) + : : +- * BroadcastHashJoin Inner BuildRight (121) + : : :- * Project (119) + : : : +- * BroadcastHashJoin Inner BuildRight (118) + : : : :- * Project (112) + : : : : +- * BroadcastHashJoin Inner BuildRight (111) + : : : : :- * Project (109) + : : : : : +- * BroadcastHashJoin Inner BuildRight (108) + : : : : : :- * Project (106) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (105) + : : : : : : :- * Filter (103) + : : : : : : : +- * ColumnarToRow (102) + : : : : : : : +- Scan parquet default.catalog_sales (101) + : : : : : : +- ReusedExchange (104) + : : : : : +- ReusedExchange (107) + : : : : +- ReusedExchange (110) + : : : +- BroadcastExchange (117) + : : : +- * Project (116) + : : : +- * Filter (115) + : : : +- * ColumnarToRow (114) + : : : +- Scan parquet default.customer_address (113) + : : +- ReusedExchange (120) + : +- ReusedExchange (123) + +- * HashAggregate (155) + +- Exchange (154) + +- * HashAggregate (153) + +- * Project (152) + +- * BroadcastHashJoin Inner BuildRight (151) + :- * Project (146) + : +- * BroadcastHashJoin Inner BuildRight (145) + : :- * Project (143) + : : +- * BroadcastHashJoin Inner BuildRight (142) + : : :- * Project (140) + : : : +- * BroadcastHashJoin Inner BuildRight (139) + : : : :- * Project 
(137) + : : : : +- * BroadcastHashJoin Inner BuildRight (136) + : : : : :- * Project (134) + : : : : : +- * BroadcastHashJoin Inner BuildRight (133) + : : : : : :- * Filter (131) + : : : : : : +- * ColumnarToRow (130) + : : : : : : +- Scan parquet default.catalog_sales (129) + : : : : : +- ReusedExchange (132) + : : : : +- ReusedExchange (135) + : : : +- ReusedExchange (138) + : : +- ReusedExchange (141) + : +- ReusedExchange (144) + +- BroadcastExchange (150) + +- * Filter (149) + +- * ColumnarToRow (148) + +- Scan parquet default.item (147) + + +(1) Scan parquet default.catalog_sales +Output [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 7] +Input [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9] + +(3) Filter [codegen id : 7] +Input [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9] +Condition : (((isnotnull(cs_bill_cdemo_sk#3) AND isnotnull(cs_bill_customer_sk#2)) AND isnotnull(cs_sold_date_sk#1)) AND isnotnull(cs_item_sk#4)) + +(4) Scan parquet default.customer_demographics +Output [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/customer_demographics] +PushedFilters: 
[IsNotNull(cd_gender), IsNotNull(cd_education_status), EqualTo(cd_gender,M), EqualTo(cd_education_status,College), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] + +(6) Filter [codegen id : 1] +Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] +Condition : ((((isnotnull(cd_gender#11) AND isnotnull(cd_education_status#12)) AND (cd_gender#11 = M)) AND (cd_education_status#12 = College)) AND isnotnull(cd_demo_sk#10)) + +(7) Project [codegen id : 1] +Output [2]: [cd_demo_sk#10, cd_dep_count#13] +Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] + +(8) BroadcastExchange +Input [2]: [cd_demo_sk#10, cd_dep_count#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] + +(9) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cs_bill_cdemo_sk#3] +Right keys [1]: [cd_demo_sk#10] +Join condition: None + +(10) Project [codegen id : 7] +Output [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13] +Input [11]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_demo_sk#10, cd_dep_count#13] + +(11) Scan parquet default.customer +Output [5]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_month#18, c_birth_year#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/customer] +PushedFilters: [In(c_birth_month, [9,5,12,4,1,10]), IsNotNull(c_customer_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [5]: [c_customer_sk#15, 
c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_month#18, c_birth_year#19] + +(13) Filter [codegen id : 2] +Input [5]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_month#18, c_birth_year#19] +Condition : (((c_birth_month#18 IN (9,5,12,4,1,10) AND isnotnull(c_customer_sk#15)) AND isnotnull(c_current_cdemo_sk#16)) AND isnotnull(c_current_addr_sk#17)) + +(14) Project [codegen id : 2] +Output [4]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] +Input [5]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_month#18, c_birth_year#19] + +(15) BroadcastExchange +Input [4]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(16) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cs_bill_customer_sk#2] +Right keys [1]: [c_customer_sk#15] +Join condition: None + +(17) Project [codegen id : 7] +Output [11]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] +Input [13]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] + +(18) Scan parquet default.customer_demographics +Output [1]: [cd_demo_sk#21] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [1]: [cd_demo_sk#21] + +(20) Filter [codegen id : 3] +Input [1]: [cd_demo_sk#21] +Condition : isnotnull(cd_demo_sk#21) + +(21) BroadcastExchange +Input [1]: 
[cd_demo_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] + +(22) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_current_cdemo_sk#16] +Right keys [1]: [cd_demo_sk#21] +Join condition: None + +(23) Project [codegen id : 7] +Output [10]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_current_addr_sk#17, c_birth_year#19] +Input [12]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19, cd_demo_sk#21] + +(24) Scan parquet default.customer_address +Output [4]: [ca_address_sk#23, ca_county#24, ca_state#25, ca_country#26] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/customer_address] +PushedFilters: [In(ca_state, [ND,WI,AL,NC,OK,MS,TN]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 4] +Input [4]: [ca_address_sk#23, ca_county#24, ca_state#25, ca_country#26] + +(26) Filter [codegen id : 4] +Input [4]: [ca_address_sk#23, ca_county#24, ca_state#25, ca_country#26] +Condition : (ca_state#25 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#23)) + +(27) BroadcastExchange +Input [4]: [ca_address_sk#23, ca_county#24, ca_state#25, ca_country#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#27] + +(28) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_current_addr_sk#17] +Right keys [1]: [ca_address_sk#23] +Join condition: None + +(29) Project [codegen id : 7] +Output [12]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, ca_county#24, ca_state#25, ca_country#26] +Input [14]: 
[cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_current_addr_sk#17, c_birth_year#19, ca_address_sk#23, ca_county#24, ca_state#25, ca_country#26] + +(30) Scan parquet default.date_dim +Output [2]: [d_date_sk#28, d_year#29] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#28, d_year#29] + +(32) Filter [codegen id : 5] +Input [2]: [d_date_sk#28, d_year#29] +Condition : ((isnotnull(d_year#29) AND (d_year#29 = 2001)) AND isnotnull(d_date_sk#28)) + +(33) Project [codegen id : 5] +Output [1]: [d_date_sk#28] +Input [2]: [d_date_sk#28, d_year#29] + +(34) BroadcastExchange +Input [1]: [d_date_sk#28] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#30] + +(35) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#28] +Join condition: None + +(36) Project [codegen id : 7] +Output [11]: [cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, ca_county#24, ca_state#25, ca_country#26] +Input [13]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, ca_county#24, ca_state#25, ca_country#26, d_date_sk#28] + +(37) Scan parquet default.item +Output [2]: [i_item_sk#31, i_item_id#32] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 6] +Input [2]: 
[i_item_sk#31, i_item_id#32] + +(39) Filter [codegen id : 6] +Input [2]: [i_item_sk#31, i_item_id#32] +Condition : isnotnull(i_item_sk#31) + +(40) BroadcastExchange +Input [2]: [i_item_sk#31, i_item_id#32] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#33] + +(41) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cs_item_sk#4] +Right keys [1]: [i_item_sk#31] +Join condition: None + +(42) Project [codegen id : 7] +Output [11]: [i_item_id#32, ca_country#26, ca_state#25, ca_county#24, cast(cs_quantity#5 as decimal(12,2)) AS agg1#34, cast(cs_list_price#6 as decimal(12,2)) AS agg2#35, cast(cs_coupon_amt#8 as decimal(12,2)) AS agg3#36, cast(cs_sales_price#7 as decimal(12,2)) AS agg4#37, cast(cs_net_profit#9 as decimal(12,2)) AS agg5#38, cast(c_birth_year#19 as decimal(12,2)) AS agg6#39, cast(cd_dep_count#13 as decimal(12,2)) AS agg7#40] +Input [13]: [cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, ca_county#24, ca_state#25, ca_country#26, i_item_sk#31, i_item_id#32] + +(43) HashAggregate [codegen id : 7] +Input [11]: [i_item_id#32, ca_country#26, ca_state#25, ca_county#24, agg1#34, agg2#35, agg3#36, agg4#37, agg5#38, agg6#39, agg7#40] +Keys [4]: [i_item_id#32, ca_country#26, ca_state#25, ca_county#24] +Functions [7]: [partial_avg(agg1#34), partial_avg(agg2#35), partial_avg(agg3#36), partial_avg(agg4#37), partial_avg(agg5#38), partial_avg(agg6#39), partial_avg(agg7#40)] +Aggregate Attributes [14]: [sum#41, count#42, sum#43, count#44, sum#45, count#46, sum#47, count#48, sum#49, count#50, sum#51, count#52, sum#53, count#54] +Results [18]: [i_item_id#32, ca_country#26, ca_state#25, ca_county#24, sum#55, count#56, sum#57, count#58, sum#59, count#60, sum#61, count#62, sum#63, count#64, sum#65, count#66, sum#67, count#68] + +(44) Exchange +Input [18]: [i_item_id#32, ca_country#26, ca_state#25, ca_county#24, sum#55, count#56, sum#57, count#58, 
sum#59, count#60, sum#61, count#62, sum#63, count#64, sum#65, count#66, sum#67, count#68] +Arguments: hashpartitioning(i_item_id#32, ca_country#26, ca_state#25, ca_county#24, 5), true, [id=#69] + +(45) HashAggregate [codegen id : 8] +Input [18]: [i_item_id#32, ca_country#26, ca_state#25, ca_county#24, sum#55, count#56, sum#57, count#58, sum#59, count#60, sum#61, count#62, sum#63, count#64, sum#65, count#66, sum#67, count#68] +Keys [4]: [i_item_id#32, ca_country#26, ca_state#25, ca_county#24] +Functions [7]: [avg(agg1#34), avg(agg2#35), avg(agg3#36), avg(agg4#37), avg(agg5#38), avg(agg6#39), avg(agg7#40)] +Aggregate Attributes [7]: [avg(agg1#34)#70, avg(agg2#35)#71, avg(agg3#36)#72, avg(agg4#37)#73, avg(agg5#38)#74, avg(agg6#39)#75, avg(agg7#40)#76] +Results [11]: [i_item_id#32, ca_country#26, ca_state#25, ca_county#24, avg(agg1#34)#70 AS agg1#77, avg(agg2#35)#71 AS agg2#78, avg(agg3#36)#72 AS agg3#79, avg(agg4#37)#73 AS agg4#80, avg(agg5#38)#74 AS agg5#81, avg(agg6#39)#75 AS agg6#82, avg(agg7#40)#76 AS agg7#83] + +(46) Scan parquet default.catalog_sales +Output [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(47) ColumnarToRow [codegen id : 15] +Input [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9] + +(48) Filter [codegen id : 15] +Input [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9] +Condition : 
(((isnotnull(cs_bill_cdemo_sk#3) AND isnotnull(cs_bill_customer_sk#2)) AND isnotnull(cs_sold_date_sk#1)) AND isnotnull(cs_item_sk#4)) + +(49) ReusedExchange [Reuses operator id: 8] +Output [2]: [cd_demo_sk#10, cd_dep_count#13] + +(50) BroadcastHashJoin [codegen id : 15] +Left keys [1]: [cs_bill_cdemo_sk#3] +Right keys [1]: [cd_demo_sk#10] +Join condition: None + +(51) Project [codegen id : 15] +Output [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13] +Input [11]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_demo_sk#10, cd_dep_count#13] + +(52) ReusedExchange [Reuses operator id: 15] +Output [4]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] + +(53) BroadcastHashJoin [codegen id : 15] +Left keys [1]: [cs_bill_customer_sk#2] +Right keys [1]: [c_customer_sk#15] +Join condition: None + +(54) Project [codegen id : 15] +Output [11]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] +Input [13]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] + +(55) ReusedExchange [Reuses operator id: 21] +Output [1]: [cd_demo_sk#84] + +(56) BroadcastHashJoin [codegen id : 15] +Left keys [1]: [c_current_cdemo_sk#16] +Right keys [1]: [cd_demo_sk#84] +Join condition: None + +(57) Project [codegen id : 15] +Output [10]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_current_addr_sk#17, c_birth_year#19] +Input 
[12]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19, cd_demo_sk#84] + +(58) Scan parquet default.customer_address +Output [3]: [ca_address_sk#23, ca_state#25, ca_country#26] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/customer_address] +PushedFilters: [In(ca_state, [ND,WI,AL,NC,OK,MS,TN]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(59) ColumnarToRow [codegen id : 12] +Input [3]: [ca_address_sk#23, ca_state#25, ca_country#26] + +(60) Filter [codegen id : 12] +Input [3]: [ca_address_sk#23, ca_state#25, ca_country#26] +Condition : (ca_state#25 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#23)) + +(61) BroadcastExchange +Input [3]: [ca_address_sk#23, ca_state#25, ca_country#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#85] + +(62) BroadcastHashJoin [codegen id : 15] +Left keys [1]: [c_current_addr_sk#17] +Right keys [1]: [ca_address_sk#23] +Join condition: None + +(63) Project [codegen id : 15] +Output [11]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, ca_state#25, ca_country#26] +Input [13]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_current_addr_sk#17, c_birth_year#19, ca_address_sk#23, ca_state#25, ca_country#26] + +(64) ReusedExchange [Reuses operator id: 34] +Output [1]: [d_date_sk#28] + +(65) BroadcastHashJoin [codegen id : 15] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#28] +Join condition: None + +(66) Project [codegen id : 15] +Output [10]: [cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, 
cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, ca_state#25, ca_country#26] +Input [12]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, ca_state#25, ca_country#26, d_date_sk#28] + +(67) ReusedExchange [Reuses operator id: 40] +Output [2]: [i_item_sk#31, i_item_id#32] + +(68) BroadcastHashJoin [codegen id : 15] +Left keys [1]: [cs_item_sk#4] +Right keys [1]: [i_item_sk#31] +Join condition: None + +(69) Project [codegen id : 15] +Output [10]: [i_item_id#32, ca_country#26, ca_state#25, cast(cs_quantity#5 as decimal(12,2)) AS agg1#34, cast(cs_list_price#6 as decimal(12,2)) AS agg2#35, cast(cs_coupon_amt#8 as decimal(12,2)) AS agg3#36, cast(cs_sales_price#7 as decimal(12,2)) AS agg4#37, cast(cs_net_profit#9 as decimal(12,2)) AS agg5#38, cast(c_birth_year#19 as decimal(12,2)) AS agg6#39, cast(cd_dep_count#13 as decimal(12,2)) AS agg7#40] +Input [12]: [cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, ca_state#25, ca_country#26, i_item_sk#31, i_item_id#32] + +(70) HashAggregate [codegen id : 15] +Input [10]: [i_item_id#32, ca_country#26, ca_state#25, agg1#34, agg2#35, agg3#36, agg4#37, agg5#38, agg6#39, agg7#40] +Keys [3]: [i_item_id#32, ca_country#26, ca_state#25] +Functions [7]: [partial_avg(agg1#34), partial_avg(agg2#35), partial_avg(agg3#36), partial_avg(agg4#37), partial_avg(agg5#38), partial_avg(agg6#39), partial_avg(agg7#40)] +Aggregate Attributes [14]: [sum#86, count#87, sum#88, count#89, sum#90, count#91, sum#92, count#93, sum#94, count#95, sum#96, count#97, sum#98, count#99] +Results [17]: [i_item_id#32, ca_country#26, ca_state#25, sum#100, count#101, sum#102, count#103, sum#104, count#105, sum#106, count#107, sum#108, count#109, sum#110, count#111, sum#112, count#113] + +(71) Exchange +Input [17]: [i_item_id#32, ca_country#26, ca_state#25, sum#100, 
count#101, sum#102, count#103, sum#104, count#105, sum#106, count#107, sum#108, count#109, sum#110, count#111, sum#112, count#113] +Arguments: hashpartitioning(i_item_id#32, ca_country#26, ca_state#25, 5), true, [id=#114] + +(72) HashAggregate [codegen id : 16] +Input [17]: [i_item_id#32, ca_country#26, ca_state#25, sum#100, count#101, sum#102, count#103, sum#104, count#105, sum#106, count#107, sum#108, count#109, sum#110, count#111, sum#112, count#113] +Keys [3]: [i_item_id#32, ca_country#26, ca_state#25] +Functions [7]: [avg(agg1#34), avg(agg2#35), avg(agg3#36), avg(agg4#37), avg(agg5#38), avg(agg6#39), avg(agg7#40)] +Aggregate Attributes [7]: [avg(agg1#34)#115, avg(agg2#35)#116, avg(agg3#36)#117, avg(agg4#37)#118, avg(agg5#38)#119, avg(agg6#39)#120, avg(agg7#40)#121] +Results [11]: [i_item_id#32, ca_country#26, ca_state#25, null AS county#122, avg(agg1#34)#115 AS agg1#123, avg(agg2#35)#116 AS agg2#124, avg(agg3#36)#117 AS agg3#125, avg(agg4#37)#118 AS agg4#126, avg(agg5#38)#119 AS agg5#127, avg(agg6#39)#120 AS agg6#128, avg(agg7#40)#121 AS agg7#129] + +(73) Scan parquet default.catalog_sales +Output [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(74) ColumnarToRow [codegen id : 23] +Input [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9] + +(75) Filter [codegen id : 23] +Input [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, 
cs_coupon_amt#8, cs_net_profit#9] +Condition : (((isnotnull(cs_bill_cdemo_sk#3) AND isnotnull(cs_bill_customer_sk#2)) AND isnotnull(cs_sold_date_sk#1)) AND isnotnull(cs_item_sk#4)) + +(76) ReusedExchange [Reuses operator id: 8] +Output [2]: [cd_demo_sk#10, cd_dep_count#13] + +(77) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [cs_bill_cdemo_sk#3] +Right keys [1]: [cd_demo_sk#10] +Join condition: None + +(78) Project [codegen id : 23] +Output [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13] +Input [11]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_demo_sk#10, cd_dep_count#13] + +(79) ReusedExchange [Reuses operator id: 15] +Output [4]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] + +(80) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [cs_bill_customer_sk#2] +Right keys [1]: [c_customer_sk#15] +Join condition: None + +(81) Project [codegen id : 23] +Output [11]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] +Input [13]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] + +(82) ReusedExchange [Reuses operator id: 21] +Output [1]: [cd_demo_sk#130] + +(83) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [c_current_cdemo_sk#16] +Right keys [1]: [cd_demo_sk#130] +Join condition: None + +(84) Project [codegen id : 23] +Output [10]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, 
cd_dep_count#13, c_current_addr_sk#17, c_birth_year#19] +Input [12]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19, cd_demo_sk#130] + +(85) Scan parquet default.customer_address +Output [3]: [ca_address_sk#23, ca_state#25, ca_country#26] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/customer_address] +PushedFilters: [In(ca_state, [ND,WI,AL,NC,OK,MS,TN]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(86) ColumnarToRow [codegen id : 20] +Input [3]: [ca_address_sk#23, ca_state#25, ca_country#26] + +(87) Filter [codegen id : 20] +Input [3]: [ca_address_sk#23, ca_state#25, ca_country#26] +Condition : (ca_state#25 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#23)) + +(88) Project [codegen id : 20] +Output [2]: [ca_address_sk#23, ca_country#26] +Input [3]: [ca_address_sk#23, ca_state#25, ca_country#26] + +(89) BroadcastExchange +Input [2]: [ca_address_sk#23, ca_country#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#131] + +(90) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [c_current_addr_sk#17] +Right keys [1]: [ca_address_sk#23] +Join condition: None + +(91) Project [codegen id : 23] +Output [10]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, ca_country#26] +Input [12]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_current_addr_sk#17, c_birth_year#19, ca_address_sk#23, ca_country#26] + +(92) ReusedExchange [Reuses operator id: 34] +Output [1]: [d_date_sk#28] + +(93) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [cs_sold_date_sk#1] +Right keys 
[1]: [d_date_sk#28] +Join condition: None + +(94) Project [codegen id : 23] +Output [9]: [cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, ca_country#26] +Input [11]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, ca_country#26, d_date_sk#28] + +(95) ReusedExchange [Reuses operator id: 40] +Output [2]: [i_item_sk#31, i_item_id#32] + +(96) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [cs_item_sk#4] +Right keys [1]: [i_item_sk#31] +Join condition: None + +(97) Project [codegen id : 23] +Output [9]: [i_item_id#32, ca_country#26, cast(cs_quantity#5 as decimal(12,2)) AS agg1#34, cast(cs_list_price#6 as decimal(12,2)) AS agg2#35, cast(cs_coupon_amt#8 as decimal(12,2)) AS agg3#36, cast(cs_sales_price#7 as decimal(12,2)) AS agg4#37, cast(cs_net_profit#9 as decimal(12,2)) AS agg5#38, cast(c_birth_year#19 as decimal(12,2)) AS agg6#39, cast(cd_dep_count#13 as decimal(12,2)) AS agg7#40] +Input [11]: [cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, ca_country#26, i_item_sk#31, i_item_id#32] + +(98) HashAggregate [codegen id : 23] +Input [9]: [i_item_id#32, ca_country#26, agg1#34, agg2#35, agg3#36, agg4#37, agg5#38, agg6#39, agg7#40] +Keys [2]: [i_item_id#32, ca_country#26] +Functions [7]: [partial_avg(agg1#34), partial_avg(agg2#35), partial_avg(agg3#36), partial_avg(agg4#37), partial_avg(agg5#38), partial_avg(agg6#39), partial_avg(agg7#40)] +Aggregate Attributes [14]: [sum#132, count#133, sum#134, count#135, sum#136, count#137, sum#138, count#139, sum#140, count#141, sum#142, count#143, sum#144, count#145] +Results [16]: [i_item_id#32, ca_country#26, sum#146, count#147, sum#148, count#149, sum#150, count#151, sum#152, count#153, sum#154, count#155, sum#156, count#157, sum#158, count#159] + +(99) Exchange 
+Input [16]: [i_item_id#32, ca_country#26, sum#146, count#147, sum#148, count#149, sum#150, count#151, sum#152, count#153, sum#154, count#155, sum#156, count#157, sum#158, count#159] +Arguments: hashpartitioning(i_item_id#32, ca_country#26, 5), true, [id=#160] + +(100) HashAggregate [codegen id : 24] +Input [16]: [i_item_id#32, ca_country#26, sum#146, count#147, sum#148, count#149, sum#150, count#151, sum#152, count#153, sum#154, count#155, sum#156, count#157, sum#158, count#159] +Keys [2]: [i_item_id#32, ca_country#26] +Functions [7]: [avg(agg1#34), avg(agg2#35), avg(agg3#36), avg(agg4#37), avg(agg5#38), avg(agg6#39), avg(agg7#40)] +Aggregate Attributes [7]: [avg(agg1#34)#161, avg(agg2#35)#162, avg(agg3#36)#163, avg(agg4#37)#164, avg(agg5#38)#165, avg(agg6#39)#166, avg(agg7#40)#167] +Results [11]: [i_item_id#32, ca_country#26, null AS ca_state#168, null AS county#169, avg(agg1#34)#161 AS agg1#170, avg(agg2#35)#162 AS agg2#171, avg(agg3#36)#163 AS agg3#172, avg(agg4#37)#164 AS agg4#173, avg(agg5#38)#165 AS agg5#174, avg(agg6#39)#166 AS agg6#175, avg(agg7#40)#167 AS agg7#176] + +(101) Scan parquet default.catalog_sales +Output [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(102) ColumnarToRow [codegen id : 31] +Input [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9] + +(103) Filter [codegen id : 31] +Input [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, 
cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9] +Condition : (((isnotnull(cs_bill_cdemo_sk#3) AND isnotnull(cs_bill_customer_sk#2)) AND isnotnull(cs_sold_date_sk#1)) AND isnotnull(cs_item_sk#4)) + +(104) ReusedExchange [Reuses operator id: 8] +Output [2]: [cd_demo_sk#10, cd_dep_count#13] + +(105) BroadcastHashJoin [codegen id : 31] +Left keys [1]: [cs_bill_cdemo_sk#3] +Right keys [1]: [cd_demo_sk#10] +Join condition: None + +(106) Project [codegen id : 31] +Output [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13] +Input [11]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_demo_sk#10, cd_dep_count#13] + +(107) ReusedExchange [Reuses operator id: 15] +Output [4]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] + +(108) BroadcastHashJoin [codegen id : 31] +Left keys [1]: [cs_bill_customer_sk#2] +Right keys [1]: [c_customer_sk#15] +Join condition: None + +(109) Project [codegen id : 31] +Output [11]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] +Input [13]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] + +(110) ReusedExchange [Reuses operator id: 21] +Output [1]: [cd_demo_sk#177] + +(111) BroadcastHashJoin [codegen id : 31] +Left keys [1]: [c_current_cdemo_sk#16] +Right keys [1]: [cd_demo_sk#177] +Join condition: None + +(112) Project [codegen id : 31] +Output [10]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, 
cs_net_profit#9, cd_dep_count#13, c_current_addr_sk#17, c_birth_year#19] +Input [12]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19, cd_demo_sk#177] + +(113) Scan parquet default.customer_address +Output [2]: [ca_address_sk#23, ca_state#25] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/customer_address] +PushedFilters: [In(ca_state, [ND,WI,AL,NC,OK,MS,TN]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(114) ColumnarToRow [codegen id : 28] +Input [2]: [ca_address_sk#23, ca_state#25] + +(115) Filter [codegen id : 28] +Input [2]: [ca_address_sk#23, ca_state#25] +Condition : (ca_state#25 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#23)) + +(116) Project [codegen id : 28] +Output [1]: [ca_address_sk#23] +Input [2]: [ca_address_sk#23, ca_state#25] + +(117) BroadcastExchange +Input [1]: [ca_address_sk#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#178] + +(118) BroadcastHashJoin [codegen id : 31] +Left keys [1]: [c_current_addr_sk#17] +Right keys [1]: [ca_address_sk#23] +Join condition: None + +(119) Project [codegen id : 31] +Output [9]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19] +Input [11]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_current_addr_sk#17, c_birth_year#19, ca_address_sk#23] + +(120) ReusedExchange [Reuses operator id: 34] +Output [1]: [d_date_sk#28] + +(121) BroadcastHashJoin [codegen id : 31] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#28] +Join condition: None + +(122) Project [codegen id : 31] +Output [8]: 
[cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19] +Input [10]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, d_date_sk#28] + +(123) ReusedExchange [Reuses operator id: 40] +Output [2]: [i_item_sk#31, i_item_id#32] + +(124) BroadcastHashJoin [codegen id : 31] +Left keys [1]: [cs_item_sk#4] +Right keys [1]: [i_item_sk#31] +Join condition: None + +(125) Project [codegen id : 31] +Output [8]: [i_item_id#32, cast(cs_quantity#5 as decimal(12,2)) AS agg1#34, cast(cs_list_price#6 as decimal(12,2)) AS agg2#35, cast(cs_coupon_amt#8 as decimal(12,2)) AS agg3#36, cast(cs_sales_price#7 as decimal(12,2)) AS agg4#37, cast(cs_net_profit#9 as decimal(12,2)) AS agg5#38, cast(c_birth_year#19 as decimal(12,2)) AS agg6#39, cast(cd_dep_count#13 as decimal(12,2)) AS agg7#40] +Input [10]: [cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, i_item_sk#31, i_item_id#32] + +(126) HashAggregate [codegen id : 31] +Input [8]: [i_item_id#32, agg1#34, agg2#35, agg3#36, agg4#37, agg5#38, agg6#39, agg7#40] +Keys [1]: [i_item_id#32] +Functions [7]: [partial_avg(agg1#34), partial_avg(agg2#35), partial_avg(agg3#36), partial_avg(agg4#37), partial_avg(agg5#38), partial_avg(agg6#39), partial_avg(agg7#40)] +Aggregate Attributes [14]: [sum#179, count#180, sum#181, count#182, sum#183, count#184, sum#185, count#186, sum#187, count#188, sum#189, count#190, sum#191, count#192] +Results [15]: [i_item_id#32, sum#193, count#194, sum#195, count#196, sum#197, count#198, sum#199, count#200, sum#201, count#202, sum#203, count#204, sum#205, count#206] + +(127) Exchange +Input [15]: [i_item_id#32, sum#193, count#194, sum#195, count#196, sum#197, count#198, sum#199, count#200, sum#201, count#202, sum#203, count#204, sum#205, count#206] +Arguments: 
hashpartitioning(i_item_id#32, 5), true, [id=#207] + +(128) HashAggregate [codegen id : 32] +Input [15]: [i_item_id#32, sum#193, count#194, sum#195, count#196, sum#197, count#198, sum#199, count#200, sum#201, count#202, sum#203, count#204, sum#205, count#206] +Keys [1]: [i_item_id#32] +Functions [7]: [avg(agg1#34), avg(agg2#35), avg(agg3#36), avg(agg4#37), avg(agg5#38), avg(agg6#39), avg(agg7#40)] +Aggregate Attributes [7]: [avg(agg1#34)#208, avg(agg2#35)#209, avg(agg3#36)#210, avg(agg4#37)#211, avg(agg5#38)#212, avg(agg6#39)#213, avg(agg7#40)#214] +Results [11]: [i_item_id#32, null AS ca_country#215, null AS ca_state#216, null AS county#217, avg(agg1#34)#208 AS agg1#218, avg(agg2#35)#209 AS agg2#219, avg(agg3#36)#210 AS agg3#220, avg(agg4#37)#211 AS agg4#221, avg(agg5#38)#212 AS agg5#222, avg(agg6#39)#213 AS agg6#223, avg(agg7#40)#214 AS agg7#224] + +(129) Scan parquet default.catalog_sales +Output [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(130) ColumnarToRow [codegen id : 39] +Input [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9] + +(131) Filter [codegen id : 39] +Input [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9] +Condition : (((isnotnull(cs_bill_cdemo_sk#3) AND isnotnull(cs_bill_customer_sk#2)) AND isnotnull(cs_sold_date_sk#1)) AND isnotnull(cs_item_sk#4)) + +(132) ReusedExchange [Reuses 
operator id: 8] +Output [2]: [cd_demo_sk#10, cd_dep_count#13] + +(133) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [cs_bill_cdemo_sk#3] +Right keys [1]: [cd_demo_sk#10] +Join condition: None + +(134) Project [codegen id : 39] +Output [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13] +Input [11]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_demo_sk#10, cd_dep_count#13] + +(135) ReusedExchange [Reuses operator id: 15] +Output [4]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] + +(136) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [cs_bill_customer_sk#2] +Right keys [1]: [c_customer_sk#15] +Join condition: None + +(137) Project [codegen id : 39] +Output [11]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] +Input [13]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] + +(138) ReusedExchange [Reuses operator id: 21] +Output [1]: [cd_demo_sk#225] + +(139) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [c_current_cdemo_sk#16] +Right keys [1]: [cd_demo_sk#225] +Join condition: None + +(140) Project [codegen id : 39] +Output [10]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_current_addr_sk#17, c_birth_year#19] +Input [12]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, 
c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19, cd_demo_sk#225] + +(141) ReusedExchange [Reuses operator id: 117] +Output [1]: [ca_address_sk#23] + +(142) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [c_current_addr_sk#17] +Right keys [1]: [ca_address_sk#23] +Join condition: None + +(143) Project [codegen id : 39] +Output [9]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19] +Input [11]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_current_addr_sk#17, c_birth_year#19, ca_address_sk#23] + +(144) ReusedExchange [Reuses operator id: 34] +Output [1]: [d_date_sk#28] + +(145) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#28] +Join condition: None + +(146) Project [codegen id : 39] +Output [8]: [cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19] +Input [10]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, d_date_sk#28] + +(147) Scan parquet default.item +Output [1]: [i_item_sk#31] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(148) ColumnarToRow [codegen id : 38] +Input [1]: [i_item_sk#31] + +(149) Filter [codegen id : 38] +Input [1]: [i_item_sk#31] +Condition : isnotnull(i_item_sk#31) + +(150) BroadcastExchange +Input [1]: [i_item_sk#31] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#226] + +(151) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [cs_item_sk#4] +Right keys [1]: [i_item_sk#31] +Join 
condition: None + +(152) Project [codegen id : 39] +Output [7]: [cast(cs_quantity#5 as decimal(12,2)) AS agg1#34, cast(cs_list_price#6 as decimal(12,2)) AS agg2#35, cast(cs_coupon_amt#8 as decimal(12,2)) AS agg3#36, cast(cs_sales_price#7 as decimal(12,2)) AS agg4#37, cast(cs_net_profit#9 as decimal(12,2)) AS agg5#38, cast(c_birth_year#19 as decimal(12,2)) AS agg6#39, cast(cd_dep_count#13 as decimal(12,2)) AS agg7#40] +Input [9]: [cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, i_item_sk#31] + +(153) HashAggregate [codegen id : 39] +Input [7]: [agg1#34, agg2#35, agg3#36, agg4#37, agg5#38, agg6#39, agg7#40] +Keys: [] +Functions [7]: [partial_avg(agg1#34), partial_avg(agg2#35), partial_avg(agg3#36), partial_avg(agg4#37), partial_avg(agg5#38), partial_avg(agg6#39), partial_avg(agg7#40)] +Aggregate Attributes [14]: [sum#227, count#228, sum#229, count#230, sum#231, count#232, sum#233, count#234, sum#235, count#236, sum#237, count#238, sum#239, count#240] +Results [14]: [sum#241, count#242, sum#243, count#244, sum#245, count#246, sum#247, count#248, sum#249, count#250, sum#251, count#252, sum#253, count#254] + +(154) Exchange +Input [14]: [sum#241, count#242, sum#243, count#244, sum#245, count#246, sum#247, count#248, sum#249, count#250, sum#251, count#252, sum#253, count#254] +Arguments: SinglePartition, true, [id=#255] + +(155) HashAggregate [codegen id : 40] +Input [14]: [sum#241, count#242, sum#243, count#244, sum#245, count#246, sum#247, count#248, sum#249, count#250, sum#251, count#252, sum#253, count#254] +Keys: [] +Functions [7]: [avg(agg1#34), avg(agg2#35), avg(agg3#36), avg(agg4#37), avg(agg5#38), avg(agg6#39), avg(agg7#40)] +Aggregate Attributes [7]: [avg(agg1#34)#256, avg(agg2#35)#257, avg(agg3#36)#258, avg(agg4#37)#259, avg(agg5#38)#260, avg(agg6#39)#261, avg(agg7#40)#262] +Results [11]: [null AS i_item_id#263, null AS ca_country#264, null AS ca_state#265, null AS county#266, 
avg(agg1#34)#256 AS agg1#267, avg(agg2#35)#257 AS agg2#268, avg(agg3#36)#258 AS agg3#269, avg(agg4#37)#259 AS agg4#270, avg(agg5#38)#260 AS agg5#271, avg(agg6#39)#261 AS agg6#272, avg(agg7#40)#262 AS agg7#273] + +(156) Union + +(157) TakeOrderedAndProject +Input [11]: [i_item_id#32, ca_country#26, ca_state#25, ca_county#24, agg1#77, agg2#78, agg3#79, agg4#80, agg5#81, agg6#82, agg7#83] +Arguments: 100, [ca_country#26 ASC NULLS FIRST, ca_state#25 ASC NULLS FIRST, ca_county#24 ASC NULLS FIRST, i_item_id#32 ASC NULLS FIRST], [i_item_id#32, ca_country#26, ca_state#25, ca_county#24, agg1#77, agg2#78, agg3#79, agg4#80, agg5#81, agg6#82, agg7#83] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a/simplified.txt new file mode 100644 index 0000000000000..bf528051893ed --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a/simplified.txt @@ -0,0 +1,227 @@ +TakeOrderedAndProject [agg1,agg2,agg3,agg4,agg5,agg6,agg7,ca_country,ca_county,ca_state,i_item_id] + Union + WholeStageCodegen (8) + HashAggregate [ca_country,ca_county,ca_state,count,count,count,count,count,count,count,i_item_id,sum,sum,sum,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,agg5,agg6,agg7,avg(agg1),avg(agg2),avg(agg3),avg(agg4),avg(agg5),avg(agg6),avg(agg7),count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum] + InputAdapter + Exchange [ca_country,ca_county,ca_state,i_item_id] #1 + WholeStageCodegen (7) + HashAggregate [agg1,agg2,agg3,agg4,agg5,agg6,agg7,ca_country,ca_county,ca_state,i_item_id] [count,count,count,count,count,count,count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [c_birth_year,ca_country,ca_county,ca_state,cd_dep_count,cs_coupon_amt,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project 
[c_birth_year,ca_country,ca_county,ca_state,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [c_birth_year,ca_country,ca_county,ca_state,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_birth_year,c_current_addr_sk,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_birth_year,c_current_addr_sk,c_current_cdemo_sk,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] + Project [cd_dep_count,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [cd_demo_sk,cs_bill_cdemo_sk] + Filter [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [cd_demo_sk,cd_dep_count] + Filter [cd_demo_sk,cd_education_status,cd_gender] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_dep_count,cd_education_status,cd_gender] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [c_birth_year,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + Filter [c_birth_month,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_birth_month,c_birth_year,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + InputAdapter + BroadcastExchange #4 + 
WholeStageCodegen (3) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_country,ca_county,ca_state] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (6) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_id,i_item_sk] + WholeStageCodegen (16) + HashAggregate [ca_country,ca_state,count,count,count,count,count,count,count,i_item_id,sum,sum,sum,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,agg5,agg6,agg7,avg(agg1),avg(agg2),avg(agg3),avg(agg4),avg(agg5),avg(agg6),avg(agg7),count,count,count,count,count,count,count,county,sum,sum,sum,sum,sum,sum,sum] + InputAdapter + Exchange [ca_country,ca_state,i_item_id] #8 + WholeStageCodegen (15) + HashAggregate [agg1,agg2,agg3,agg4,agg5,agg6,agg7,ca_country,ca_state,i_item_id] [count,count,count,count,count,count,count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [c_birth_year,ca_country,ca_state,cd_dep_count,cs_coupon_amt,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [c_birth_year,ca_country,ca_state,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [c_birth_year,ca_country,ca_state,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project 
[c_birth_year,c_current_addr_sk,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_birth_year,c_current_addr_sk,c_current_cdemo_sk,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] + Project [cd_dep_count,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [cd_demo_sk,cs_bill_cdemo_sk] + Filter [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + InputAdapter + ReusedExchange [cd_demo_sk,cd_dep_count] #2 + InputAdapter + ReusedExchange [c_birth_year,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] #3 + InputAdapter + ReusedExchange [cd_demo_sk] #4 + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (12) + Filter [ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_country,ca_state] + InputAdapter + ReusedExchange [d_date_sk] #6 + InputAdapter + ReusedExchange [i_item_id,i_item_sk] #7 + WholeStageCodegen (24) + HashAggregate [ca_country,count,count,count,count,count,count,count,i_item_id,sum,sum,sum,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,agg5,agg6,agg7,avg(agg1),avg(agg2),avg(agg3),avg(agg4),avg(agg5),avg(agg6),avg(agg7),ca_state,count,count,count,count,count,count,count,county,sum,sum,sum,sum,sum,sum,sum] + InputAdapter + Exchange [ca_country,i_item_id] #10 + WholeStageCodegen (23) + HashAggregate [agg1,agg2,agg3,agg4,agg5,agg6,agg7,ca_country,i_item_id] 
[count,count,count,count,count,count,count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [c_birth_year,ca_country,cd_dep_count,cs_coupon_amt,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [c_birth_year,ca_country,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [c_birth_year,ca_country,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_birth_year,c_current_addr_sk,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_birth_year,c_current_addr_sk,c_current_cdemo_sk,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] + Project [cd_dep_count,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [cd_demo_sk,cs_bill_cdemo_sk] + Filter [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + InputAdapter + ReusedExchange [cd_demo_sk,cd_dep_count] #2 + InputAdapter + ReusedExchange [c_birth_year,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] #3 + InputAdapter + ReusedExchange [cd_demo_sk] #4 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (20) + Project [ca_address_sk,ca_country] + Filter [ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet 
default.customer_address [ca_address_sk,ca_country,ca_state] + InputAdapter + ReusedExchange [d_date_sk] #6 + InputAdapter + ReusedExchange [i_item_id,i_item_sk] #7 + WholeStageCodegen (32) + HashAggregate [count,count,count,count,count,count,count,i_item_id,sum,sum,sum,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,agg5,agg6,agg7,avg(agg1),avg(agg2),avg(agg3),avg(agg4),avg(agg5),avg(agg6),avg(agg7),ca_country,ca_state,count,count,count,count,count,count,count,county,sum,sum,sum,sum,sum,sum,sum] + InputAdapter + Exchange [i_item_id] #12 + WholeStageCodegen (31) + HashAggregate [agg1,agg2,agg3,agg4,agg5,agg6,agg7,i_item_id] [count,count,count,count,count,count,count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [c_birth_year,cd_dep_count,cs_coupon_amt,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [c_birth_year,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [c_birth_year,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_birth_year,c_current_addr_sk,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_birth_year,c_current_addr_sk,c_current_cdemo_sk,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] + Project [cd_dep_count,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [cd_demo_sk,cs_bill_cdemo_sk] + Filter [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + 
InputAdapter + Scan parquet default.catalog_sales [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + InputAdapter + ReusedExchange [cd_demo_sk,cd_dep_count] #2 + InputAdapter + ReusedExchange [c_birth_year,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] #3 + InputAdapter + ReusedExchange [cd_demo_sk] #4 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (28) + Project [ca_address_sk] + Filter [ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state] + InputAdapter + ReusedExchange [d_date_sk] #6 + InputAdapter + ReusedExchange [i_item_id,i_item_sk] #7 + WholeStageCodegen (40) + HashAggregate [count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,agg5,agg6,agg7,avg(agg1),avg(agg2),avg(agg3),avg(agg4),avg(agg5),avg(agg6),avg(agg7),ca_country,ca_state,count,count,count,count,count,count,count,county,i_item_id,sum,sum,sum,sum,sum,sum,sum] + InputAdapter + Exchange #14 + WholeStageCodegen (39) + HashAggregate [agg1,agg2,agg3,agg4,agg5,agg6,agg7] [count,count,count,count,count,count,count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [c_birth_year,cd_dep_count,cs_coupon_amt,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [c_birth_year,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [c_birth_year,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_birth_year,c_current_addr_sk,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin 
[c_current_cdemo_sk,cd_demo_sk] + Project [c_birth_year,c_current_addr_sk,c_current_cdemo_sk,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] + Project [cd_dep_count,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [cd_demo_sk,cs_bill_cdemo_sk] + Filter [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + InputAdapter + ReusedExchange [cd_demo_sk,cd_dep_count] #2 + InputAdapter + ReusedExchange [c_birth_year,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] #3 + InputAdapter + ReusedExchange [cd_demo_sk] #4 + InputAdapter + ReusedExchange [ca_address_sk] #13 + InputAdapter + ReusedExchange [d_date_sk] #6 + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (38) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20.sf100/explain.txt new file mode 100644 index 0000000000000..9bb210f7f01db --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20.sf100/explain.txt @@ -0,0 +1,152 @@ +== Physical Plan == +TakeOrderedAndProject (27) ++- * Project (26) + +- Window (25) + +- * Sort (24) + +- Exchange (23) + +- * HashAggregate (22) + +- Exchange (21) + +- * HashAggregate (20) + +- * Project (19) + +- * SortMergeJoin Inner (18) + :- * Sort (12) + : +- Exchange (11) + : +- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet 
default.catalog_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.date_dim (4) + +- * Sort (17) + +- Exchange (16) + +- * Filter (15) + +- * ColumnarToRow (14) + +- Scan parquet default.item (13) + + +(1) Scan parquet default.catalog_sales +Output [3]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_sales_price#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [3]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_sales_price#3] + +(3) Filter [codegen id : 2] +Input [3]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_sales_price#3] +Condition : (isnotnull(cs_item_sk#2) AND isnotnull(cs_sold_date_sk#1)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#4, d_date#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#4, d_date#5] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#4, d_date#5] +Condition : (((isnotnull(d_date#5) AND (d_date#5 >= 10644)) AND (d_date#5 <= 10674)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_date#5] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(9) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(10) Project [codegen 
id : 2] +Output [2]: [cs_item_sk#2, cs_ext_sales_price#3] +Input [4]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_sales_price#3, d_date_sk#4] + +(11) Exchange +Input [2]: [cs_item_sk#2, cs_ext_sales_price#3] +Arguments: hashpartitioning(cs_item_sk#2, 5), true, [id=#7] + +(12) Sort [codegen id : 3] +Input [2]: [cs_item_sk#2, cs_ext_sales_price#3] +Arguments: [cs_item_sk#2 ASC NULLS FIRST], false, 0 + +(13) Scan parquet default.item +Output [6]: [i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/item] +PushedFilters: [In(i_category, [Sports,Books,Home]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 4] +Input [6]: [i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] + +(15) Filter [codegen id : 4] +Input [6]: [i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Condition : (i_category#13 IN (Sports,Books,Home) AND isnotnull(i_item_sk#8)) + +(16) Exchange +Input [6]: [i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Arguments: hashpartitioning(i_item_sk#8, 5), true, [id=#14] + +(17) Sort [codegen id : 5] +Input [6]: [i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Arguments: [i_item_sk#8 ASC NULLS FIRST], false, 0 + +(18) SortMergeJoin [codegen id : 6] +Left keys [1]: [cs_item_sk#2] +Right keys [1]: [i_item_sk#8] +Join condition: None + +(19) Project [codegen id : 6] +Output [6]: [cs_ext_sales_price#3, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Input [8]: [cs_item_sk#2, cs_ext_sales_price#3, i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] + +(20) HashAggregate [codegen id : 6] +Input [6]: 
[cs_ext_sales_price#3, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Keys [5]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#15] +Results [6]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, sum#16] + +(21) Exchange +Input [6]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, sum#16] +Arguments: hashpartitioning(i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, 5), true, [id=#17] + +(22) HashAggregate [codegen id : 7] +Input [6]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, sum#16] +Keys [5]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#3))#18] +Results [8]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#3))#18,17,2) AS itemrevenue#19, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#3))#18,17,2) AS _w0#20, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#3))#18,17,2) AS _w1#21] + +(23) Exchange +Input [8]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, _w0#20, _w1#21] +Arguments: hashpartitioning(i_class#12, 5), true, [id=#22] + +(24) Sort [codegen id : 8] +Input [8]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, _w0#20, _w1#21] +Arguments: [i_class#12 ASC NULLS FIRST], false, 0 + +(25) Window +Input [8]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, _w0#20, _w1#21] +Arguments: [sum(_w1#21) windowspecdefinition(i_class#12, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#23], [i_class#12] 
+ +(26) Project [codegen id : 9] +Output [7]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#20) * 100.00), DecimalType(21,2), true) as decimal(27,2))) / promote_precision(_we0#23)), DecimalType(38,17), true) AS revenueratio#24] +Input [9]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, _w0#20, _w1#21, _we0#23] + +(27) TakeOrderedAndProject +Input [7]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, revenueratio#24] +Arguments: 100, [i_category#13 ASC NULLS FIRST, i_class#12 ASC NULLS FIRST, i_item_id#9 ASC NULLS FIRST, i_item_desc#10 ASC NULLS FIRST, revenueratio#24 ASC NULLS FIRST], [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, revenueratio#24] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20.sf100/simplified.txt new file mode 100644 index 0000000000000..b7d7a77003325 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20.sf100/simplified.txt @@ -0,0 +1,47 @@ +TakeOrderedAndProject [i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue,revenueratio] + WholeStageCodegen (9) + Project [_w0,_we0,i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue] + InputAdapter + Window [_w1,i_class] + WholeStageCodegen (8) + Sort [i_class] + InputAdapter + Exchange [i_class] #1 + WholeStageCodegen (7) + HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,sum] [_w0,_w1,itemrevenue,sum,sum(UnscaledValue(cs_ext_sales_price))] + InputAdapter + Exchange [i_category,i_class,i_current_price,i_item_desc,i_item_id] #2 + WholeStageCodegen (6) + HashAggregate 
[cs_ext_sales_price,i_category,i_class,i_current_price,i_item_desc,i_item_id] [sum,sum] + Project [cs_ext_sales_price,i_category,i_class,i_current_price,i_item_desc,i_item_id] + SortMergeJoin [cs_item_sk,i_item_sk] + InputAdapter + WholeStageCodegen (3) + Sort [cs_item_sk] + InputAdapter + Exchange [cs_item_sk] #3 + WholeStageCodegen (2) + Project [cs_ext_sales_price,cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + WholeStageCodegen (5) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #5 + WholeStageCodegen (4) + Filter [i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20/explain.txt new file mode 100644 index 0000000000000..b3ffeacc48faf --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20/explain.txt @@ -0,0 +1,137 @@ +== Physical Plan == +TakeOrderedAndProject (24) ++- * Project (23) + +- Window (22) + +- * Sort (21) + +- Exchange (20) + +- * HashAggregate (19) + +- Exchange (18) + +- * HashAggregate (17) + +- * Project (16) + +- * BroadcastHashJoin Inner BuildRight (15) + :- * Project (9) + : +- * BroadcastHashJoin Inner BuildRight (8) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.catalog_sales (1) + : +- BroadcastExchange (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.item (4) + +- BroadcastExchange (14) + +- * 
Project (13) + +- * Filter (12) + +- * ColumnarToRow (11) + +- Scan parquet default.date_dim (10) + + +(1) Scan parquet default.catalog_sales +Output [3]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_sales_price#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_sales_price#3] + +(3) Filter [codegen id : 3] +Input [3]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_sales_price#3] +Condition : (isnotnull(cs_item_sk#2) AND isnotnull(cs_sold_date_sk#1)) + +(4) Scan parquet default.item +Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/item] +PushedFilters: [In(i_category, [Sports,Books,Home]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(6) Filter [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Condition : (i_category#9 IN (Sports,Books,Home) AND isnotnull(i_item_sk#4)) + +(7) BroadcastExchange +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_item_sk#2] +Right keys [1]: [i_item_sk#4] +Join condition: None + +(9) Project [codegen id : 3] +Output [7]: [cs_sold_date_sk#1, cs_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input 
[9]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_sales_price#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(10) Scan parquet default.date_dim +Output [2]: [d_date_sk#11, d_date#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#11, d_date#12] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#11, d_date#12] +Condition : (((isnotnull(d_date#12) AND (d_date#12 >= 10644)) AND (d_date#12 <= 10674)) AND isnotnull(d_date_sk#11)) + +(13) Project [codegen id : 2] +Output [1]: [d_date_sk#11] +Input [2]: [d_date_sk#11, d_date#12] + +(14) BroadcastExchange +Input [1]: [d_date_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#11] +Join condition: None + +(16) Project [codegen id : 3] +Output [6]: [cs_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [8]: [cs_sold_date_sk#1, cs_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#11] + +(17) HashAggregate [codegen id : 3] +Input [6]: [cs_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#14] +Results [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#15] + +(18) Exchange +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, 
i_current_price#7, sum#15] +Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), true, [id=#16] + +(19) HashAggregate [codegen id : 4] +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#15] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#3))#17] +Results [8]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#3))#17,17,2) AS itemrevenue#18, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#3))#17,17,2) AS _w0#19, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#3))#17,17,2) AS _w1#20] + +(20) Exchange +Input [8]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20] +Arguments: hashpartitioning(i_class#8, 5), true, [id=#21] + +(21) Sort [codegen id : 5] +Input [8]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20] +Arguments: [i_class#8 ASC NULLS FIRST], false, 0 + +(22) Window +Input [8]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20] +Arguments: [sum(_w1#20) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#22], [i_class#8] + +(23) Project [codegen id : 6] +Output [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#19) * 100.00), DecimalType(21,2), true) as decimal(27,2))) / promote_precision(_we0#22)), DecimalType(38,17), true) AS revenueratio#23] +Input [9]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, _we0#22] + +(24) TakeOrderedAndProject +Input 
[7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, revenueratio#23] +Arguments: 100, [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#23 ASC NULLS FIRST], [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, revenueratio#23] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20/simplified.txt new file mode 100644 index 0000000000000..b462752d01701 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20/simplified.txt @@ -0,0 +1,38 @@ +TakeOrderedAndProject [i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue,revenueratio] + WholeStageCodegen (6) + Project [_w0,_we0,i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue] + InputAdapter + Window [_w1,i_class] + WholeStageCodegen (5) + Sort [i_class] + InputAdapter + Exchange [i_class] #1 + WholeStageCodegen (4) + HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,sum] [_w0,_w1,itemrevenue,sum,sum(UnscaledValue(cs_ext_sales_price))] + InputAdapter + Exchange [i_category,i_class,i_current_price,i_item_desc,i_item_id] #2 + WholeStageCodegen (3) + HashAggregate [cs_ext_sales_price,i_category,i_class,i_current_price,i_item_desc,i_item_id] [sum,sum] + Project [cs_ext_sales_price,i_category,i_class,i_current_price,i_item_desc,i_item_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_ext_sales_price,cs_sold_date_sk,i_category,i_class,i_current_price,i_item_desc,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter 
[i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22.sf100/explain.txt new file mode 100644 index 0000000000000..811539b96fced --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22.sf100/explain.txt @@ -0,0 +1,157 @@ +== Physical Plan == +TakeOrderedAndProject (28) ++- * HashAggregate (27) + +- Exchange (26) + +- * HashAggregate (25) + +- * Expand (24) + +- BroadcastNestedLoopJoin Inner BuildRight (23) + :- * Project (19) + : +- * SortMergeJoin Inner (18) + : :- * Sort (12) + : : +- Exchange (11) + : : +- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.inventory (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.date_dim (4) + : +- * Sort (17) + : +- Exchange (16) + : +- * Filter (15) + : +- * ColumnarToRow (14) + : +- Scan parquet default.item (13) + +- BroadcastExchange (22) + +- * ColumnarToRow (21) + +- Scan parquet default.warehouse (20) + + +(1) Scan parquet default.inventory +Output [3]: [inv_date_sk#1, inv_item_sk#2, inv_quantity_on_hand#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/inventory] +PushedFilters: [IsNotNull(inv_date_sk), IsNotNull(inv_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [3]: [inv_date_sk#1, 
inv_item_sk#2, inv_quantity_on_hand#3] + +(3) Filter [codegen id : 2] +Input [3]: [inv_date_sk#1, inv_item_sk#2, inv_quantity_on_hand#3] +Condition : (isnotnull(inv_date_sk#1) AND isnotnull(inv_item_sk#2)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#4, d_month_seq#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(9) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [inv_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(10) Project [codegen id : 2] +Output [2]: [inv_item_sk#2, inv_quantity_on_hand#3] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_quantity_on_hand#3, d_date_sk#4] + +(11) Exchange +Input [2]: [inv_item_sk#2, inv_quantity_on_hand#3] +Arguments: hashpartitioning(inv_item_sk#2, 5), true, [id=#7] + +(12) Sort [codegen id : 3] +Input [2]: [inv_item_sk#2, inv_quantity_on_hand#3] +Arguments: [inv_item_sk#2 ASC NULLS FIRST], false, 0 + +(13) Scan parquet default.item +Output [5]: [i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/item] 
+PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 4] +Input [5]: [i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] + +(15) Filter [codegen id : 4] +Input [5]: [i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] +Condition : isnotnull(i_item_sk#8) + +(16) Exchange +Input [5]: [i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] +Arguments: hashpartitioning(i_item_sk#8, 5), true, [id=#13] + +(17) Sort [codegen id : 5] +Input [5]: [i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] +Arguments: [i_item_sk#8 ASC NULLS FIRST], false, 0 + +(18) SortMergeJoin [codegen id : 6] +Left keys [1]: [inv_item_sk#2] +Right keys [1]: [i_item_sk#8] +Join condition: None + +(19) Project [codegen id : 6] +Output [5]: [inv_quantity_on_hand#3, i_brand#9, i_class#10, i_category#11, i_product_name#12] +Input [7]: [inv_item_sk#2, inv_quantity_on_hand#3, i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] + +(20) Scan parquet default.warehouse +Output: [] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/warehouse] +ReadSchema: struct<> + +(21) ColumnarToRow [codegen id : 7] +Input: [] + +(22) BroadcastExchange +Input: [] +Arguments: IdentityBroadcastMode, [id=#14] + +(23) BroadcastNestedLoopJoin +Join condition: None + +(24) Expand [codegen id : 8] +Input [5]: [inv_quantity_on_hand#3, i_brand#9, i_class#10, i_category#11, i_product_name#12] +Arguments: [List(inv_quantity_on_hand#3, i_product_name#12, i_brand#9, i_class#10, i_category#11, 0), List(inv_quantity_on_hand#3, i_product_name#12, i_brand#9, i_class#10, null, 1), List(inv_quantity_on_hand#3, i_product_name#12, i_brand#9, null, null, 3), List(inv_quantity_on_hand#3, i_product_name#12, null, null, null, 7), List(inv_quantity_on_hand#3, null, null, null, null, 15)], 
[inv_quantity_on_hand#3, i_product_name#15, i_brand#16, i_class#17, i_category#18, spark_grouping_id#19] + +(25) HashAggregate [codegen id : 8] +Input [6]: [inv_quantity_on_hand#3, i_product_name#15, i_brand#16, i_class#17, i_category#18, spark_grouping_id#19] +Keys [5]: [i_product_name#15, i_brand#16, i_class#17, i_category#18, spark_grouping_id#19] +Functions [1]: [partial_avg(cast(inv_quantity_on_hand#3 as bigint))] +Aggregate Attributes [2]: [sum#20, count#21] +Results [7]: [i_product_name#15, i_brand#16, i_class#17, i_category#18, spark_grouping_id#19, sum#22, count#23] + +(26) Exchange +Input [7]: [i_product_name#15, i_brand#16, i_class#17, i_category#18, spark_grouping_id#19, sum#22, count#23] +Arguments: hashpartitioning(i_product_name#15, i_brand#16, i_class#17, i_category#18, spark_grouping_id#19, 5), true, [id=#24] + +(27) HashAggregate [codegen id : 9] +Input [7]: [i_product_name#15, i_brand#16, i_class#17, i_category#18, spark_grouping_id#19, sum#22, count#23] +Keys [5]: [i_product_name#15, i_brand#16, i_class#17, i_category#18, spark_grouping_id#19] +Functions [1]: [avg(cast(inv_quantity_on_hand#3 as bigint))] +Aggregate Attributes [1]: [avg(cast(inv_quantity_on_hand#3 as bigint))#25] +Results [5]: [i_product_name#15, i_brand#16, i_class#17, i_category#18, avg(cast(inv_quantity_on_hand#3 as bigint))#25 AS qoh#26] + +(28) TakeOrderedAndProject +Input [5]: [i_product_name#15, i_brand#16, i_class#17, i_category#18, qoh#26] +Arguments: 100, [qoh#26 ASC NULLS FIRST, i_product_name#15 ASC NULLS FIRST, i_brand#16 ASC NULLS FIRST, i_class#17 ASC NULLS FIRST, i_category#18 ASC NULLS FIRST], [i_product_name#15, i_brand#16, i_class#17, i_category#18, qoh#26] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22.sf100/simplified.txt new file mode 100644 index 0000000000000..ee408c2c2e004 --- /dev/null +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22.sf100/simplified.txt @@ -0,0 +1,48 @@ +TakeOrderedAndProject [i_brand,i_category,i_class,i_product_name,qoh] + WholeStageCodegen (9) + HashAggregate [count,i_brand,i_category,i_class,i_product_name,spark_grouping_id,sum] [avg(cast(inv_quantity_on_hand as bigint)),count,qoh,sum] + InputAdapter + Exchange [i_brand,i_category,i_class,i_product_name,spark_grouping_id] #1 + WholeStageCodegen (8) + HashAggregate [i_brand,i_category,i_class,i_product_name,inv_quantity_on_hand,spark_grouping_id] [count,count,sum,sum] + Expand [i_brand,i_category,i_class,i_product_name,inv_quantity_on_hand] + InputAdapter + BroadcastNestedLoopJoin + WholeStageCodegen (6) + Project [i_brand,i_category,i_class,i_product_name,inv_quantity_on_hand] + SortMergeJoin [i_item_sk,inv_item_sk] + InputAdapter + WholeStageCodegen (3) + Sort [inv_item_sk] + InputAdapter + Exchange [inv_item_sk] #2 + WholeStageCodegen (2) + Project [inv_item_sk,inv_quantity_on_hand] + BroadcastHashJoin [d_date_sk,inv_date_sk] + Filter [inv_date_sk,inv_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] + InputAdapter + WholeStageCodegen (5) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #4 + WholeStageCodegen (4) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_category,i_class,i_item_sk,i_product_name] + BroadcastExchange #5 + WholeStageCodegen (7) + ColumnarToRow + InputAdapter + Scan parquet default.warehouse diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22/explain.txt new file mode 100644 index 
0000000000000..03cc2a5b182dc --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22/explain.txt @@ -0,0 +1,142 @@ +== Physical Plan == +TakeOrderedAndProject (25) ++- * HashAggregate (24) + +- Exchange (23) + +- * HashAggregate (22) + +- * Expand (21) + +- BroadcastNestedLoopJoin Inner BuildRight (20) + :- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.inventory (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.date_dim (4) + : +- BroadcastExchange (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet default.item (11) + +- BroadcastExchange (19) + +- * ColumnarToRow (18) + +- Scan parquet default.warehouse (17) + + +(1) Scan parquet default.inventory +Output [3]: [inv_date_sk#1, inv_item_sk#2, inv_quantity_on_hand#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/inventory] +PushedFilters: [IsNotNull(inv_date_sk), IsNotNull(inv_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [inv_date_sk#1, inv_item_sk#2, inv_quantity_on_hand#3] + +(3) Filter [codegen id : 3] +Input [3]: [inv_date_sk#1, inv_item_sk#2, inv_quantity_on_hand#3] +Condition : (isnotnull(inv_date_sk#1) AND isnotnull(inv_item_sk#2)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#4, d_month_seq#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow 
[codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [inv_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(10) Project [codegen id : 3] +Output [2]: [inv_item_sk#2, inv_quantity_on_hand#3] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_quantity_on_hand#3, d_date_sk#4] + +(11) Scan parquet default.item +Output [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] + +(13) Filter [codegen id : 2] +Input [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Condition : isnotnull(i_item_sk#7) + +(14) BroadcastExchange +Input [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [inv_item_sk#2] +Right keys [1]: [i_item_sk#7] +Join condition: None + +(16) Project [codegen id : 3] +Output [5]: [inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Input [7]: [inv_item_sk#2, inv_quantity_on_hand#3, i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] + 
+(17) Scan parquet default.warehouse +Output: [] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/warehouse] +ReadSchema: struct<> + +(18) ColumnarToRow [codegen id : 4] +Input: [] + +(19) BroadcastExchange +Input: [] +Arguments: IdentityBroadcastMode, [id=#13] + +(20) BroadcastNestedLoopJoin +Join condition: None + +(21) Expand [codegen id : 5] +Input [5]: [inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Arguments: [List(inv_quantity_on_hand#3, i_product_name#11, i_brand#8, i_class#9, i_category#10, 0), List(inv_quantity_on_hand#3, i_product_name#11, i_brand#8, i_class#9, null, 1), List(inv_quantity_on_hand#3, i_product_name#11, i_brand#8, null, null, 3), List(inv_quantity_on_hand#3, i_product_name#11, null, null, null, 7), List(inv_quantity_on_hand#3, null, null, null, null, 15)], [inv_quantity_on_hand#3, i_product_name#14, i_brand#15, i_class#16, i_category#17, spark_grouping_id#18] + +(22) HashAggregate [codegen id : 5] +Input [6]: [inv_quantity_on_hand#3, i_product_name#14, i_brand#15, i_class#16, i_category#17, spark_grouping_id#18] +Keys [5]: [i_product_name#14, i_brand#15, i_class#16, i_category#17, spark_grouping_id#18] +Functions [1]: [partial_avg(cast(inv_quantity_on_hand#3 as bigint))] +Aggregate Attributes [2]: [sum#19, count#20] +Results [7]: [i_product_name#14, i_brand#15, i_class#16, i_category#17, spark_grouping_id#18, sum#21, count#22] + +(23) Exchange +Input [7]: [i_product_name#14, i_brand#15, i_class#16, i_category#17, spark_grouping_id#18, sum#21, count#22] +Arguments: hashpartitioning(i_product_name#14, i_brand#15, i_class#16, i_category#17, spark_grouping_id#18, 5), true, [id=#23] + +(24) HashAggregate [codegen id : 6] +Input [7]: [i_product_name#14, i_brand#15, i_class#16, i_category#17, spark_grouping_id#18, sum#21, count#22] +Keys [5]: [i_product_name#14, i_brand#15, i_class#16, i_category#17, 
spark_grouping_id#18] +Functions [1]: [avg(cast(inv_quantity_on_hand#3 as bigint))] +Aggregate Attributes [1]: [avg(cast(inv_quantity_on_hand#3 as bigint))#24] +Results [5]: [i_product_name#14, i_brand#15, i_class#16, i_category#17, avg(cast(inv_quantity_on_hand#3 as bigint))#24 AS qoh#25] + +(25) TakeOrderedAndProject +Input [5]: [i_product_name#14, i_brand#15, i_class#16, i_category#17, qoh#25] +Arguments: 100, [qoh#25 ASC NULLS FIRST, i_product_name#14 ASC NULLS FIRST, i_brand#15 ASC NULLS FIRST, i_class#16 ASC NULLS FIRST, i_category#17 ASC NULLS FIRST], [i_product_name#14, i_brand#15, i_class#16, i_category#17, qoh#25] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22/simplified.txt new file mode 100644 index 0000000000000..91c091e1d7c29 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22/simplified.txt @@ -0,0 +1,39 @@ +TakeOrderedAndProject [i_brand,i_category,i_class,i_product_name,qoh] + WholeStageCodegen (6) + HashAggregate [count,i_brand,i_category,i_class,i_product_name,spark_grouping_id,sum] [avg(cast(inv_quantity_on_hand as bigint)),count,qoh,sum] + InputAdapter + Exchange [i_brand,i_category,i_class,i_product_name,spark_grouping_id] #1 + WholeStageCodegen (5) + HashAggregate [i_brand,i_category,i_class,i_product_name,inv_quantity_on_hand,spark_grouping_id] [count,count,sum,sum] + Expand [i_brand,i_category,i_class,i_product_name,inv_quantity_on_hand] + InputAdapter + BroadcastNestedLoopJoin + WholeStageCodegen (3) + Project [i_brand,i_category,i_class,i_product_name,inv_quantity_on_hand] + BroadcastHashJoin [i_item_sk,inv_item_sk] + Project [inv_item_sk,inv_quantity_on_hand] + BroadcastHashJoin [d_date_sk,inv_date_sk] + Filter [inv_date_sk,inv_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand] + InputAdapter + 
BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_category,i_class,i_item_sk,i_product_name] + BroadcastExchange #4 + WholeStageCodegen (4) + ColumnarToRow + InputAdapter + Scan parquet default.warehouse diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22a.sf100/explain.txt new file mode 100644 index 0000000000000..415c62a070bab --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22a.sf100/explain.txt @@ -0,0 +1,316 @@ +== Physical Plan == +TakeOrderedAndProject (52) ++- Union (51) + :- * HashAggregate (30) + : +- * HashAggregate (29) + : +- * HashAggregate (28) + : +- Exchange (27) + : +- * HashAggregate (26) + : +- * Project (25) + : +- * SortMergeJoin Inner (24) + : :- * Sort (18) + : : +- Exchange (17) + : : +- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.inventory (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.warehouse (4) + : : +- BroadcastExchange (14) + : : +- * Project (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet default.date_dim (10) + : +- * Sort (23) + : +- Exchange (22) + : +- * Filter (21) + : +- * ColumnarToRow (20) + : +- Scan parquet default.item (19) + :- * HashAggregate (35) + : +- Exchange (34) + : +- * HashAggregate (33) + : +- * HashAggregate (32) + : +- ReusedExchange (31) + :- * HashAggregate (40) + 
: +- Exchange (39) + : +- * HashAggregate (38) + : +- * HashAggregate (37) + : +- ReusedExchange (36) + :- * HashAggregate (45) + : +- Exchange (44) + : +- * HashAggregate (43) + : +- * HashAggregate (42) + : +- ReusedExchange (41) + +- * HashAggregate (50) + +- Exchange (49) + +- * HashAggregate (48) + +- * HashAggregate (47) + +- ReusedExchange (46) + + +(1) Scan parquet default.inventory +Output [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/inventory] +PushedFilters: [IsNotNull(inv_date_sk), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] + +(3) Filter [codegen id : 3] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Condition : ((isnotnull(inv_date_sk#1) AND isnotnull(inv_item_sk#2)) AND isnotnull(inv_warehouse_sk#3)) + +(4) Scan parquet default.warehouse +Output [1]: [w_warehouse_sk#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [1]: [w_warehouse_sk#5] + +(6) Filter [codegen id : 1] +Input [1]: [w_warehouse_sk#5] +Condition : isnotnull(w_warehouse_sk#5) + +(7) BroadcastExchange +Input [1]: [w_warehouse_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#6] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [inv_warehouse_sk#3] +Right keys [1]: [w_warehouse_sk#5] +Join condition: None + +(9) Project [codegen id : 3] +Output [3]: [inv_date_sk#1, inv_item_sk#2, inv_quantity_on_hand#4] 
+Input [5]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, w_warehouse_sk#5] + +(10) Scan parquet default.date_dim +Output [2]: [d_date_sk#7, d_month_seq#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#7, d_month_seq#8] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#7, d_month_seq#8] +Condition : (((isnotnull(d_month_seq#8) AND (d_month_seq#8 >= 1212)) AND (d_month_seq#8 <= 1223)) AND isnotnull(d_date_sk#7)) + +(13) Project [codegen id : 2] +Output [1]: [d_date_sk#7] +Input [2]: [d_date_sk#7, d_month_seq#8] + +(14) BroadcastExchange +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [inv_date_sk#1] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(16) Project [codegen id : 3] +Output [2]: [inv_item_sk#2, inv_quantity_on_hand#4] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_quantity_on_hand#4, d_date_sk#7] + +(17) Exchange +Input [2]: [inv_item_sk#2, inv_quantity_on_hand#4] +Arguments: hashpartitioning(inv_item_sk#2, 5), true, [id=#10] + +(18) Sort [codegen id : 4] +Input [2]: [inv_item_sk#2, inv_quantity_on_hand#4] +Arguments: [inv_item_sk#2 ASC NULLS FIRST], false, 0 + +(19) Scan parquet default.item +Output [5]: [i_item_sk#11, i_brand#12, i_class#13, i_category#14, i_product_name#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 5] 
+Input [5]: [i_item_sk#11, i_brand#12, i_class#13, i_category#14, i_product_name#15] + +(21) Filter [codegen id : 5] +Input [5]: [i_item_sk#11, i_brand#12, i_class#13, i_category#14, i_product_name#15] +Condition : isnotnull(i_item_sk#11) + +(22) Exchange +Input [5]: [i_item_sk#11, i_brand#12, i_class#13, i_category#14, i_product_name#15] +Arguments: hashpartitioning(i_item_sk#11, 5), true, [id=#16] + +(23) Sort [codegen id : 6] +Input [5]: [i_item_sk#11, i_brand#12, i_class#13, i_category#14, i_product_name#15] +Arguments: [i_item_sk#11 ASC NULLS FIRST], false, 0 + +(24) SortMergeJoin [codegen id : 7] +Left keys [1]: [inv_item_sk#2] +Right keys [1]: [i_item_sk#11] +Join condition: None + +(25) Project [codegen id : 7] +Output [5]: [inv_quantity_on_hand#4, i_brand#12, i_class#13, i_category#14, i_product_name#15] +Input [7]: [inv_item_sk#2, inv_quantity_on_hand#4, i_item_sk#11, i_brand#12, i_class#13, i_category#14, i_product_name#15] + +(26) HashAggregate [codegen id : 7] +Input [5]: [inv_quantity_on_hand#4, i_brand#12, i_class#13, i_category#14, i_product_name#15] +Keys [4]: [i_product_name#15, i_brand#12, i_class#13, i_category#14] +Functions [1]: [partial_avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [2]: [sum#17, count#18] +Results [6]: [i_product_name#15, i_brand#12, i_class#13, i_category#14, sum#19, count#20] + +(27) Exchange +Input [6]: [i_product_name#15, i_brand#12, i_class#13, i_category#14, sum#19, count#20] +Arguments: hashpartitioning(i_product_name#15, i_brand#12, i_class#13, i_category#14, 5), true, [id=#21] + +(28) HashAggregate [codegen id : 8] +Input [6]: [i_product_name#15, i_brand#12, i_class#13, i_category#14, sum#19, count#20] +Keys [4]: [i_product_name#15, i_brand#12, i_class#13, i_category#14] +Functions [1]: [avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [1]: [avg(cast(inv_quantity_on_hand#4 as bigint))#22] +Results [5]: [i_product_name#15, i_brand#12, i_class#13, i_category#14, 
avg(cast(inv_quantity_on_hand#4 as bigint))#22 AS qoh#23] + +(29) HashAggregate [codegen id : 8] +Input [5]: [i_product_name#15, i_brand#12, i_class#13, i_category#14, qoh#23] +Keys [4]: [i_product_name#15, i_brand#12, i_class#13, i_category#14] +Functions [1]: [partial_avg(qoh#23)] +Aggregate Attributes [2]: [sum#24, count#25] +Results [6]: [i_product_name#15, i_brand#12, i_class#13, i_category#14, sum#26, count#27] + +(30) HashAggregate [codegen id : 8] +Input [6]: [i_product_name#15, i_brand#12, i_class#13, i_category#14, sum#26, count#27] +Keys [4]: [i_product_name#15, i_brand#12, i_class#13, i_category#14] +Functions [1]: [avg(qoh#23)] +Aggregate Attributes [1]: [avg(qoh#23)#28] +Results [5]: [i_product_name#15, i_brand#12, i_class#13, i_category#14, avg(qoh#23)#28 AS qoh#29] + +(31) ReusedExchange [Reuses operator id: 27] +Output [6]: [i_product_name#15, i_brand#12, i_class#13, i_category#14, sum#30, count#31] + +(32) HashAggregate [codegen id : 16] +Input [6]: [i_product_name#15, i_brand#12, i_class#13, i_category#14, sum#30, count#31] +Keys [4]: [i_product_name#15, i_brand#12, i_class#13, i_category#14] +Functions [1]: [avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [1]: [avg(cast(inv_quantity_on_hand#4 as bigint))#32] +Results [4]: [i_product_name#15, i_brand#12, i_class#13, avg(cast(inv_quantity_on_hand#4 as bigint))#32 AS qoh#23] + +(33) HashAggregate [codegen id : 16] +Input [4]: [i_product_name#15, i_brand#12, i_class#13, qoh#23] +Keys [3]: [i_product_name#15, i_brand#12, i_class#13] +Functions [1]: [partial_avg(qoh#23)] +Aggregate Attributes [2]: [sum#33, count#34] +Results [5]: [i_product_name#15, i_brand#12, i_class#13, sum#35, count#36] + +(34) Exchange +Input [5]: [i_product_name#15, i_brand#12, i_class#13, sum#35, count#36] +Arguments: hashpartitioning(i_product_name#15, i_brand#12, i_class#13, 5), true, [id=#37] + +(35) HashAggregate [codegen id : 17] +Input [5]: [i_product_name#15, i_brand#12, i_class#13, sum#35, count#36] 
+Keys [3]: [i_product_name#15, i_brand#12, i_class#13] +Functions [1]: [avg(qoh#23)] +Aggregate Attributes [1]: [avg(qoh#23)#38] +Results [5]: [i_product_name#15, i_brand#12, i_class#13, null AS i_category#39, avg(qoh#23)#38 AS qoh#40] + +(36) ReusedExchange [Reuses operator id: 27] +Output [6]: [i_product_name#15, i_brand#12, i_class#13, i_category#14, sum#41, count#42] + +(37) HashAggregate [codegen id : 25] +Input [6]: [i_product_name#15, i_brand#12, i_class#13, i_category#14, sum#41, count#42] +Keys [4]: [i_product_name#15, i_brand#12, i_class#13, i_category#14] +Functions [1]: [avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [1]: [avg(cast(inv_quantity_on_hand#4 as bigint))#43] +Results [3]: [i_product_name#15, i_brand#12, avg(cast(inv_quantity_on_hand#4 as bigint))#43 AS qoh#23] + +(38) HashAggregate [codegen id : 25] +Input [3]: [i_product_name#15, i_brand#12, qoh#23] +Keys [2]: [i_product_name#15, i_brand#12] +Functions [1]: [partial_avg(qoh#23)] +Aggregate Attributes [2]: [sum#44, count#45] +Results [4]: [i_product_name#15, i_brand#12, sum#46, count#47] + +(39) Exchange +Input [4]: [i_product_name#15, i_brand#12, sum#46, count#47] +Arguments: hashpartitioning(i_product_name#15, i_brand#12, 5), true, [id=#48] + +(40) HashAggregate [codegen id : 26] +Input [4]: [i_product_name#15, i_brand#12, sum#46, count#47] +Keys [2]: [i_product_name#15, i_brand#12] +Functions [1]: [avg(qoh#23)] +Aggregate Attributes [1]: [avg(qoh#23)#49] +Results [5]: [i_product_name#15, i_brand#12, null AS i_class#50, null AS i_category#51, avg(qoh#23)#49 AS qoh#52] + +(41) ReusedExchange [Reuses operator id: 27] +Output [6]: [i_product_name#15, i_brand#12, i_class#13, i_category#14, sum#53, count#54] + +(42) HashAggregate [codegen id : 34] +Input [6]: [i_product_name#15, i_brand#12, i_class#13, i_category#14, sum#53, count#54] +Keys [4]: [i_product_name#15, i_brand#12, i_class#13, i_category#14] +Functions [1]: [avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate 
Attributes [1]: [avg(cast(inv_quantity_on_hand#4 as bigint))#55] +Results [2]: [i_product_name#15, avg(cast(inv_quantity_on_hand#4 as bigint))#55 AS qoh#23] + +(43) HashAggregate [codegen id : 34] +Input [2]: [i_product_name#15, qoh#23] +Keys [1]: [i_product_name#15] +Functions [1]: [partial_avg(qoh#23)] +Aggregate Attributes [2]: [sum#56, count#57] +Results [3]: [i_product_name#15, sum#58, count#59] + +(44) Exchange +Input [3]: [i_product_name#15, sum#58, count#59] +Arguments: hashpartitioning(i_product_name#15, 5), true, [id=#60] + +(45) HashAggregate [codegen id : 35] +Input [3]: [i_product_name#15, sum#58, count#59] +Keys [1]: [i_product_name#15] +Functions [1]: [avg(qoh#23)] +Aggregate Attributes [1]: [avg(qoh#23)#61] +Results [5]: [i_product_name#15, null AS i_brand#62, null AS i_class#63, null AS i_category#64, avg(qoh#23)#61 AS qoh#65] + +(46) ReusedExchange [Reuses operator id: 27] +Output [6]: [i_product_name#15, i_brand#12, i_class#13, i_category#14, sum#66, count#67] + +(47) HashAggregate [codegen id : 43] +Input [6]: [i_product_name#15, i_brand#12, i_class#13, i_category#14, sum#66, count#67] +Keys [4]: [i_product_name#15, i_brand#12, i_class#13, i_category#14] +Functions [1]: [avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [1]: [avg(cast(inv_quantity_on_hand#4 as bigint))#68] +Results [1]: [avg(cast(inv_quantity_on_hand#4 as bigint))#68 AS qoh#23] + +(48) HashAggregate [codegen id : 43] +Input [1]: [qoh#23] +Keys: [] +Functions [1]: [partial_avg(qoh#23)] +Aggregate Attributes [2]: [sum#69, count#70] +Results [2]: [sum#71, count#72] + +(49) Exchange +Input [2]: [sum#71, count#72] +Arguments: SinglePartition, true, [id=#73] + +(50) HashAggregate [codegen id : 44] +Input [2]: [sum#71, count#72] +Keys: [] +Functions [1]: [avg(qoh#23)] +Aggregate Attributes [1]: [avg(qoh#23)#74] +Results [5]: [null AS i_product_name#75, null AS i_brand#76, null AS i_class#77, null AS i_category#78, avg(qoh#23)#74 AS qoh#79] + +(51) Union + +(52) 
TakeOrderedAndProject +Input [5]: [i_product_name#15, i_brand#12, i_class#13, i_category#14, qoh#29] +Arguments: 100, [qoh#29 ASC NULLS FIRST, i_product_name#15 ASC NULLS FIRST, i_brand#12 ASC NULLS FIRST, i_class#13 ASC NULLS FIRST, i_category#14 ASC NULLS FIRST], [i_product_name#15, i_brand#12, i_class#13, i_category#14, qoh#29] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22a.sf100/simplified.txt new file mode 100644 index 0000000000000..efa57dd3395d0 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22a.sf100/simplified.txt @@ -0,0 +1,87 @@ +TakeOrderedAndProject [i_brand,i_category,i_class,i_product_name,qoh] + Union + WholeStageCodegen (8) + HashAggregate [count,i_brand,i_category,i_class,i_product_name,sum] [avg(qoh),count,qoh,sum] + HashAggregate [i_brand,i_category,i_class,i_product_name,qoh] [count,count,sum,sum] + HashAggregate [count,i_brand,i_category,i_class,i_product_name,sum] [avg(cast(inv_quantity_on_hand as bigint)),count,qoh,sum] + InputAdapter + Exchange [i_brand,i_category,i_class,i_product_name] #1 + WholeStageCodegen (7) + HashAggregate [i_brand,i_category,i_class,i_product_name,inv_quantity_on_hand] [count,count,sum,sum] + Project [i_brand,i_category,i_class,i_product_name,inv_quantity_on_hand] + SortMergeJoin [i_item_sk,inv_item_sk] + InputAdapter + WholeStageCodegen (4) + Sort [inv_item_sk] + InputAdapter + Exchange [inv_item_sk] #2 + WholeStageCodegen (3) + Project [inv_item_sk,inv_quantity_on_hand] + BroadcastHashJoin [d_date_sk,inv_date_sk] + Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + InputAdapter + BroadcastExchange #3 
+ WholeStageCodegen (1) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.warehouse [w_warehouse_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] + InputAdapter + WholeStageCodegen (6) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #5 + WholeStageCodegen (5) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_category,i_class,i_item_sk,i_product_name] + WholeStageCodegen (17) + HashAggregate [count,i_brand,i_class,i_product_name,sum] [avg(qoh),count,i_category,qoh,sum] + InputAdapter + Exchange [i_brand,i_class,i_product_name] #6 + WholeStageCodegen (16) + HashAggregate [i_brand,i_class,i_product_name,qoh] [count,count,sum,sum] + HashAggregate [count,i_brand,i_category,i_class,i_product_name,sum] [avg(cast(inv_quantity_on_hand as bigint)),count,qoh,sum] + InputAdapter + ReusedExchange [count,i_brand,i_category,i_class,i_product_name,sum] #1 + WholeStageCodegen (26) + HashAggregate [count,i_brand,i_product_name,sum] [avg(qoh),count,i_category,i_class,qoh,sum] + InputAdapter + Exchange [i_brand,i_product_name] #7 + WholeStageCodegen (25) + HashAggregate [i_brand,i_product_name,qoh] [count,count,sum,sum] + HashAggregate [count,i_brand,i_category,i_class,i_product_name,sum] [avg(cast(inv_quantity_on_hand as bigint)),count,qoh,sum] + InputAdapter + ReusedExchange [count,i_brand,i_category,i_class,i_product_name,sum] #1 + WholeStageCodegen (35) + HashAggregate [count,i_product_name,sum] [avg(qoh),count,i_brand,i_category,i_class,qoh,sum] + InputAdapter + Exchange [i_product_name] #8 + WholeStageCodegen (34) + HashAggregate [i_product_name,qoh] [count,count,sum,sum] + HashAggregate [count,i_brand,i_category,i_class,i_product_name,sum] [avg(cast(inv_quantity_on_hand as bigint)),count,qoh,sum] + InputAdapter + ReusedExchange 
[count,i_brand,i_category,i_class,i_product_name,sum] #1 + WholeStageCodegen (44) + HashAggregate [count,sum] [avg(qoh),count,i_brand,i_category,i_class,i_product_name,qoh,sum] + InputAdapter + Exchange #9 + WholeStageCodegen (43) + HashAggregate [qoh] [count,count,sum,sum] + HashAggregate [count,i_brand,i_category,i_class,i_product_name,sum] [avg(cast(inv_quantity_on_hand as bigint)),count,qoh,sum] + InputAdapter + ReusedExchange [count,i_brand,i_category,i_class,i_product_name,sum] #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22a/explain.txt new file mode 100644 index 0000000000000..340c432c78489 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22a/explain.txt @@ -0,0 +1,301 @@ +== Physical Plan == +TakeOrderedAndProject (49) ++- Union (48) + :- * HashAggregate (27) + : +- * HashAggregate (26) + : +- * HashAggregate (25) + : +- Exchange (24) + : +- * HashAggregate (23) + : +- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.inventory (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.item (11) + : +- BroadcastExchange (20) + : +- * Filter (19) + : +- * ColumnarToRow (18) + : +- Scan parquet default.warehouse (17) + :- * HashAggregate (32) + : +- Exchange (31) + : +- * HashAggregate (30) + : +- * HashAggregate (29) + : +- ReusedExchange (28) + :- * HashAggregate (37) + : +- Exchange (36) + : +- * HashAggregate (35) + : +- * 
HashAggregate (34) + : +- ReusedExchange (33) + :- * HashAggregate (42) + : +- Exchange (41) + : +- * HashAggregate (40) + : +- * HashAggregate (39) + : +- ReusedExchange (38) + +- * HashAggregate (47) + +- Exchange (46) + +- * HashAggregate (45) + +- * HashAggregate (44) + +- ReusedExchange (43) + + +(1) Scan parquet default.inventory +Output [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/inventory] +PushedFilters: [IsNotNull(inv_date_sk), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] + +(3) Filter [codegen id : 4] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Condition : ((isnotnull(inv_date_sk#1) AND isnotnull(inv_item_sk#2)) AND isnotnull(inv_warehouse_sk#3)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#5, d_month_seq#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#5, d_month_seq#6] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#5, d_month_seq#6] +Condition : (((isnotnull(d_month_seq#6) AND (d_month_seq#6 >= 1212)) AND (d_month_seq#6 <= 1223)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [2]: [d_date_sk#5, d_month_seq#6] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] + +(9) 
BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_date_sk#1] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(10) Project [codegen id : 4] +Output [3]: [inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Input [5]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, d_date_sk#5] + +(11) Scan parquet default.item +Output [5]: [i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [5]: [i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] + +(13) Filter [codegen id : 2] +Input [5]: [i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] +Condition : isnotnull(i_item_sk#8) + +(14) BroadcastExchange +Input [5]: [i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#13] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_item_sk#2] +Right keys [1]: [i_item_sk#8] +Join condition: None + +(16) Project [codegen id : 4] +Output [6]: [inv_warehouse_sk#3, inv_quantity_on_hand#4, i_brand#9, i_class#10, i_category#11, i_product_name#12] +Input [8]: [inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] + +(17) Scan parquet default.warehouse +Output [1]: [w_warehouse_sk#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [1]: [w_warehouse_sk#14] + +(19) Filter [codegen id : 3] +Input [1]: 
[w_warehouse_sk#14] +Condition : isnotnull(w_warehouse_sk#14) + +(20) BroadcastExchange +Input [1]: [w_warehouse_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#15] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_warehouse_sk#3] +Right keys [1]: [w_warehouse_sk#14] +Join condition: None + +(22) Project [codegen id : 4] +Output [5]: [inv_quantity_on_hand#4, i_brand#9, i_class#10, i_category#11, i_product_name#12] +Input [7]: [inv_warehouse_sk#3, inv_quantity_on_hand#4, i_brand#9, i_class#10, i_category#11, i_product_name#12, w_warehouse_sk#14] + +(23) HashAggregate [codegen id : 4] +Input [5]: [inv_quantity_on_hand#4, i_brand#9, i_class#10, i_category#11, i_product_name#12] +Keys [4]: [i_product_name#12, i_brand#9, i_class#10, i_category#11] +Functions [1]: [partial_avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [2]: [sum#16, count#17] +Results [6]: [i_product_name#12, i_brand#9, i_class#10, i_category#11, sum#18, count#19] + +(24) Exchange +Input [6]: [i_product_name#12, i_brand#9, i_class#10, i_category#11, sum#18, count#19] +Arguments: hashpartitioning(i_product_name#12, i_brand#9, i_class#10, i_category#11, 5), true, [id=#20] + +(25) HashAggregate [codegen id : 5] +Input [6]: [i_product_name#12, i_brand#9, i_class#10, i_category#11, sum#18, count#19] +Keys [4]: [i_product_name#12, i_brand#9, i_class#10, i_category#11] +Functions [1]: [avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [1]: [avg(cast(inv_quantity_on_hand#4 as bigint))#21] +Results [5]: [i_product_name#12, i_brand#9, i_class#10, i_category#11, avg(cast(inv_quantity_on_hand#4 as bigint))#21 AS qoh#22] + +(26) HashAggregate [codegen id : 5] +Input [5]: [i_product_name#12, i_brand#9, i_class#10, i_category#11, qoh#22] +Keys [4]: [i_product_name#12, i_brand#9, i_class#10, i_category#11] +Functions [1]: [partial_avg(qoh#22)] +Aggregate Attributes [2]: [sum#23, count#24] +Results [6]: 
[i_product_name#12, i_brand#9, i_class#10, i_category#11, sum#25, count#26] + +(27) HashAggregate [codegen id : 5] +Input [6]: [i_product_name#12, i_brand#9, i_class#10, i_category#11, sum#25, count#26] +Keys [4]: [i_product_name#12, i_brand#9, i_class#10, i_category#11] +Functions [1]: [avg(qoh#22)] +Aggregate Attributes [1]: [avg(qoh#22)#27] +Results [5]: [i_product_name#12, i_brand#9, i_class#10, i_category#11, avg(qoh#22)#27 AS qoh#28] + +(28) ReusedExchange [Reuses operator id: 24] +Output [6]: [i_product_name#12, i_brand#9, i_class#10, i_category#11, sum#29, count#30] + +(29) HashAggregate [codegen id : 10] +Input [6]: [i_product_name#12, i_brand#9, i_class#10, i_category#11, sum#29, count#30] +Keys [4]: [i_product_name#12, i_brand#9, i_class#10, i_category#11] +Functions [1]: [avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [1]: [avg(cast(inv_quantity_on_hand#4 as bigint))#31] +Results [4]: [i_product_name#12, i_brand#9, i_class#10, avg(cast(inv_quantity_on_hand#4 as bigint))#31 AS qoh#22] + +(30) HashAggregate [codegen id : 10] +Input [4]: [i_product_name#12, i_brand#9, i_class#10, qoh#22] +Keys [3]: [i_product_name#12, i_brand#9, i_class#10] +Functions [1]: [partial_avg(qoh#22)] +Aggregate Attributes [2]: [sum#32, count#33] +Results [5]: [i_product_name#12, i_brand#9, i_class#10, sum#34, count#35] + +(31) Exchange +Input [5]: [i_product_name#12, i_brand#9, i_class#10, sum#34, count#35] +Arguments: hashpartitioning(i_product_name#12, i_brand#9, i_class#10, 5), true, [id=#36] + +(32) HashAggregate [codegen id : 11] +Input [5]: [i_product_name#12, i_brand#9, i_class#10, sum#34, count#35] +Keys [3]: [i_product_name#12, i_brand#9, i_class#10] +Functions [1]: [avg(qoh#22)] +Aggregate Attributes [1]: [avg(qoh#22)#37] +Results [5]: [i_product_name#12, i_brand#9, i_class#10, null AS i_category#38, avg(qoh#22)#37 AS qoh#39] + +(33) ReusedExchange [Reuses operator id: 24] +Output [6]: [i_product_name#12, i_brand#9, i_class#10, i_category#11, 
sum#40, count#41] + +(34) HashAggregate [codegen id : 16] +Input [6]: [i_product_name#12, i_brand#9, i_class#10, i_category#11, sum#40, count#41] +Keys [4]: [i_product_name#12, i_brand#9, i_class#10, i_category#11] +Functions [1]: [avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [1]: [avg(cast(inv_quantity_on_hand#4 as bigint))#42] +Results [3]: [i_product_name#12, i_brand#9, avg(cast(inv_quantity_on_hand#4 as bigint))#42 AS qoh#22] + +(35) HashAggregate [codegen id : 16] +Input [3]: [i_product_name#12, i_brand#9, qoh#22] +Keys [2]: [i_product_name#12, i_brand#9] +Functions [1]: [partial_avg(qoh#22)] +Aggregate Attributes [2]: [sum#43, count#44] +Results [4]: [i_product_name#12, i_brand#9, sum#45, count#46] + +(36) Exchange +Input [4]: [i_product_name#12, i_brand#9, sum#45, count#46] +Arguments: hashpartitioning(i_product_name#12, i_brand#9, 5), true, [id=#47] + +(37) HashAggregate [codegen id : 17] +Input [4]: [i_product_name#12, i_brand#9, sum#45, count#46] +Keys [2]: [i_product_name#12, i_brand#9] +Functions [1]: [avg(qoh#22)] +Aggregate Attributes [1]: [avg(qoh#22)#48] +Results [5]: [i_product_name#12, i_brand#9, null AS i_class#49, null AS i_category#50, avg(qoh#22)#48 AS qoh#51] + +(38) ReusedExchange [Reuses operator id: 24] +Output [6]: [i_product_name#12, i_brand#9, i_class#10, i_category#11, sum#52, count#53] + +(39) HashAggregate [codegen id : 22] +Input [6]: [i_product_name#12, i_brand#9, i_class#10, i_category#11, sum#52, count#53] +Keys [4]: [i_product_name#12, i_brand#9, i_class#10, i_category#11] +Functions [1]: [avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [1]: [avg(cast(inv_quantity_on_hand#4 as bigint))#54] +Results [2]: [i_product_name#12, avg(cast(inv_quantity_on_hand#4 as bigint))#54 AS qoh#22] + +(40) HashAggregate [codegen id : 22] +Input [2]: [i_product_name#12, qoh#22] +Keys [1]: [i_product_name#12] +Functions [1]: [partial_avg(qoh#22)] +Aggregate Attributes [2]: [sum#55, count#56] +Results [3]: 
[i_product_name#12, sum#57, count#58] + +(41) Exchange +Input [3]: [i_product_name#12, sum#57, count#58] +Arguments: hashpartitioning(i_product_name#12, 5), true, [id=#59] + +(42) HashAggregate [codegen id : 23] +Input [3]: [i_product_name#12, sum#57, count#58] +Keys [1]: [i_product_name#12] +Functions [1]: [avg(qoh#22)] +Aggregate Attributes [1]: [avg(qoh#22)#60] +Results [5]: [i_product_name#12, null AS i_brand#61, null AS i_class#62, null AS i_category#63, avg(qoh#22)#60 AS qoh#64] + +(43) ReusedExchange [Reuses operator id: 24] +Output [6]: [i_product_name#12, i_brand#9, i_class#10, i_category#11, sum#65, count#66] + +(44) HashAggregate [codegen id : 28] +Input [6]: [i_product_name#12, i_brand#9, i_class#10, i_category#11, sum#65, count#66] +Keys [4]: [i_product_name#12, i_brand#9, i_class#10, i_category#11] +Functions [1]: [avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [1]: [avg(cast(inv_quantity_on_hand#4 as bigint))#67] +Results [1]: [avg(cast(inv_quantity_on_hand#4 as bigint))#67 AS qoh#22] + +(45) HashAggregate [codegen id : 28] +Input [1]: [qoh#22] +Keys: [] +Functions [1]: [partial_avg(qoh#22)] +Aggregate Attributes [2]: [sum#68, count#69] +Results [2]: [sum#70, count#71] + +(46) Exchange +Input [2]: [sum#70, count#71] +Arguments: SinglePartition, true, [id=#72] + +(47) HashAggregate [codegen id : 29] +Input [2]: [sum#70, count#71] +Keys: [] +Functions [1]: [avg(qoh#22)] +Aggregate Attributes [1]: [avg(qoh#22)#73] +Results [5]: [null AS i_product_name#74, null AS i_brand#75, null AS i_class#76, null AS i_category#77, avg(qoh#22)#73 AS qoh#78] + +(48) Union + +(49) TakeOrderedAndProject +Input [5]: [i_product_name#12, i_brand#9, i_class#10, i_category#11, qoh#28] +Arguments: 100, [qoh#28 ASC NULLS FIRST, i_product_name#12 ASC NULLS FIRST, i_brand#9 ASC NULLS FIRST, i_class#10 ASC NULLS FIRST, i_category#11 ASC NULLS FIRST], [i_product_name#12, i_brand#9, i_class#10, i_category#11, qoh#28] + diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22a/simplified.txt new file mode 100644 index 0000000000000..0b68b7090b0a2 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22a/simplified.txt @@ -0,0 +1,78 @@ +TakeOrderedAndProject [i_brand,i_category,i_class,i_product_name,qoh] + Union + WholeStageCodegen (5) + HashAggregate [count,i_brand,i_category,i_class,i_product_name,sum] [avg(qoh),count,qoh,sum] + HashAggregate [i_brand,i_category,i_class,i_product_name,qoh] [count,count,sum,sum] + HashAggregate [count,i_brand,i_category,i_class,i_product_name,sum] [avg(cast(inv_quantity_on_hand as bigint)),count,qoh,sum] + InputAdapter + Exchange [i_brand,i_category,i_class,i_product_name] #1 + WholeStageCodegen (4) + HashAggregate [i_brand,i_category,i_class,i_product_name,inv_quantity_on_hand] [count,count,sum,sum] + Project [i_brand,i_category,i_class,i_product_name,inv_quantity_on_hand] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [i_brand,i_category,i_class,i_product_name,inv_quantity_on_hand,inv_warehouse_sk] + BroadcastHashJoin [i_item_sk,inv_item_sk] + Project [inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + BroadcastHashJoin [d_date_sk,inv_date_sk] + Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_category,i_class,i_item_sk,i_product_name] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter 
[w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.warehouse [w_warehouse_sk] + WholeStageCodegen (11) + HashAggregate [count,i_brand,i_class,i_product_name,sum] [avg(qoh),count,i_category,qoh,sum] + InputAdapter + Exchange [i_brand,i_class,i_product_name] #5 + WholeStageCodegen (10) + HashAggregate [i_brand,i_class,i_product_name,qoh] [count,count,sum,sum] + HashAggregate [count,i_brand,i_category,i_class,i_product_name,sum] [avg(cast(inv_quantity_on_hand as bigint)),count,qoh,sum] + InputAdapter + ReusedExchange [count,i_brand,i_category,i_class,i_product_name,sum] #1 + WholeStageCodegen (17) + HashAggregate [count,i_brand,i_product_name,sum] [avg(qoh),count,i_category,i_class,qoh,sum] + InputAdapter + Exchange [i_brand,i_product_name] #6 + WholeStageCodegen (16) + HashAggregate [i_brand,i_product_name,qoh] [count,count,sum,sum] + HashAggregate [count,i_brand,i_category,i_class,i_product_name,sum] [avg(cast(inv_quantity_on_hand as bigint)),count,qoh,sum] + InputAdapter + ReusedExchange [count,i_brand,i_category,i_class,i_product_name,sum] #1 + WholeStageCodegen (23) + HashAggregate [count,i_product_name,sum] [avg(qoh),count,i_brand,i_category,i_class,qoh,sum] + InputAdapter + Exchange [i_product_name] #7 + WholeStageCodegen (22) + HashAggregate [i_product_name,qoh] [count,count,sum,sum] + HashAggregate [count,i_brand,i_category,i_class,i_product_name,sum] [avg(cast(inv_quantity_on_hand as bigint)),count,qoh,sum] + InputAdapter + ReusedExchange [count,i_brand,i_category,i_class,i_product_name,sum] #1 + WholeStageCodegen (29) + HashAggregate [count,sum] [avg(qoh),count,i_brand,i_category,i_class,i_product_name,qoh,sum] + InputAdapter + Exchange #8 + WholeStageCodegen (28) + HashAggregate [qoh] [count,count,sum,sum] + HashAggregate [count,i_brand,i_category,i_class,i_product_name,sum] [avg(cast(inv_quantity_on_hand as bigint)),count,qoh,sum] + InputAdapter + ReusedExchange [count,i_brand,i_category,i_class,i_product_name,sum] #1 diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.sf100/explain.txt new file mode 100644 index 0000000000000..8f01636da7200 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.sf100/explain.txt @@ -0,0 +1,532 @@ +== Physical Plan == +* Sort (47) ++- Exchange (46) + +- * Project (45) + +- * Filter (44) + +- * HashAggregate (43) + +- Exchange (42) + +- * HashAggregate (41) + +- * HashAggregate (40) + +- Exchange (39) + +- * HashAggregate (38) + +- * Project (37) + +- * SortMergeJoin Inner (36) + :- * Sort (30) + : +- Exchange (29) + : +- * Project (28) + : +- * BroadcastHashJoin Inner BuildRight (27) + : :- * Project (22) + : : +- * BroadcastHashJoin Inner BuildLeft (21) + : : :- BroadcastExchange (17) + : : : +- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildLeft (15) + : : : :- BroadcastExchange (11) + : : : : +- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildLeft (9) + : : : : :- BroadcastExchange (5) + : : : : : +- * Project (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store (1) + : : : : +- * Filter (8) + : : : : +- * ColumnarToRow (7) + : : : : +- Scan parquet default.customer_address (6) + : : : +- * Filter (14) + : : : +- * ColumnarToRow (13) + : : : +- Scan parquet default.customer (12) + : : +- * Filter (20) + : : +- * ColumnarToRow (19) + : : +- Scan parquet default.store_sales (18) + : +- BroadcastExchange (26) + : +- * Filter (25) + : +- * ColumnarToRow (24) + : +- Scan parquet default.item (23) + +- * Sort (35) + +- Exchange (34) + +- * Filter (33) + +- * ColumnarToRow (32) + +- Scan parquet default.store_returns (31) + + +(1) Scan parquet default.store +Output [5]: [s_store_sk#1, s_store_name#2, s_market_id#3, s_state#4, s_zip#5] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [5]: [s_store_sk#1, s_store_name#2, s_market_id#3, s_state#4, s_zip#5] + +(3) Filter [codegen id : 1] +Input [5]: [s_store_sk#1, s_store_name#2, s_market_id#3, s_state#4, s_zip#5] +Condition : (((isnotnull(s_market_id#3) AND (s_market_id#3 = 8)) AND isnotnull(s_store_sk#1)) AND isnotnull(s_zip#5)) + +(4) Project [codegen id : 1] +Output [4]: [s_store_sk#1, s_store_name#2, s_state#4, s_zip#5] +Input [5]: [s_store_sk#1, s_store_name#2, s_market_id#3, s_state#4, s_zip#5] + +(5) BroadcastExchange +Input [4]: [s_store_sk#1, s_store_name#2, s_state#4, s_zip#5] +Arguments: HashedRelationBroadcastMode(List(input[3, string, true]),false), [id=#6] + +(6) Scan parquet default.customer_address +Output [4]: [ca_address_sk#7, ca_state#8, ca_zip#9, ca_country#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_country), IsNotNull(ca_zip)] +ReadSchema: struct + +(7) ColumnarToRow +Input [4]: [ca_address_sk#7, ca_state#8, ca_zip#9, ca_country#10] + +(8) Filter +Input [4]: [ca_address_sk#7, ca_state#8, ca_zip#9, ca_country#10] +Condition : ((isnotnull(ca_address_sk#7) AND isnotnull(ca_country#10)) AND isnotnull(ca_zip#9)) + +(9) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [s_zip#5] +Right keys [1]: [ca_zip#9] +Join condition: None + +(10) Project [codegen id : 2] +Output [6]: [s_store_sk#1, s_store_name#2, s_state#4, ca_address_sk#7, ca_state#8, ca_country#10] +Input [8]: [s_store_sk#1, s_store_name#2, s_state#4, s_zip#5, ca_address_sk#7, ca_state#8, ca_zip#9, ca_country#10] + +(11) 
BroadcastExchange +Input [6]: [s_store_sk#1, s_store_name#2, s_state#4, ca_address_sk#7, ca_state#8, ca_country#10] +Arguments: HashedRelationBroadcastMode(List(input[3, int, true], upper(input[5, string, true])),false), [id=#11] + +(12) Scan parquet default.customer +Output [5]: [c_customer_sk#12, c_current_addr_sk#13, c_first_name#14, c_last_name#15, c_birth_country#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(13) ColumnarToRow +Input [5]: [c_customer_sk#12, c_current_addr_sk#13, c_first_name#14, c_last_name#15, c_birth_country#16] + +(14) Filter +Input [5]: [c_customer_sk#12, c_current_addr_sk#13, c_first_name#14, c_last_name#15, c_birth_country#16] +Condition : ((isnotnull(c_customer_sk#12) AND isnotnull(c_birth_country#16)) AND isnotnull(c_current_addr_sk#13)) + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [2]: [ca_address_sk#7, upper(ca_country#10)] +Right keys [2]: [c_current_addr_sk#13, c_birth_country#16] +Join condition: None + +(16) Project [codegen id : 3] +Output [7]: [s_store_sk#1, s_store_name#2, s_state#4, ca_state#8, c_customer_sk#12, c_first_name#14, c_last_name#15] +Input [11]: [s_store_sk#1, s_store_name#2, s_state#4, ca_address_sk#7, ca_state#8, ca_country#10, c_customer_sk#12, c_current_addr_sk#13, c_first_name#14, c_last_name#15, c_birth_country#16] + +(17) BroadcastExchange +Input [7]: [s_store_sk#1, s_store_name#2, s_state#4, ca_state#8, c_customer_sk#12, c_first_name#14, c_last_name#15] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, true] as bigint), 32) | (cast(input[4, int, true] as bigint) & 4294967295))),false), [id=#17] + +(18) Scan parquet default.store_sales +Output [5]: [ss_item_sk#18, ss_customer_sk#19, ss_store_sk#20, ss_ticket_number#21, 
ss_net_paid#22] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(19) ColumnarToRow +Input [5]: [ss_item_sk#18, ss_customer_sk#19, ss_store_sk#20, ss_ticket_number#21, ss_net_paid#22] + +(20) Filter +Input [5]: [ss_item_sk#18, ss_customer_sk#19, ss_store_sk#20, ss_ticket_number#21, ss_net_paid#22] +Condition : (((isnotnull(ss_ticket_number#21) AND isnotnull(ss_item_sk#18)) AND isnotnull(ss_store_sk#20)) AND isnotnull(ss_customer_sk#19)) + +(21) BroadcastHashJoin [codegen id : 5] +Left keys [2]: [s_store_sk#1, c_customer_sk#12] +Right keys [2]: [ss_store_sk#20, ss_customer_sk#19] +Join condition: None + +(22) Project [codegen id : 5] +Output [8]: [s_store_name#2, s_state#4, ca_state#8, c_first_name#14, c_last_name#15, ss_item_sk#18, ss_ticket_number#21, ss_net_paid#22] +Input [12]: [s_store_sk#1, s_store_name#2, s_state#4, ca_state#8, c_customer_sk#12, c_first_name#14, c_last_name#15, ss_item_sk#18, ss_customer_sk#19, ss_store_sk#20, ss_ticket_number#21, ss_net_paid#22] + +(23) Scan parquet default.item +Output [6]: [i_item_sk#23, i_current_price#24, i_size#25, i_color#26, i_units#27, i_manager_id#28] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_color), EqualTo(i_color,pale), IsNotNull(i_item_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [6]: [i_item_sk#23, i_current_price#24, i_size#25, i_color#26, i_units#27, i_manager_id#28] + +(25) Filter [codegen id : 4] +Input [6]: [i_item_sk#23, i_current_price#24, i_size#25, i_color#26, i_units#27, i_manager_id#28] +Condition : ((isnotnull(i_color#26) AND (i_color#26 = pale)) AND 
isnotnull(i_item_sk#23)) + +(26) BroadcastExchange +Input [6]: [i_item_sk#23, i_current_price#24, i_size#25, i_color#26, i_units#27, i_manager_id#28] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#29] + +(27) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#18] +Right keys [1]: [i_item_sk#23] +Join condition: None + +(28) Project [codegen id : 5] +Output [13]: [s_store_name#2, s_state#4, ca_state#8, c_first_name#14, c_last_name#15, ss_item_sk#18, ss_ticket_number#21, ss_net_paid#22, i_current_price#24, i_size#25, i_color#26, i_units#27, i_manager_id#28] +Input [14]: [s_store_name#2, s_state#4, ca_state#8, c_first_name#14, c_last_name#15, ss_item_sk#18, ss_ticket_number#21, ss_net_paid#22, i_item_sk#23, i_current_price#24, i_size#25, i_color#26, i_units#27, i_manager_id#28] + +(29) Exchange +Input [13]: [s_store_name#2, s_state#4, ca_state#8, c_first_name#14, c_last_name#15, ss_item_sk#18, ss_ticket_number#21, ss_net_paid#22, i_current_price#24, i_size#25, i_color#26, i_units#27, i_manager_id#28] +Arguments: hashpartitioning(cast(ss_item_sk#18 as bigint), cast(ss_ticket_number#21 as bigint), 5), true, [id=#30] + +(30) Sort [codegen id : 6] +Input [13]: [s_store_name#2, s_state#4, ca_state#8, c_first_name#14, c_last_name#15, ss_item_sk#18, ss_ticket_number#21, ss_net_paid#22, i_current_price#24, i_size#25, i_color#26, i_units#27, i_manager_id#28] +Arguments: [cast(ss_item_sk#18 as bigint) ASC NULLS FIRST, cast(ss_ticket_number#21 as bigint) ASC NULLS FIRST], false, 0 + +(31) Scan parquet default.store_returns +Output [2]: [sr_item_sk#31, sr_ticket_number#32] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 7] +Input [2]: [sr_item_sk#31, 
sr_ticket_number#32] + +(33) Filter [codegen id : 7] +Input [2]: [sr_item_sk#31, sr_ticket_number#32] +Condition : (isnotnull(sr_ticket_number#32) AND isnotnull(sr_item_sk#31)) + +(34) Exchange +Input [2]: [sr_item_sk#31, sr_ticket_number#32] +Arguments: hashpartitioning(sr_item_sk#31, sr_ticket_number#32, 5), true, [id=#33] + +(35) Sort [codegen id : 8] +Input [2]: [sr_item_sk#31, sr_ticket_number#32] +Arguments: [sr_item_sk#31 ASC NULLS FIRST, sr_ticket_number#32 ASC NULLS FIRST], false, 0 + +(36) SortMergeJoin [codegen id : 9] +Left keys [2]: [cast(ss_item_sk#18 as bigint), cast(ss_ticket_number#21 as bigint)] +Right keys [2]: [sr_item_sk#31, sr_ticket_number#32] +Join condition: None + +(37) Project [codegen id : 9] +Output [11]: [ss_net_paid#22, s_store_name#2, s_state#4, i_current_price#24, i_size#25, i_color#26, i_units#27, i_manager_id#28, c_first_name#14, c_last_name#15, ca_state#8] +Input [15]: [s_store_name#2, s_state#4, ca_state#8, c_first_name#14, c_last_name#15, ss_item_sk#18, ss_ticket_number#21, ss_net_paid#22, i_current_price#24, i_size#25, i_color#26, i_units#27, i_manager_id#28, sr_item_sk#31, sr_ticket_number#32] + +(38) HashAggregate [codegen id : 9] +Input [11]: [ss_net_paid#22, s_store_name#2, s_state#4, i_current_price#24, i_size#25, i_color#26, i_units#27, i_manager_id#28, c_first_name#14, c_last_name#15, ca_state#8] +Keys [10]: [c_last_name#15, c_first_name#14, s_store_name#2, ca_state#8, s_state#4, i_color#26, i_current_price#24, i_manager_id#28, i_units#27, i_size#25] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#22))] +Aggregate Attributes [1]: [sum#34] +Results [11]: [c_last_name#15, c_first_name#14, s_store_name#2, ca_state#8, s_state#4, i_color#26, i_current_price#24, i_manager_id#28, i_units#27, i_size#25, sum#35] + +(39) Exchange +Input [11]: [c_last_name#15, c_first_name#14, s_store_name#2, ca_state#8, s_state#4, i_color#26, i_current_price#24, i_manager_id#28, i_units#27, i_size#25, sum#35] +Arguments: 
hashpartitioning(c_last_name#15, c_first_name#14, s_store_name#2, ca_state#8, s_state#4, i_color#26, i_current_price#24, i_manager_id#28, i_units#27, i_size#25, 5), true, [id=#36] + +(40) HashAggregate [codegen id : 10] +Input [11]: [c_last_name#15, c_first_name#14, s_store_name#2, ca_state#8, s_state#4, i_color#26, i_current_price#24, i_manager_id#28, i_units#27, i_size#25, sum#35] +Keys [10]: [c_last_name#15, c_first_name#14, s_store_name#2, ca_state#8, s_state#4, i_color#26, i_current_price#24, i_manager_id#28, i_units#27, i_size#25] +Functions [1]: [sum(UnscaledValue(ss_net_paid#22))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#22))#37] +Results [4]: [c_last_name#15, c_first_name#14, s_store_name#2, MakeDecimal(sum(UnscaledValue(ss_net_paid#22))#37,17,2) AS netpaid#38] + +(41) HashAggregate [codegen id : 10] +Input [4]: [c_last_name#15, c_first_name#14, s_store_name#2, netpaid#38] +Keys [3]: [c_last_name#15, c_first_name#14, s_store_name#2] +Functions [1]: [partial_sum(netpaid#38)] +Aggregate Attributes [2]: [sum#39, isEmpty#40] +Results [5]: [c_last_name#15, c_first_name#14, s_store_name#2, sum#41, isEmpty#42] + +(42) Exchange +Input [5]: [c_last_name#15, c_first_name#14, s_store_name#2, sum#41, isEmpty#42] +Arguments: hashpartitioning(c_last_name#15, c_first_name#14, s_store_name#2, 5), true, [id=#43] + +(43) HashAggregate [codegen id : 11] +Input [5]: [c_last_name#15, c_first_name#14, s_store_name#2, sum#41, isEmpty#42] +Keys [3]: [c_last_name#15, c_first_name#14, s_store_name#2] +Functions [1]: [sum(netpaid#38)] +Aggregate Attributes [1]: [sum(netpaid#38)#44] +Results [5]: [c_last_name#15, c_first_name#14, s_store_name#2, sum(netpaid#38)#44 AS paid#45, sum(netpaid#38)#44 AS sum(netpaid#38)#46] + +(44) Filter [codegen id : 11] +Input [5]: [c_last_name#15, c_first_name#14, s_store_name#2, paid#45, sum(netpaid#38)#46] +Condition : (isnotnull(sum(netpaid#38)#46) AND (cast(sum(netpaid#38)#46 as decimal(33,8)) > cast(Subquery scalar-subquery#47, 
[id=#48] as decimal(33,8)))) + +(45) Project [codegen id : 11] +Output [4]: [c_last_name#15, c_first_name#14, s_store_name#2, paid#45] +Input [5]: [c_last_name#15, c_first_name#14, s_store_name#2, paid#45, sum(netpaid#38)#46] + +(46) Exchange +Input [4]: [c_last_name#15, c_first_name#14, s_store_name#2, paid#45] +Arguments: rangepartitioning(c_last_name#15 ASC NULLS FIRST, c_first_name#14 ASC NULLS FIRST, s_store_name#2 ASC NULLS FIRST, 5), true, [id=#49] + +(47) Sort [codegen id : 12] +Input [4]: [c_last_name#15, c_first_name#14, s_store_name#2, paid#45] +Arguments: [c_last_name#15 ASC NULLS FIRST, c_first_name#14 ASC NULLS FIRST, s_store_name#2 ASC NULLS FIRST], true, 0 + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 44 Hosting Expression = Subquery scalar-subquery#47, [id=#48] +* HashAggregate (93) ++- Exchange (92) + +- * HashAggregate (91) + +- * HashAggregate (90) + +- Exchange (89) + +- * HashAggregate (88) + +- * Project (87) + +- * SortMergeJoin Inner (86) + :- * Sort (80) + : +- Exchange (79) + : +- * Project (78) + : +- * SortMergeJoin Inner (77) + : :- * Sort (71) + : : +- Exchange (70) + : : +- * Project (69) + : : +- * BroadcastHashJoin Inner BuildLeft (68) + : : :- BroadcastExchange (64) + : : : +- * Project (63) + : : : +- * BroadcastHashJoin Inner BuildLeft (62) + : : : :- BroadcastExchange (58) + : : : : +- * Project (57) + : : : : +- * BroadcastHashJoin Inner BuildLeft (56) + : : : : :- BroadcastExchange (52) + : : : : : +- * Project (51) + : : : : : +- * Filter (50) + : : : : : +- * ColumnarToRow (49) + : : : : : +- Scan parquet default.store (48) + : : : : +- * Filter (55) + : : : : +- * ColumnarToRow (54) + : : : : +- Scan parquet default.customer_address (53) + : : : +- * Filter (61) + : : : +- * ColumnarToRow (60) + : : : +- Scan parquet default.customer (59) + : : +- * Filter (67) + : : +- * ColumnarToRow (66) + : : +- Scan parquet default.store_sales (65) + : +- * Sort (76) + : +- Exchange (75) + : +- * Filter (74) + : +- * 
ColumnarToRow (73) + : +- Scan parquet default.item (72) + +- * Sort (85) + +- Exchange (84) + +- * Filter (83) + +- * ColumnarToRow (82) + +- Scan parquet default.store_returns (81) + + +(48) Scan parquet default.store +Output [5]: [s_store_sk#1, s_store_name#2, s_market_id#3, s_state#4, s_zip#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] +ReadSchema: struct + +(49) ColumnarToRow [codegen id : 1] +Input [5]: [s_store_sk#1, s_store_name#2, s_market_id#3, s_state#4, s_zip#5] + +(50) Filter [codegen id : 1] +Input [5]: [s_store_sk#1, s_store_name#2, s_market_id#3, s_state#4, s_zip#5] +Condition : (((isnotnull(s_market_id#3) AND (s_market_id#3 = 8)) AND isnotnull(s_store_sk#1)) AND isnotnull(s_zip#5)) + +(51) Project [codegen id : 1] +Output [4]: [s_store_sk#1, s_store_name#2, s_state#4, s_zip#5] +Input [5]: [s_store_sk#1, s_store_name#2, s_market_id#3, s_state#4, s_zip#5] + +(52) BroadcastExchange +Input [4]: [s_store_sk#1, s_store_name#2, s_state#4, s_zip#5] +Arguments: HashedRelationBroadcastMode(List(input[3, string, true]),false), [id=#50] + +(53) Scan parquet default.customer_address +Output [4]: [ca_address_sk#7, ca_state#8, ca_zip#9, ca_country#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_country), IsNotNull(ca_zip)] +ReadSchema: struct + +(54) ColumnarToRow +Input [4]: [ca_address_sk#7, ca_state#8, ca_zip#9, ca_country#10] + +(55) Filter +Input [4]: [ca_address_sk#7, ca_state#8, ca_zip#9, ca_country#10] +Condition : ((isnotnull(ca_address_sk#7) AND isnotnull(ca_country#10)) AND isnotnull(ca_zip#9)) + +(56) BroadcastHashJoin [codegen 
id : 2] +Left keys [1]: [s_zip#5] +Right keys [1]: [ca_zip#9] +Join condition: None + +(57) Project [codegen id : 2] +Output [6]: [s_store_sk#1, s_store_name#2, s_state#4, ca_address_sk#7, ca_state#8, ca_country#10] +Input [8]: [s_store_sk#1, s_store_name#2, s_state#4, s_zip#5, ca_address_sk#7, ca_state#8, ca_zip#9, ca_country#10] + +(58) BroadcastExchange +Input [6]: [s_store_sk#1, s_store_name#2, s_state#4, ca_address_sk#7, ca_state#8, ca_country#10] +Arguments: HashedRelationBroadcastMode(List(input[3, int, true], upper(input[5, string, true])),false), [id=#51] + +(59) Scan parquet default.customer +Output [5]: [c_customer_sk#12, c_current_addr_sk#13, c_first_name#14, c_last_name#15, c_birth_country#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(60) ColumnarToRow +Input [5]: [c_customer_sk#12, c_current_addr_sk#13, c_first_name#14, c_last_name#15, c_birth_country#16] + +(61) Filter +Input [5]: [c_customer_sk#12, c_current_addr_sk#13, c_first_name#14, c_last_name#15, c_birth_country#16] +Condition : ((isnotnull(c_customer_sk#12) AND isnotnull(c_birth_country#16)) AND isnotnull(c_current_addr_sk#13)) + +(62) BroadcastHashJoin [codegen id : 3] +Left keys [2]: [ca_address_sk#7, upper(ca_country#10)] +Right keys [2]: [c_current_addr_sk#13, c_birth_country#16] +Join condition: None + +(63) Project [codegen id : 3] +Output [7]: [s_store_sk#1, s_store_name#2, s_state#4, ca_state#8, c_customer_sk#12, c_first_name#14, c_last_name#15] +Input [11]: [s_store_sk#1, s_store_name#2, s_state#4, ca_address_sk#7, ca_state#8, ca_country#10, c_customer_sk#12, c_current_addr_sk#13, c_first_name#14, c_last_name#15, c_birth_country#16] + +(64) BroadcastExchange +Input [7]: [s_store_sk#1, s_store_name#2, s_state#4, ca_state#8, 
c_customer_sk#12, c_first_name#14, c_last_name#15] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, true] as bigint), 32) | (cast(input[4, int, true] as bigint) & 4294967295))),false), [id=#52] + +(65) Scan parquet default.store_sales +Output [5]: [ss_item_sk#18, ss_customer_sk#19, ss_store_sk#20, ss_ticket_number#21, ss_net_paid#22] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(66) ColumnarToRow +Input [5]: [ss_item_sk#18, ss_customer_sk#19, ss_store_sk#20, ss_ticket_number#21, ss_net_paid#22] + +(67) Filter +Input [5]: [ss_item_sk#18, ss_customer_sk#19, ss_store_sk#20, ss_ticket_number#21, ss_net_paid#22] +Condition : (((isnotnull(ss_ticket_number#21) AND isnotnull(ss_item_sk#18)) AND isnotnull(ss_store_sk#20)) AND isnotnull(ss_customer_sk#19)) + +(68) BroadcastHashJoin [codegen id : 4] +Left keys [2]: [s_store_sk#1, c_customer_sk#12] +Right keys [2]: [ss_store_sk#20, ss_customer_sk#19] +Join condition: None + +(69) Project [codegen id : 4] +Output [8]: [s_store_name#2, s_state#4, ca_state#8, c_first_name#14, c_last_name#15, ss_item_sk#18, ss_ticket_number#21, ss_net_paid#22] +Input [12]: [s_store_sk#1, s_store_name#2, s_state#4, ca_state#8, c_customer_sk#12, c_first_name#14, c_last_name#15, ss_item_sk#18, ss_customer_sk#19, ss_store_sk#20, ss_ticket_number#21, ss_net_paid#22] + +(70) Exchange +Input [8]: [s_store_name#2, s_state#4, ca_state#8, c_first_name#14, c_last_name#15, ss_item_sk#18, ss_ticket_number#21, ss_net_paid#22] +Arguments: hashpartitioning(ss_item_sk#18, 5), true, [id=#53] + +(71) Sort [codegen id : 5] +Input [8]: [s_store_name#2, s_state#4, ca_state#8, c_first_name#14, c_last_name#15, ss_item_sk#18, ss_ticket_number#21, ss_net_paid#22] +Arguments: 
[ss_item_sk#18 ASC NULLS FIRST], false, 0 + +(72) Scan parquet default.item +Output [6]: [i_item_sk#23, i_current_price#24, i_size#25, i_color#26, i_units#27, i_manager_id#28] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(73) ColumnarToRow [codegen id : 6] +Input [6]: [i_item_sk#23, i_current_price#24, i_size#25, i_color#26, i_units#27, i_manager_id#28] + +(74) Filter [codegen id : 6] +Input [6]: [i_item_sk#23, i_current_price#24, i_size#25, i_color#26, i_units#27, i_manager_id#28] +Condition : isnotnull(i_item_sk#23) + +(75) Exchange +Input [6]: [i_item_sk#23, i_current_price#24, i_size#25, i_color#26, i_units#27, i_manager_id#28] +Arguments: hashpartitioning(i_item_sk#23, 5), true, [id=#54] + +(76) Sort [codegen id : 7] +Input [6]: [i_item_sk#23, i_current_price#24, i_size#25, i_color#26, i_units#27, i_manager_id#28] +Arguments: [i_item_sk#23 ASC NULLS FIRST], false, 0 + +(77) SortMergeJoin [codegen id : 8] +Left keys [1]: [ss_item_sk#18] +Right keys [1]: [i_item_sk#23] +Join condition: None + +(78) Project [codegen id : 8] +Output [13]: [s_store_name#2, s_state#4, ca_state#8, c_first_name#14, c_last_name#15, ss_item_sk#18, ss_ticket_number#21, ss_net_paid#22, i_current_price#24, i_size#25, i_color#26, i_units#27, i_manager_id#28] +Input [14]: [s_store_name#2, s_state#4, ca_state#8, c_first_name#14, c_last_name#15, ss_item_sk#18, ss_ticket_number#21, ss_net_paid#22, i_item_sk#23, i_current_price#24, i_size#25, i_color#26, i_units#27, i_manager_id#28] + +(79) Exchange +Input [13]: [s_store_name#2, s_state#4, ca_state#8, c_first_name#14, c_last_name#15, ss_item_sk#18, ss_ticket_number#21, ss_net_paid#22, i_current_price#24, i_size#25, i_color#26, i_units#27, i_manager_id#28] +Arguments: hashpartitioning(cast(ss_item_sk#18 as bigint), cast(ss_ticket_number#21 as bigint), 5), true, 
[id=#55] + +(80) Sort [codegen id : 9] +Input [13]: [s_store_name#2, s_state#4, ca_state#8, c_first_name#14, c_last_name#15, ss_item_sk#18, ss_ticket_number#21, ss_net_paid#22, i_current_price#24, i_size#25, i_color#26, i_units#27, i_manager_id#28] +Arguments: [cast(ss_item_sk#18 as bigint) ASC NULLS FIRST, cast(ss_ticket_number#21 as bigint) ASC NULLS FIRST], false, 0 + +(81) Scan parquet default.store_returns +Output [2]: [sr_item_sk#31, sr_ticket_number#32] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(82) ColumnarToRow [codegen id : 10] +Input [2]: [sr_item_sk#31, sr_ticket_number#32] + +(83) Filter [codegen id : 10] +Input [2]: [sr_item_sk#31, sr_ticket_number#32] +Condition : (isnotnull(sr_ticket_number#32) AND isnotnull(sr_item_sk#31)) + +(84) Exchange +Input [2]: [sr_item_sk#31, sr_ticket_number#32] +Arguments: hashpartitioning(sr_item_sk#31, sr_ticket_number#32, 5), true, [id=#56] + +(85) Sort [codegen id : 11] +Input [2]: [sr_item_sk#31, sr_ticket_number#32] +Arguments: [sr_item_sk#31 ASC NULLS FIRST, sr_ticket_number#32 ASC NULLS FIRST], false, 0 + +(86) SortMergeJoin [codegen id : 12] +Left keys [2]: [cast(ss_item_sk#18 as bigint), cast(ss_ticket_number#21 as bigint)] +Right keys [2]: [sr_item_sk#31, sr_ticket_number#32] +Join condition: None + +(87) Project [codegen id : 12] +Output [11]: [ss_net_paid#22, s_store_name#2, s_state#4, i_current_price#24, i_size#25, i_color#26, i_units#27, i_manager_id#28, c_first_name#14, c_last_name#15, ca_state#8] +Input [15]: [s_store_name#2, s_state#4, ca_state#8, c_first_name#14, c_last_name#15, ss_item_sk#18, ss_ticket_number#21, ss_net_paid#22, i_current_price#24, i_size#25, i_color#26, i_units#27, i_manager_id#28, sr_item_sk#31, sr_ticket_number#32] + +(88) HashAggregate [codegen id : 12] 
+Input [11]: [ss_net_paid#22, s_store_name#2, s_state#4, i_current_price#24, i_size#25, i_color#26, i_units#27, i_manager_id#28, c_first_name#14, c_last_name#15, ca_state#8] +Keys [10]: [c_last_name#15, c_first_name#14, s_store_name#2, ca_state#8, s_state#4, i_color#26, i_current_price#24, i_manager_id#28, i_units#27, i_size#25] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#22))] +Aggregate Attributes [1]: [sum#57] +Results [11]: [c_last_name#15, c_first_name#14, s_store_name#2, ca_state#8, s_state#4, i_color#26, i_current_price#24, i_manager_id#28, i_units#27, i_size#25, sum#58] + +(89) Exchange +Input [11]: [c_last_name#15, c_first_name#14, s_store_name#2, ca_state#8, s_state#4, i_color#26, i_current_price#24, i_manager_id#28, i_units#27, i_size#25, sum#58] +Arguments: hashpartitioning(c_last_name#15, c_first_name#14, s_store_name#2, ca_state#8, s_state#4, i_color#26, i_current_price#24, i_manager_id#28, i_units#27, i_size#25, 5), true, [id=#59] + +(90) HashAggregate [codegen id : 13] +Input [11]: [c_last_name#15, c_first_name#14, s_store_name#2, ca_state#8, s_state#4, i_color#26, i_current_price#24, i_manager_id#28, i_units#27, i_size#25, sum#58] +Keys [10]: [c_last_name#15, c_first_name#14, s_store_name#2, ca_state#8, s_state#4, i_color#26, i_current_price#24, i_manager_id#28, i_units#27, i_size#25] +Functions [1]: [sum(UnscaledValue(ss_net_paid#22))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#22))#60] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_paid#22))#60,17,2) AS netpaid#38] + +(91) HashAggregate [codegen id : 13] +Input [1]: [netpaid#38] +Keys: [] +Functions [1]: [partial_avg(netpaid#38)] +Aggregate Attributes [2]: [sum#61, count#62] +Results [2]: [sum#63, count#64] + +(92) Exchange +Input [2]: [sum#63, count#64] +Arguments: SinglePartition, true, [id=#65] + +(93) HashAggregate [codegen id : 14] +Input [2]: [sum#63, count#64] +Keys: [] +Functions [1]: [avg(netpaid#38)] +Aggregate Attributes [1]: [avg(netpaid#38)#66] +Results 
[1]: [CheckOverflow((0.050000 * promote_precision(avg(netpaid#38)#66)), DecimalType(24,8), true) AS (CAST(0.05 AS DECIMAL(21,6)) * CAST(avg(netpaid) AS DECIMAL(21,6)))#67] + + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.sf100/simplified.txt new file mode 100644 index 0000000000000..9adad49cb8b1a --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.sf100/simplified.txt @@ -0,0 +1,156 @@ +WholeStageCodegen (12) + Sort [c_first_name,c_last_name,s_store_name] + InputAdapter + Exchange [c_first_name,c_last_name,s_store_name] #1 + WholeStageCodegen (11) + Project [c_first_name,c_last_name,paid,s_store_name] + Filter [sum(netpaid)] + Subquery #1 + WholeStageCodegen (14) + HashAggregate [count,sum] [(CAST(0.05 AS DECIMAL(21,6)) * CAST(avg(netpaid) AS DECIMAL(21,6))),avg(netpaid),count,sum] + InputAdapter + Exchange #10 + WholeStageCodegen (13) + HashAggregate [netpaid] [count,count,sum,sum] + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,sum] [netpaid,sum,sum(UnscaledValue(ss_net_paid))] + InputAdapter + Exchange [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name] #11 + WholeStageCodegen (12) + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] [sum,sum] + Project [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] + SortMergeJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + InputAdapter + WholeStageCodegen (9) + Sort [ss_item_sk,ss_ticket_number] + InputAdapter + Exchange [ss_item_sk,ss_ticket_number] #12 + WholeStageCodegen (8) + Project 
[c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_item_sk,ss_net_paid,ss_ticket_number] + SortMergeJoin [i_item_sk,ss_item_sk] + InputAdapter + WholeStageCodegen (5) + Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #13 + WholeStageCodegen (4) + Project [c_first_name,c_last_name,ca_state,s_state,s_store_name,ss_item_sk,ss_net_paid,ss_ticket_number] + BroadcastHashJoin [c_customer_sk,s_store_sk,ss_customer_sk,ss_store_sk] + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (3) + Project [c_customer_sk,c_first_name,c_last_name,ca_state,s_state,s_store_name,s_store_sk] + BroadcastHashJoin [c_birth_country,c_current_addr_sk,ca_address_sk,ca_country] + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (2) + Project [ca_address_sk,ca_country,ca_state,s_state,s_store_name,s_store_sk] + BroadcastHashJoin [ca_zip,s_zip] + InputAdapter + BroadcastExchange #16 + WholeStageCodegen (1) + Project [s_state,s_store_name,s_store_sk,s_zip] + Filter [s_market_id,s_store_sk,s_zip] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_market_id,s_state,s_store_name,s_store_sk,s_zip] + Filter [ca_address_sk,ca_country,ca_zip] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_country,ca_state,ca_zip] + Filter [c_birth_country,c_current_addr_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_birth_country,c_current_addr_sk,c_customer_sk,c_first_name,c_last_name] + Filter [ss_customer_sk,ss_item_sk,ss_store_sk,ss_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] + InputAdapter + WholeStageCodegen (7) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #17 + WholeStageCodegen (6) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] + 
InputAdapter + WholeStageCodegen (11) + Sort [sr_item_sk,sr_ticket_number] + InputAdapter + Exchange [sr_item_sk,sr_ticket_number] #18 + WholeStageCodegen (10) + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] + HashAggregate [c_first_name,c_last_name,isEmpty,s_store_name,sum] [isEmpty,paid,sum,sum(netpaid),sum(netpaid)] + InputAdapter + Exchange [c_first_name,c_last_name,s_store_name] #2 + WholeStageCodegen (10) + HashAggregate [c_first_name,c_last_name,netpaid,s_store_name] [isEmpty,isEmpty,sum,sum] + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,sum] [netpaid,sum,sum(UnscaledValue(ss_net_paid))] + InputAdapter + Exchange [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name] #3 + WholeStageCodegen (9) + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] [sum,sum] + Project [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] + SortMergeJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + InputAdapter + WholeStageCodegen (6) + Sort [ss_item_sk,ss_ticket_number] + InputAdapter + Exchange [ss_item_sk,ss_ticket_number] #4 + WholeStageCodegen (5) + Project [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_item_sk,ss_net_paid,ss_ticket_number] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [c_first_name,c_last_name,ca_state,s_state,s_store_name,ss_item_sk,ss_net_paid,ss_ticket_number] + BroadcastHashJoin [c_customer_sk,s_store_sk,ss_customer_sk,ss_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [c_customer_sk,c_first_name,c_last_name,ca_state,s_state,s_store_name,s_store_sk] + BroadcastHashJoin 
[c_birth_country,c_current_addr_sk,ca_address_sk,ca_country] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + Project [ca_address_sk,ca_country,ca_state,s_state,s_store_name,s_store_sk] + BroadcastHashJoin [ca_zip,s_zip] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (1) + Project [s_state,s_store_name,s_store_sk,s_zip] + Filter [s_market_id,s_store_sk,s_zip] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_market_id,s_state,s_store_name,s_store_sk,s_zip] + Filter [ca_address_sk,ca_country,ca_zip] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_country,ca_state,ca_zip] + Filter [c_birth_country,c_current_addr_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_birth_country,c_current_addr_sk,c_customer_sk,c_first_name,c_last_name] + Filter [ss_customer_sk,ss_item_sk,ss_store_sk,ss_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (4) + Filter [i_color,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] + InputAdapter + WholeStageCodegen (8) + Sort [sr_item_sk,sr_ticket_number] + InputAdapter + Exchange [sr_item_sk,sr_ticket_number] #9 + WholeStageCodegen (7) + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24/explain.txt new file mode 100644 index 0000000000000..58d40545d046b --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24/explain.txt @@ -0,0 +1,487 @@ +== Physical Plan == +* Sort (44) ++- Exchange (43) + +- * Project 
(42) + +- * Filter (41) + +- * HashAggregate (40) + +- Exchange (39) + +- * HashAggregate (38) + +- * HashAggregate (37) + +- Exchange (36) + +- * HashAggregate (35) + +- * Project (34) + +- * BroadcastHashJoin Inner BuildRight (33) + :- * Project (28) + : +- * BroadcastHashJoin Inner BuildRight (27) + : :- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.store_returns (4) + : : : +- BroadcastExchange (14) + : : : +- * Project (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.store (10) + : : +- BroadcastExchange (20) + : : +- * Filter (19) + : : +- * ColumnarToRow (18) + : : +- Scan parquet default.item (17) + : +- BroadcastExchange (26) + : +- * Filter (25) + : +- * ColumnarToRow (24) + : +- Scan parquet default.customer (23) + +- BroadcastExchange (32) + +- * Filter (31) + +- * ColumnarToRow (30) + +- Scan parquet default.customer_address (29) + + +(1) Scan parquet default.store_sales +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 6] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] + +(3) Filter [codegen id : 6] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, 
ss_ticket_number#4, ss_net_paid#5] +Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(4) Scan parquet default.store_returns +Output [2]: [sr_item_sk#6, sr_ticket_number#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [sr_item_sk#6, sr_ticket_number#7] + +(6) Filter [codegen id : 1] +Input [2]: [sr_item_sk#6, sr_ticket_number#7] +Condition : (isnotnull(sr_ticket_number#7) AND isnotnull(sr_item_sk#6)) + +(7) BroadcastExchange +Input [2]: [sr_item_sk#6, sr_ticket_number#7] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#8] + +(8) BroadcastHashJoin [codegen id : 6] +Left keys [2]: [cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint)] +Right keys [2]: [sr_ticket_number#7, sr_item_sk#6] +Join condition: None + +(9) Project [codegen id : 6] +Output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#6, sr_ticket_number#7] + +(10) Scan parquet default.store +Output [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] + +(12) Filter [codegen id : 2] +Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, 
s_state#12, s_zip#13] +Condition : (((isnotnull(s_market_id#11) AND (s_market_id#11 = 8)) AND isnotnull(s_store_sk#9)) AND isnotnull(s_zip#13)) + +(13) Project [codegen id : 2] +Output [4]: [s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] +Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] + +(14) BroadcastExchange +Input [4]: [s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] + +(15) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#9] +Join condition: None + +(16) Project [codegen id : 6] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] + +(17) Scan parquet default.item +Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_color), EqualTo(i_color,pale), IsNotNull(i_item_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(19) Filter [codegen id : 3] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Condition : ((isnotnull(i_color#18) AND (i_color#18 = pale)) AND isnotnull(i_item_sk#15)) + +(20) BroadcastExchange +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#21] + +(21) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#15] 
+Join condition: None + +(22) Project [codegen id : 6] +Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(23) Scan parquet default.customer +Output [5]: [c_customer_sk#22, c_current_addr_sk#23, c_first_name#24, c_last_name#25, c_birth_country#26] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [5]: [c_customer_sk#22, c_current_addr_sk#23, c_first_name#24, c_last_name#25, c_birth_country#26] + +(25) Filter [codegen id : 4] +Input [5]: [c_customer_sk#22, c_current_addr_sk#23, c_first_name#24, c_last_name#25, c_birth_country#26] +Condition : ((isnotnull(c_customer_sk#22) AND isnotnull(c_birth_country#26)) AND isnotnull(c_current_addr_sk#23)) + +(26) BroadcastExchange +Input [5]: [c_customer_sk#22, c_current_addr_sk#23, c_first_name#24, c_last_name#25, c_birth_country#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#27] + +(27) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#22] +Join condition: None + +(28) Project [codegen id : 6] +Output [13]: [ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_current_addr_sk#23, c_first_name#24, c_last_name#25, c_birth_country#26] +Input [15]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, 
i_manager_id#20, c_customer_sk#22, c_current_addr_sk#23, c_first_name#24, c_last_name#25, c_birth_country#26] + +(29) Scan parquet default.customer_address +Output [4]: [ca_address_sk#28, ca_state#29, ca_zip#30, ca_country#31] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_zip), IsNotNull(ca_country)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 5] +Input [4]: [ca_address_sk#28, ca_state#29, ca_zip#30, ca_country#31] + +(31) Filter [codegen id : 5] +Input [4]: [ca_address_sk#28, ca_state#29, ca_zip#30, ca_country#31] +Condition : ((isnotnull(ca_address_sk#28) AND isnotnull(ca_zip#30)) AND isnotnull(ca_country#31)) + +(32) BroadcastExchange +Input [4]: [ca_address_sk#28, ca_state#29, ca_zip#30, ca_country#31] +Arguments: HashedRelationBroadcastMode(List(input[0, int, false], upper(input[3, string, false]), input[2, string, false]),false), [id=#32] + +(33) BroadcastHashJoin [codegen id : 6] +Left keys [3]: [c_current_addr_sk#23, c_birth_country#26, s_zip#13] +Right keys [3]: [ca_address_sk#28, upper(ca_country#31), ca_zip#30] +Join condition: None + +(34) Project [codegen id : 6] +Output [11]: [ss_net_paid#5, s_store_name#10, s_state#12, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#24, c_last_name#25, ca_state#29] +Input [17]: [ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_current_addr_sk#23, c_first_name#24, c_last_name#25, c_birth_country#26, ca_address_sk#28, ca_state#29, ca_zip#30, ca_country#31] + +(35) HashAggregate [codegen id : 6] +Input [11]: [ss_net_paid#5, s_store_name#10, s_state#12, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#24, c_last_name#25, ca_state#29] +Keys [10]: [c_last_name#25, c_first_name#24, 
s_store_name#10, ca_state#29, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum#33] +Results [11]: [c_last_name#25, c_first_name#24, s_store_name#10, ca_state#29, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#34] + +(36) Exchange +Input [11]: [c_last_name#25, c_first_name#24, s_store_name#10, ca_state#29, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#34] +Arguments: hashpartitioning(c_last_name#25, c_first_name#24, s_store_name#10, ca_state#29, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), true, [id=#35] + +(37) HashAggregate [codegen id : 7] +Input [11]: [c_last_name#25, c_first_name#24, s_store_name#10, ca_state#29, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#34] +Keys [10]: [c_last_name#25, c_first_name#24, s_store_name#10, ca_state#29, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#36] +Results [4]: [c_last_name#25, c_first_name#24, s_store_name#10, MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#36,17,2) AS netpaid#37] + +(38) HashAggregate [codegen id : 7] +Input [4]: [c_last_name#25, c_first_name#24, s_store_name#10, netpaid#37] +Keys [3]: [c_last_name#25, c_first_name#24, s_store_name#10] +Functions [1]: [partial_sum(netpaid#37)] +Aggregate Attributes [2]: [sum#38, isEmpty#39] +Results [5]: [c_last_name#25, c_first_name#24, s_store_name#10, sum#40, isEmpty#41] + +(39) Exchange +Input [5]: [c_last_name#25, c_first_name#24, s_store_name#10, sum#40, isEmpty#41] +Arguments: hashpartitioning(c_last_name#25, c_first_name#24, s_store_name#10, 5), true, [id=#42] + +(40) HashAggregate [codegen id : 8] +Input [5]: [c_last_name#25, 
c_first_name#24, s_store_name#10, sum#40, isEmpty#41] +Keys [3]: [c_last_name#25, c_first_name#24, s_store_name#10] +Functions [1]: [sum(netpaid#37)] +Aggregate Attributes [1]: [sum(netpaid#37)#43] +Results [5]: [c_last_name#25, c_first_name#24, s_store_name#10, sum(netpaid#37)#43 AS paid#44, sum(netpaid#37)#43 AS sum(netpaid#37)#45] + +(41) Filter [codegen id : 8] +Input [5]: [c_last_name#25, c_first_name#24, s_store_name#10, paid#44, sum(netpaid#37)#45] +Condition : (isnotnull(sum(netpaid#37)#45) AND (cast(sum(netpaid#37)#45 as decimal(33,8)) > cast(Subquery scalar-subquery#46, [id=#47] as decimal(33,8)))) + +(42) Project [codegen id : 8] +Output [4]: [c_last_name#25, c_first_name#24, s_store_name#10, paid#44] +Input [5]: [c_last_name#25, c_first_name#24, s_store_name#10, paid#44, sum(netpaid#37)#45] + +(43) Exchange +Input [4]: [c_last_name#25, c_first_name#24, s_store_name#10, paid#44] +Arguments: rangepartitioning(c_last_name#25 ASC NULLS FIRST, c_first_name#24 ASC NULLS FIRST, s_store_name#10 ASC NULLS FIRST, 5), true, [id=#48] + +(44) Sort [codegen id : 9] +Input [4]: [c_last_name#25, c_first_name#24, s_store_name#10, paid#44] +Arguments: [c_last_name#25 ASC NULLS FIRST, c_first_name#24 ASC NULLS FIRST, s_store_name#10 ASC NULLS FIRST], true, 0 + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 41 Hosting Expression = Subquery scalar-subquery#46, [id=#47] +* HashAggregate (84) ++- Exchange (83) + +- * HashAggregate (82) + +- * HashAggregate (81) + +- Exchange (80) + +- * HashAggregate (79) + +- * Project (78) + +- * BroadcastHashJoin Inner BuildRight (77) + :- * Project (72) + : +- * BroadcastHashJoin Inner BuildRight (71) + : :- * Project (66) + : : +- * BroadcastHashJoin Inner BuildRight (65) + : : :- * Project (60) + : : : +- * BroadcastHashJoin Inner BuildRight (59) + : : : :- * Project (53) + : : : : +- * BroadcastHashJoin Inner BuildRight (52) + : : : : :- * Filter (47) + : : : : : +- * ColumnarToRow (46) + : : : : : +- Scan parquet 
default.store_sales (45) + : : : : +- BroadcastExchange (51) + : : : : +- * Filter (50) + : : : : +- * ColumnarToRow (49) + : : : : +- Scan parquet default.store_returns (48) + : : : +- BroadcastExchange (58) + : : : +- * Project (57) + : : : +- * Filter (56) + : : : +- * ColumnarToRow (55) + : : : +- Scan parquet default.store (54) + : : +- BroadcastExchange (64) + : : +- * Filter (63) + : : +- * ColumnarToRow (62) + : : +- Scan parquet default.item (61) + : +- BroadcastExchange (70) + : +- * Filter (69) + : +- * ColumnarToRow (68) + : +- Scan parquet default.customer (67) + +- BroadcastExchange (76) + +- * Filter (75) + +- * ColumnarToRow (74) + +- Scan parquet default.customer_address (73) + + +(45) Scan parquet default.store_sales +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(46) ColumnarToRow [codegen id : 6] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] + +(47) Filter [codegen id : 6] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(48) Scan parquet default.store_returns +Output [2]: [sr_item_sk#6, sr_ticket_number#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(49) ColumnarToRow [codegen id : 1] +Input [2]: [sr_item_sk#6, sr_ticket_number#7] + +(50) 
Filter [codegen id : 1] +Input [2]: [sr_item_sk#6, sr_ticket_number#7] +Condition : (isnotnull(sr_ticket_number#7) AND isnotnull(sr_item_sk#6)) + +(51) BroadcastExchange +Input [2]: [sr_item_sk#6, sr_ticket_number#7] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#49] + +(52) BroadcastHashJoin [codegen id : 6] +Left keys [2]: [cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint)] +Right keys [2]: [sr_ticket_number#7, sr_item_sk#6] +Join condition: None + +(53) Project [codegen id : 6] +Output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#6, sr_ticket_number#7] + +(54) Scan parquet default.store +Output [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] +ReadSchema: struct + +(55) ColumnarToRow [codegen id : 2] +Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] + +(56) Filter [codegen id : 2] +Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] +Condition : (((isnotnull(s_market_id#11) AND (s_market_id#11 = 8)) AND isnotnull(s_store_sk#9)) AND isnotnull(s_zip#13)) + +(57) Project [codegen id : 2] +Output [4]: [s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] +Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] + +(58) BroadcastExchange +Input [4]: [s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#50] + +(59) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: 
[s_store_sk#9] +Join condition: None + +(60) Project [codegen id : 6] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] + +(61) Scan parquet default.item +Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(62) ColumnarToRow [codegen id : 3] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(63) Filter [codegen id : 3] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Condition : isnotnull(i_item_sk#15) + +(64) BroadcastExchange +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#51] + +(65) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#15] +Join condition: None + +(66) Project [codegen id : 6] +Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(67) Scan parquet default.customer +Output [5]: [c_customer_sk#22, c_current_addr_sk#23, c_first_name#24, c_last_name#25, c_birth_country#26] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(68) ColumnarToRow [codegen id : 4] +Input [5]: [c_customer_sk#22, c_current_addr_sk#23, c_first_name#24, c_last_name#25, c_birth_country#26] + +(69) Filter [codegen id : 4] +Input [5]: [c_customer_sk#22, c_current_addr_sk#23, c_first_name#24, c_last_name#25, c_birth_country#26] +Condition : ((isnotnull(c_customer_sk#22) AND isnotnull(c_birth_country#26)) AND isnotnull(c_current_addr_sk#23)) + +(70) BroadcastExchange +Input [5]: [c_customer_sk#22, c_current_addr_sk#23, c_first_name#24, c_last_name#25, c_birth_country#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#52] + +(71) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#22] +Join condition: None + +(72) Project [codegen id : 6] +Output [13]: [ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_current_addr_sk#23, c_first_name#24, c_last_name#25, c_birth_country#26] +Input [15]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#22, c_current_addr_sk#23, c_first_name#24, c_last_name#25, c_birth_country#26] + +(73) Scan parquet default.customer_address +Output [4]: [ca_address_sk#28, ca_state#29, ca_zip#30, ca_country#31] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_zip), IsNotNull(ca_country)] +ReadSchema: struct + +(74) ColumnarToRow [codegen id : 5] +Input [4]: [ca_address_sk#28, ca_state#29, 
ca_zip#30, ca_country#31] + +(75) Filter [codegen id : 5] +Input [4]: [ca_address_sk#28, ca_state#29, ca_zip#30, ca_country#31] +Condition : ((isnotnull(ca_address_sk#28) AND isnotnull(ca_zip#30)) AND isnotnull(ca_country#31)) + +(76) BroadcastExchange +Input [4]: [ca_address_sk#28, ca_state#29, ca_zip#30, ca_country#31] +Arguments: HashedRelationBroadcastMode(List(input[0, int, false], upper(input[3, string, false]), input[2, string, false]),false), [id=#53] + +(77) BroadcastHashJoin [codegen id : 6] +Left keys [3]: [c_current_addr_sk#23, c_birth_country#26, s_zip#13] +Right keys [3]: [ca_address_sk#28, upper(ca_country#31), ca_zip#30] +Join condition: None + +(78) Project [codegen id : 6] +Output [11]: [ss_net_paid#5, s_store_name#10, s_state#12, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#24, c_last_name#25, ca_state#29] +Input [17]: [ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_current_addr_sk#23, c_first_name#24, c_last_name#25, c_birth_country#26, ca_address_sk#28, ca_state#29, ca_zip#30, ca_country#31] + +(79) HashAggregate [codegen id : 6] +Input [11]: [ss_net_paid#5, s_store_name#10, s_state#12, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#24, c_last_name#25, ca_state#29] +Keys [10]: [c_last_name#25, c_first_name#24, s_store_name#10, ca_state#29, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum#54] +Results [11]: [c_last_name#25, c_first_name#24, s_store_name#10, ca_state#29, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#55] + +(80) Exchange +Input [11]: [c_last_name#25, c_first_name#24, s_store_name#10, ca_state#29, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#55] +Arguments: 
hashpartitioning(c_last_name#25, c_first_name#24, s_store_name#10, ca_state#29, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), true, [id=#56] + +(81) HashAggregate [codegen id : 7] +Input [11]: [c_last_name#25, c_first_name#24, s_store_name#10, ca_state#29, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#55] +Keys [10]: [c_last_name#25, c_first_name#24, s_store_name#10, ca_state#29, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#57] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#57,17,2) AS netpaid#37] + +(82) HashAggregate [codegen id : 7] +Input [1]: [netpaid#37] +Keys: [] +Functions [1]: [partial_avg(netpaid#37)] +Aggregate Attributes [2]: [sum#58, count#59] +Results [2]: [sum#60, count#61] + +(83) Exchange +Input [2]: [sum#60, count#61] +Arguments: SinglePartition, true, [id=#62] + +(84) HashAggregate [codegen id : 8] +Input [2]: [sum#60, count#61] +Keys: [] +Functions [1]: [avg(netpaid#37)] +Aggregate Attributes [1]: [avg(netpaid#37)#63] +Results [1]: [CheckOverflow((0.050000 * promote_precision(avg(netpaid#37)#63)), DecimalType(24,8), true) AS (CAST(0.05 AS DECIMAL(21,6)) * CAST(avg(netpaid) AS DECIMAL(21,6)))#64] + + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24/simplified.txt new file mode 100644 index 0000000000000..ee0ecc649b7dd --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24/simplified.txt @@ -0,0 +1,129 @@ +WholeStageCodegen (9) + Sort [c_first_name,c_last_name,s_store_name] + InputAdapter + Exchange [c_first_name,c_last_name,s_store_name] #1 + WholeStageCodegen (8) + Project [c_first_name,c_last_name,paid,s_store_name] + Filter 
[sum(netpaid)] + Subquery #1 + WholeStageCodegen (8) + HashAggregate [count,sum] [(CAST(0.05 AS DECIMAL(21,6)) * CAST(avg(netpaid) AS DECIMAL(21,6))),avg(netpaid),count,sum] + InputAdapter + Exchange #9 + WholeStageCodegen (7) + HashAggregate [netpaid] [count,count,sum,sum] + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,sum] [netpaid,sum,sum(UnscaledValue(ss_net_paid))] + InputAdapter + Exchange [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name] #10 + WholeStageCodegen (6) + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] [sum,sum] + Project [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] + BroadcastHashJoin [c_birth_country,c_current_addr_sk,ca_address_sk,ca_country,ca_zip,s_zip] + Project [c_birth_country,c_current_addr_sk,c_first_name,c_last_name,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_net_paid] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_customer_sk,ss_net_paid] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [s_state,s_store_name,s_zip,ss_customer_sk,ss_item_sk,ss_net_paid] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Filter [ss_customer_sk,ss_item_sk,ss_store_sk,ss_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (1) + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns 
[sr_item_sk,sr_ticket_number] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (2) + Project [s_state,s_store_name,s_store_sk,s_zip] + Filter [s_market_id,s_store_sk,s_zip] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_market_id,s_state,s_store_name,s_store_sk,s_zip] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (4) + Filter [c_birth_country,c_current_addr_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_birth_country,c_current_addr_sk,c_customer_sk,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (5) + Filter [ca_address_sk,ca_country,ca_zip] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_country,ca_state,ca_zip] + HashAggregate [c_first_name,c_last_name,isEmpty,s_store_name,sum] [isEmpty,paid,sum,sum(netpaid),sum(netpaid)] + InputAdapter + Exchange [c_first_name,c_last_name,s_store_name] #2 + WholeStageCodegen (7) + HashAggregate [c_first_name,c_last_name,netpaid,s_store_name] [isEmpty,isEmpty,sum,sum] + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,sum] [netpaid,sum,sum(UnscaledValue(ss_net_paid))] + InputAdapter + Exchange [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name] #3 + WholeStageCodegen (6) + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] [sum,sum] + Project [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] + BroadcastHashJoin [c_birth_country,c_current_addr_sk,ca_address_sk,ca_country,ca_zip,s_zip] + Project 
[c_birth_country,c_current_addr_sk,c_first_name,c_last_name,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_net_paid] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_customer_sk,ss_net_paid] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [s_state,s_store_name,s_zip,ss_customer_sk,ss_item_sk,ss_net_paid] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Filter [ss_customer_sk,ss_item_sk,ss_store_sk,ss_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Project [s_state,s_store_name,s_store_sk,s_zip] + Filter [s_market_id,s_store_sk,s_zip] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_market_id,s_state,s_store_name,s_store_sk,s_zip] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Filter [i_color,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (4) + Filter [c_birth_country,c_current_addr_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_birth_country,c_current_addr_sk,c_customer_sk,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (5) + Filter [ca_address_sk,ca_country,ca_zip] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_country,ca_state,ca_zip] diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q27a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q27a.sf100/explain.txt new file mode 100644 index 0000000000000..0ebc809387dc6 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q27a.sf100/explain.txt @@ -0,0 +1,428 @@ +== Physical Plan == +TakeOrderedAndProject (77) ++- Union (76) + :- * HashAggregate (32) + : +- Exchange (31) + : +- * HashAggregate (30) + : +- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (23) + : : +- * BroadcastHashJoin Inner BuildRight (22) + : : :- * Project (17) + : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.customer_demographics (4) + : : : +- BroadcastExchange (15) + : : : +- * Project (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet default.date_dim (11) + : : +- BroadcastExchange (21) + : : +- * Filter (20) + : : +- * ColumnarToRow (19) + : : +- Scan parquet default.store (18) + : +- BroadcastExchange (27) + : +- * Filter (26) + : +- * ColumnarToRow (25) + : +- Scan parquet default.item (24) + :- * HashAggregate (54) + : +- Exchange (53) + : +- * HashAggregate (52) + : +- * Project (51) + : +- * BroadcastHashJoin Inner BuildRight (50) + : :- * Project (48) + : : +- * BroadcastHashJoin Inner BuildRight (47) + : : :- * Project (45) + : : : +- * BroadcastHashJoin Inner BuildRight (44) + : : : :- * Project (38) + : : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : : :- * Filter (35) + : : : : : +- * ColumnarToRow (34) + : : : : : +- Scan parquet 
default.store_sales (33) + : : : : +- ReusedExchange (36) + : : : +- BroadcastExchange (43) + : : : +- * Project (42) + : : : +- * Filter (41) + : : : +- * ColumnarToRow (40) + : : : +- Scan parquet default.store (39) + : : +- ReusedExchange (46) + : +- ReusedExchange (49) + +- * HashAggregate (75) + +- Exchange (74) + +- * HashAggregate (73) + +- * Project (72) + +- * BroadcastHashJoin Inner BuildRight (71) + :- * Project (66) + : +- * BroadcastHashJoin Inner BuildRight (65) + : :- * Project (63) + : : +- * BroadcastHashJoin Inner BuildRight (62) + : : :- * Project (60) + : : : +- * BroadcastHashJoin Inner BuildRight (59) + : : : :- * Filter (57) + : : : : +- * ColumnarToRow (56) + : : : : +- Scan parquet default.store_sales (55) + : : : +- ReusedExchange (58) + : : +- ReusedExchange (61) + : +- ReusedExchange (64) + +- BroadcastExchange (70) + +- * Filter (69) + +- * ColumnarToRow (68) + +- Scan parquet default.item (67) + + +(1) Scan parquet default.store_sales +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] + +(3) Filter [codegen id : 5] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Condition : (((isnotnull(ss_cdemo_sk#3) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#4)) AND isnotnull(ss_item_sk#2)) + +(4) Scan parquet default.customer_demographics +Output [4]: 
[cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/customer_demographics] +PushedFilters: [IsNotNull(cd_education_status), IsNotNull(cd_gender), IsNotNull(cd_marital_status), EqualTo(cd_gender,F), EqualTo(cd_marital_status,W), EqualTo(cd_education_status,Primary), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(6) Filter [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Condition : ((((((isnotnull(cd_education_status#12) AND isnotnull(cd_gender#10)) AND isnotnull(cd_marital_status#11)) AND (cd_gender#10 = F)) AND (cd_marital_status#11 = W)) AND (cd_education_status#12 = Primary)) AND isnotnull(cd_demo_sk#9)) + +(7) Project [codegen id : 1] +Output [1]: [cd_demo_sk#9] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(8) BroadcastExchange +Input [1]: [cd_demo_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_cdemo_sk#3] +Right keys [1]: [cd_demo_sk#9] +Join condition: None + +(10) Project [codegen id : 5] +Output [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, cd_demo_sk#9] + +(11) Scan parquet default.date_dim +Output [2]: [d_date_sk#14, d_year#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: 
[IsNotNull(d_year), EqualTo(d_year,1998), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#14, d_year#15] + +(13) Filter [codegen id : 2] +Input [2]: [d_date_sk#14, d_year#15] +Condition : ((isnotnull(d_year#15) AND (d_year#15 = 1998)) AND isnotnull(d_date_sk#14)) + +(14) Project [codegen id : 2] +Output [1]: [d_date_sk#14] +Input [2]: [d_date_sk#14, d_year#15] + +(15) BroadcastExchange +Input [1]: [d_date_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#14] +Join condition: None + +(17) Project [codegen id : 5] +Output [6]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, d_date_sk#14] + +(18) Scan parquet default.store +Output [2]: [s_store_sk#17, s_state#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [s_store_sk#17, s_state#18] + +(20) Filter [codegen id : 3] +Input [2]: [s_store_sk#17, s_state#18] +Condition : ((isnotnull(s_state#18) AND (s_state#18 = TN)) AND isnotnull(s_store_sk#17)) + +(21) BroadcastExchange +Input [2]: [s_store_sk#17, s_state#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] + +(22) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#17] +Join condition: None + +(23) Project [codegen id : 5] +Output [6]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, 
ss_coupon_amt#8, s_state#18] +Input [8]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, s_store_sk#17, s_state#18] + +(24) Scan parquet default.item +Output [2]: [i_item_sk#20, i_item_id#21] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#20, i_item_id#21] + +(26) Filter [codegen id : 4] +Input [2]: [i_item_sk#20, i_item_id#21] +Condition : isnotnull(i_item_sk#20) + +(27) BroadcastExchange +Input [2]: [i_item_sk#20, i_item_id#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#20] +Join condition: None + +(29) Project [codegen id : 5] +Output [6]: [i_item_id#21, s_state#18, ss_quantity#5 AS agg1#23, ss_list_price#6 AS agg2#24, ss_coupon_amt#8 AS agg3#25, ss_sales_price#7 AS agg4#26] +Input [8]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, s_state#18, i_item_sk#20, i_item_id#21] + +(30) HashAggregate [codegen id : 5] +Input [6]: [i_item_id#21, s_state#18, agg1#23, agg2#24, agg3#25, agg4#26] +Keys [2]: [i_item_id#21, s_state#18] +Functions [4]: [partial_avg(cast(agg1#23 as bigint)), partial_avg(UnscaledValue(agg2#24)), partial_avg(UnscaledValue(agg3#25)), partial_avg(UnscaledValue(agg4#26))] +Aggregate Attributes [8]: [sum#27, count#28, sum#29, count#30, sum#31, count#32, sum#33, count#34] +Results [10]: [i_item_id#21, s_state#18, sum#35, count#36, sum#37, count#38, sum#39, count#40, sum#41, count#42] + +(31) Exchange +Input [10]: [i_item_id#21, s_state#18, sum#35, count#36, sum#37, count#38, sum#39, count#40, sum#41, count#42] +Arguments: hashpartitioning(i_item_id#21, s_state#18, 5), 
true, [id=#43] + +(32) HashAggregate [codegen id : 6] +Input [10]: [i_item_id#21, s_state#18, sum#35, count#36, sum#37, count#38, sum#39, count#40, sum#41, count#42] +Keys [2]: [i_item_id#21, s_state#18] +Functions [4]: [avg(cast(agg1#23 as bigint)), avg(UnscaledValue(agg2#24)), avg(UnscaledValue(agg3#25)), avg(UnscaledValue(agg4#26))] +Aggregate Attributes [4]: [avg(cast(agg1#23 as bigint))#44, avg(UnscaledValue(agg2#24))#45, avg(UnscaledValue(agg3#25))#46, avg(UnscaledValue(agg4#26))#47] +Results [7]: [i_item_id#21, s_state#18, 0 AS g_state#48, avg(cast(agg1#23 as bigint))#44 AS agg1#49, cast((avg(UnscaledValue(agg2#24))#45 / 100.0) as decimal(11,6)) AS agg2#50, cast((avg(UnscaledValue(agg3#25))#46 / 100.0) as decimal(11,6)) AS agg3#51, cast((avg(UnscaledValue(agg4#26))#47 / 100.0) as decimal(11,6)) AS agg4#52] + +(33) Scan parquet default.store_sales +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 11] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] + +(35) Filter [codegen id : 11] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Condition : (((isnotnull(ss_cdemo_sk#3) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#4)) AND isnotnull(ss_item_sk#2)) + +(36) ReusedExchange [Reuses operator id: 8] +Output [1]: [cd_demo_sk#9] + +(37) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ss_cdemo_sk#3] +Right keys [1]: 
[cd_demo_sk#9] +Join condition: None + +(38) Project [codegen id : 11] +Output [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, cd_demo_sk#9] + +(39) Scan parquet default.store +Output [2]: [s_store_sk#17, s_state#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] +ReadSchema: struct + +(40) ColumnarToRow [codegen id : 8] +Input [2]: [s_store_sk#17, s_state#18] + +(41) Filter [codegen id : 8] +Input [2]: [s_store_sk#17, s_state#18] +Condition : ((isnotnull(s_state#18) AND (s_state#18 = TN)) AND isnotnull(s_store_sk#17)) + +(42) Project [codegen id : 8] +Output [1]: [s_store_sk#17] +Input [2]: [s_store_sk#17, s_state#18] + +(43) BroadcastExchange +Input [1]: [s_store_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#53] + +(44) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#17] +Join condition: None + +(45) Project [codegen id : 11] +Output [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, s_store_sk#17] + +(46) ReusedExchange [Reuses operator id: 15] +Output [1]: [d_date_sk#14] + +(47) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#14] +Join condition: None + +(48) Project [codegen id : 11] +Output [5]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [7]: [ss_sold_date_sk#1, 
ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, d_date_sk#14] + +(49) ReusedExchange [Reuses operator id: 27] +Output [2]: [i_item_sk#20, i_item_id#21] + +(50) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#20] +Join condition: None + +(51) Project [codegen id : 11] +Output [5]: [i_item_id#21, ss_quantity#5 AS agg1#23, ss_list_price#6 AS agg2#24, ss_coupon_amt#8 AS agg3#25, ss_sales_price#7 AS agg4#26] +Input [7]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_sk#20, i_item_id#21] + +(52) HashAggregate [codegen id : 11] +Input [5]: [i_item_id#21, agg1#23, agg2#24, agg3#25, agg4#26] +Keys [1]: [i_item_id#21] +Functions [4]: [partial_avg(cast(agg1#23 as bigint)), partial_avg(UnscaledValue(agg2#24)), partial_avg(UnscaledValue(agg3#25)), partial_avg(UnscaledValue(agg4#26))] +Aggregate Attributes [8]: [sum#54, count#55, sum#56, count#57, sum#58, count#59, sum#60, count#61] +Results [9]: [i_item_id#21, sum#62, count#63, sum#64, count#65, sum#66, count#67, sum#68, count#69] + +(53) Exchange +Input [9]: [i_item_id#21, sum#62, count#63, sum#64, count#65, sum#66, count#67, sum#68, count#69] +Arguments: hashpartitioning(i_item_id#21, 5), true, [id=#70] + +(54) HashAggregate [codegen id : 12] +Input [9]: [i_item_id#21, sum#62, count#63, sum#64, count#65, sum#66, count#67, sum#68, count#69] +Keys [1]: [i_item_id#21] +Functions [4]: [avg(cast(agg1#23 as bigint)), avg(UnscaledValue(agg2#24)), avg(UnscaledValue(agg3#25)), avg(UnscaledValue(agg4#26))] +Aggregate Attributes [4]: [avg(cast(agg1#23 as bigint))#71, avg(UnscaledValue(agg2#24))#72, avg(UnscaledValue(agg3#25))#73, avg(UnscaledValue(agg4#26))#74] +Results [7]: [i_item_id#21, null AS s_state#75, 1 AS g_state#76, avg(cast(agg1#23 as bigint))#71 AS agg1#77, cast((avg(UnscaledValue(agg2#24))#72 / 100.0) as decimal(11,6)) AS agg2#78, cast((avg(UnscaledValue(agg3#25))#73 / 100.0) as decimal(11,6)) AS 
agg3#79, cast((avg(UnscaledValue(agg4#26))#74 / 100.0) as decimal(11,6)) AS agg4#80] + +(55) Scan parquet default.store_sales +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(56) ColumnarToRow [codegen id : 17] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] + +(57) Filter [codegen id : 17] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Condition : (((isnotnull(ss_cdemo_sk#3) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#4)) AND isnotnull(ss_item_sk#2)) + +(58) ReusedExchange [Reuses operator id: 8] +Output [1]: [cd_demo_sk#9] + +(59) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ss_cdemo_sk#3] +Right keys [1]: [cd_demo_sk#9] +Join condition: None + +(60) Project [codegen id : 17] +Output [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, cd_demo_sk#9] + +(61) ReusedExchange [Reuses operator id: 43] +Output [1]: [s_store_sk#17] + +(62) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#17] +Join condition: None + +(63) Project [codegen id : 17] +Output [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [8]: [ss_sold_date_sk#1, 
ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, s_store_sk#17] + +(64) ReusedExchange [Reuses operator id: 15] +Output [1]: [d_date_sk#14] + +(65) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#14] +Join condition: None + +(66) Project [codegen id : 17] +Output [5]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, d_date_sk#14] + +(67) Scan parquet default.item +Output [1]: [i_item_sk#20] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(68) ColumnarToRow [codegen id : 16] +Input [1]: [i_item_sk#20] + +(69) Filter [codegen id : 16] +Input [1]: [i_item_sk#20] +Condition : isnotnull(i_item_sk#20) + +(70) BroadcastExchange +Input [1]: [i_item_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#81] + +(71) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#20] +Join condition: None + +(72) Project [codegen id : 17] +Output [4]: [ss_quantity#5 AS agg1#23, ss_list_price#6 AS agg2#24, ss_coupon_amt#8 AS agg3#25, ss_sales_price#7 AS agg4#26] +Input [6]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_sk#20] + +(73) HashAggregate [codegen id : 17] +Input [4]: [agg1#23, agg2#24, agg3#25, agg4#26] +Keys: [] +Functions [4]: [partial_avg(cast(agg1#23 as bigint)), partial_avg(UnscaledValue(agg2#24)), partial_avg(UnscaledValue(agg3#25)), partial_avg(UnscaledValue(agg4#26))] +Aggregate Attributes [8]: [sum#82, count#83, sum#84, count#85, sum#86, count#87, sum#88, count#89] +Results [8]: [sum#90, count#91, sum#92, count#93, 
sum#94, count#95, sum#96, count#97] + +(74) Exchange +Input [8]: [sum#90, count#91, sum#92, count#93, sum#94, count#95, sum#96, count#97] +Arguments: SinglePartition, true, [id=#98] + +(75) HashAggregate [codegen id : 18] +Input [8]: [sum#90, count#91, sum#92, count#93, sum#94, count#95, sum#96, count#97] +Keys: [] +Functions [4]: [avg(cast(agg1#23 as bigint)), avg(UnscaledValue(agg2#24)), avg(UnscaledValue(agg3#25)), avg(UnscaledValue(agg4#26))] +Aggregate Attributes [4]: [avg(cast(agg1#23 as bigint))#99, avg(UnscaledValue(agg2#24))#100, avg(UnscaledValue(agg3#25))#101, avg(UnscaledValue(agg4#26))#102] +Results [7]: [null AS i_item_id#103, null AS s_state#104, 1 AS g_state#105, avg(cast(agg1#23 as bigint))#99 AS agg1#106, cast((avg(UnscaledValue(agg2#24))#100 / 100.0) as decimal(11,6)) AS agg2#107, cast((avg(UnscaledValue(agg3#25))#101 / 100.0) as decimal(11,6)) AS agg3#108, cast((avg(UnscaledValue(agg4#26))#102 / 100.0) as decimal(11,6)) AS agg4#109] + +(76) Union + +(77) TakeOrderedAndProject +Input [7]: [i_item_id#21, s_state#18, g_state#48, agg1#49, agg2#50, agg3#51, agg4#52] +Arguments: 100, [i_item_id#21 ASC NULLS FIRST, s_state#18 ASC NULLS FIRST], [i_item_id#21, s_state#18, g_state#48, agg1#49, agg2#50, agg3#51, agg4#52] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q27a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q27a.sf100/simplified.txt new file mode 100644 index 0000000000000..cc47c4b7bc332 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q27a.sf100/simplified.txt @@ -0,0 +1,113 @@ +TakeOrderedAndProject [agg1,agg2,agg3,agg4,g_state,i_item_id,s_state] + Union + WholeStageCodegen (6) + HashAggregate [count,count,count,count,i_item_id,s_state,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,avg(UnscaledValue(agg2)),avg(UnscaledValue(agg3)),avg(UnscaledValue(agg4)),avg(cast(agg1 as bigint)),count,count,count,count,g_state,sum,sum,sum,sum] + 
InputAdapter + Exchange [i_item_id,s_state] #1 + WholeStageCodegen (5) + HashAggregate [agg1,agg2,agg3,agg4,i_item_id,s_state] [count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] + Project [i_item_id,s_state,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [s_state,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] + Filter [ss_cdemo_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [cd_demo_sk] + Filter [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [s_state,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_state,s_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_id,i_item_sk] + WholeStageCodegen (12) + HashAggregate [count,count,count,count,i_item_id,sum,sum,sum,sum] 
[agg1,agg2,agg3,agg4,avg(UnscaledValue(agg2)),avg(UnscaledValue(agg3)),avg(UnscaledValue(agg4)),avg(cast(agg1 as bigint)),count,count,count,count,g_state,s_state,sum,sum,sum,sum] + InputAdapter + Exchange [i_item_id] #6 + WholeStageCodegen (11) + HashAggregate [agg1,agg2,agg3,agg4,i_item_id] [count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] + Project [i_item_id,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] + Filter [ss_cdemo_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + ReusedExchange [cd_demo_sk] #2 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (8) + Project [s_store_sk] + Filter [s_state,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_state,s_store_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [i_item_id,i_item_sk] #5 + WholeStageCodegen (18) + HashAggregate [count,count,count,count,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,avg(UnscaledValue(agg2)),avg(UnscaledValue(agg3)),avg(UnscaledValue(agg4)),avg(cast(agg1 as bigint)),count,count,count,count,g_state,i_item_id,s_state,sum,sum,sum,sum] + InputAdapter + Exchange #8 + WholeStageCodegen (17) + HashAggregate [agg1,agg2,agg3,agg4] [count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] + Project 
[ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] + Filter [ss_cdemo_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + ReusedExchange [cd_demo_sk] #2 + InputAdapter + ReusedExchange [s_store_sk] #7 + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (16) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q27a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q27a/explain.txt new file mode 100644 index 0000000000000..2d6deabcf64a4 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q27a/explain.txt @@ -0,0 +1,428 @@ +== Physical Plan == +TakeOrderedAndProject (77) ++- Union (76) + :- * HashAggregate (32) + : +- Exchange (31) + : +- * HashAggregate (30) + : +- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (23) + : : +- * BroadcastHashJoin Inner BuildRight (22) + : : :- * Project (17) + : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- 
BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.customer_demographics (4) + : : : +- BroadcastExchange (15) + : : : +- * Project (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet default.date_dim (11) + : : +- BroadcastExchange (21) + : : +- * Filter (20) + : : +- * ColumnarToRow (19) + : : +- Scan parquet default.store (18) + : +- BroadcastExchange (27) + : +- * Filter (26) + : +- * ColumnarToRow (25) + : +- Scan parquet default.item (24) + :- * HashAggregate (54) + : +- Exchange (53) + : +- * HashAggregate (52) + : +- * Project (51) + : +- * BroadcastHashJoin Inner BuildRight (50) + : :- * Project (48) + : : +- * BroadcastHashJoin Inner BuildRight (47) + : : :- * Project (41) + : : : +- * BroadcastHashJoin Inner BuildRight (40) + : : : :- * Project (38) + : : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : : :- * Filter (35) + : : : : : +- * ColumnarToRow (34) + : : : : : +- Scan parquet default.store_sales (33) + : : : : +- ReusedExchange (36) + : : : +- ReusedExchange (39) + : : +- BroadcastExchange (46) + : : +- * Project (45) + : : +- * Filter (44) + : : +- * ColumnarToRow (43) + : : +- Scan parquet default.store (42) + : +- ReusedExchange (49) + +- * HashAggregate (75) + +- Exchange (74) + +- * HashAggregate (73) + +- * Project (72) + +- * BroadcastHashJoin Inner BuildRight (71) + :- * Project (66) + : +- * BroadcastHashJoin Inner BuildRight (65) + : :- * Project (63) + : : +- * BroadcastHashJoin Inner BuildRight (62) + : : :- * Project (60) + : : : +- * BroadcastHashJoin Inner BuildRight (59) + : : : :- * Filter (57) + : : : : +- * ColumnarToRow (56) + : : : : +- Scan parquet default.store_sales (55) + : : : +- ReusedExchange (58) + : : +- ReusedExchange (61) + : +- ReusedExchange (64) + +- BroadcastExchange (70) + +- * Filter (69) + +- * ColumnarToRow (68) + +- Scan parquet default.item (67) + + +(1) Scan parquet 
default.store_sales +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] + +(3) Filter [codegen id : 5] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Condition : (((isnotnull(ss_cdemo_sk#3) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#4)) AND isnotnull(ss_item_sk#2)) + +(4) Scan parquet default.customer_demographics +Output [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/customer_demographics] +PushedFilters: [IsNotNull(cd_education_status), IsNotNull(cd_marital_status), IsNotNull(cd_gender), EqualTo(cd_gender,F), EqualTo(cd_marital_status,W), EqualTo(cd_education_status,Primary), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(6) Filter [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Condition : ((((((isnotnull(cd_education_status#12) AND isnotnull(cd_marital_status#11)) AND isnotnull(cd_gender#10)) AND (cd_gender#10 = F)) AND (cd_marital_status#11 = W)) AND (cd_education_status#12 = Primary)) AND isnotnull(cd_demo_sk#9)) + 
+(7) Project [codegen id : 1] +Output [1]: [cd_demo_sk#9] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(8) BroadcastExchange +Input [1]: [cd_demo_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_cdemo_sk#3] +Right keys [1]: [cd_demo_sk#9] +Join condition: None + +(10) Project [codegen id : 5] +Output [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, cd_demo_sk#9] + +(11) Scan parquet default.date_dim +Output [2]: [d_date_sk#14, d_year#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1998), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#14, d_year#15] + +(13) Filter [codegen id : 2] +Input [2]: [d_date_sk#14, d_year#15] +Condition : ((isnotnull(d_year#15) AND (d_year#15 = 1998)) AND isnotnull(d_date_sk#14)) + +(14) Project [codegen id : 2] +Output [1]: [d_date_sk#14] +Input [2]: [d_date_sk#14, d_year#15] + +(15) BroadcastExchange +Input [1]: [d_date_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#14] +Join condition: None + +(17) Project [codegen id : 5] +Output [6]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, d_date_sk#14] + +(18) 
Scan parquet default.store +Output [2]: [s_store_sk#17, s_state#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [s_store_sk#17, s_state#18] + +(20) Filter [codegen id : 3] +Input [2]: [s_store_sk#17, s_state#18] +Condition : ((isnotnull(s_state#18) AND (s_state#18 = TN)) AND isnotnull(s_store_sk#17)) + +(21) BroadcastExchange +Input [2]: [s_store_sk#17, s_state#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] + +(22) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#17] +Join condition: None + +(23) Project [codegen id : 5] +Output [6]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, s_state#18] +Input [8]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, s_store_sk#17, s_state#18] + +(24) Scan parquet default.item +Output [2]: [i_item_sk#20, i_item_id#21] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#20, i_item_id#21] + +(26) Filter [codegen id : 4] +Input [2]: [i_item_sk#20, i_item_id#21] +Condition : isnotnull(i_item_sk#20) + +(27) BroadcastExchange +Input [2]: [i_item_sk#20, i_item_id#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#20] +Join condition: None + +(29) Project [codegen id : 5] +Output [6]: [i_item_id#21, 
s_state#18, ss_quantity#5 AS agg1#23, ss_list_price#6 AS agg2#24, ss_coupon_amt#8 AS agg3#25, ss_sales_price#7 AS agg4#26] +Input [8]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, s_state#18, i_item_sk#20, i_item_id#21] + +(30) HashAggregate [codegen id : 5] +Input [6]: [i_item_id#21, s_state#18, agg1#23, agg2#24, agg3#25, agg4#26] +Keys [2]: [i_item_id#21, s_state#18] +Functions [4]: [partial_avg(cast(agg1#23 as bigint)), partial_avg(UnscaledValue(agg2#24)), partial_avg(UnscaledValue(agg3#25)), partial_avg(UnscaledValue(agg4#26))] +Aggregate Attributes [8]: [sum#27, count#28, sum#29, count#30, sum#31, count#32, sum#33, count#34] +Results [10]: [i_item_id#21, s_state#18, sum#35, count#36, sum#37, count#38, sum#39, count#40, sum#41, count#42] + +(31) Exchange +Input [10]: [i_item_id#21, s_state#18, sum#35, count#36, sum#37, count#38, sum#39, count#40, sum#41, count#42] +Arguments: hashpartitioning(i_item_id#21, s_state#18, 5), true, [id=#43] + +(32) HashAggregate [codegen id : 6] +Input [10]: [i_item_id#21, s_state#18, sum#35, count#36, sum#37, count#38, sum#39, count#40, sum#41, count#42] +Keys [2]: [i_item_id#21, s_state#18] +Functions [4]: [avg(cast(agg1#23 as bigint)), avg(UnscaledValue(agg2#24)), avg(UnscaledValue(agg3#25)), avg(UnscaledValue(agg4#26))] +Aggregate Attributes [4]: [avg(cast(agg1#23 as bigint))#44, avg(UnscaledValue(agg2#24))#45, avg(UnscaledValue(agg3#25))#46, avg(UnscaledValue(agg4#26))#47] +Results [7]: [i_item_id#21, s_state#18, 0 AS g_state#48, avg(cast(agg1#23 as bigint))#44 AS agg1#49, cast((avg(UnscaledValue(agg2#24))#45 / 100.0) as decimal(11,6)) AS agg2#50, cast((avg(UnscaledValue(agg3#25))#46 / 100.0) as decimal(11,6)) AS agg3#51, cast((avg(UnscaledValue(agg4#26))#47 / 100.0) as decimal(11,6)) AS agg4#52] + +(33) Scan parquet default.store_sales +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Batched: 
true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 11] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] + +(35) Filter [codegen id : 11] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Condition : (((isnotnull(ss_cdemo_sk#3) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#4)) AND isnotnull(ss_item_sk#2)) + +(36) ReusedExchange [Reuses operator id: 8] +Output [1]: [cd_demo_sk#9] + +(37) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ss_cdemo_sk#3] +Right keys [1]: [cd_demo_sk#9] +Join condition: None + +(38) Project [codegen id : 11] +Output [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, cd_demo_sk#9] + +(39) ReusedExchange [Reuses operator id: 15] +Output [1]: [d_date_sk#14] + +(40) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#14] +Join condition: None + +(41) Project [codegen id : 11] +Output [6]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, d_date_sk#14] + +(42) Scan parquet default.store +Output [2]: [s_store_sk#17, s_state#18] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] +ReadSchema: struct + +(43) ColumnarToRow [codegen id : 9] +Input [2]: [s_store_sk#17, s_state#18] + +(44) Filter [codegen id : 9] +Input [2]: [s_store_sk#17, s_state#18] +Condition : ((isnotnull(s_state#18) AND (s_state#18 = TN)) AND isnotnull(s_store_sk#17)) + +(45) Project [codegen id : 9] +Output [1]: [s_store_sk#17] +Input [2]: [s_store_sk#17, s_state#18] + +(46) BroadcastExchange +Input [1]: [s_store_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#53] + +(47) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#17] +Join condition: None + +(48) Project [codegen id : 11] +Output [5]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [7]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, s_store_sk#17] + +(49) ReusedExchange [Reuses operator id: 27] +Output [2]: [i_item_sk#20, i_item_id#21] + +(50) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#20] +Join condition: None + +(51) Project [codegen id : 11] +Output [5]: [i_item_id#21, ss_quantity#5 AS agg1#23, ss_list_price#6 AS agg2#24, ss_coupon_amt#8 AS agg3#25, ss_sales_price#7 AS agg4#26] +Input [7]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_sk#20, i_item_id#21] + +(52) HashAggregate [codegen id : 11] +Input [5]: [i_item_id#21, agg1#23, agg2#24, agg3#25, agg4#26] +Keys [1]: [i_item_id#21] +Functions [4]: [partial_avg(cast(agg1#23 as bigint)), partial_avg(UnscaledValue(agg2#24)), partial_avg(UnscaledValue(agg3#25)), partial_avg(UnscaledValue(agg4#26))] +Aggregate Attributes [8]: [sum#54, count#55, sum#56, count#57, sum#58, count#59, sum#60, 
count#61] +Results [9]: [i_item_id#21, sum#62, count#63, sum#64, count#65, sum#66, count#67, sum#68, count#69] + +(53) Exchange +Input [9]: [i_item_id#21, sum#62, count#63, sum#64, count#65, sum#66, count#67, sum#68, count#69] +Arguments: hashpartitioning(i_item_id#21, 5), true, [id=#70] + +(54) HashAggregate [codegen id : 12] +Input [9]: [i_item_id#21, sum#62, count#63, sum#64, count#65, sum#66, count#67, sum#68, count#69] +Keys [1]: [i_item_id#21] +Functions [4]: [avg(cast(agg1#23 as bigint)), avg(UnscaledValue(agg2#24)), avg(UnscaledValue(agg3#25)), avg(UnscaledValue(agg4#26))] +Aggregate Attributes [4]: [avg(cast(agg1#23 as bigint))#71, avg(UnscaledValue(agg2#24))#72, avg(UnscaledValue(agg3#25))#73, avg(UnscaledValue(agg4#26))#74] +Results [7]: [i_item_id#21, null AS s_state#75, 1 AS g_state#76, avg(cast(agg1#23 as bigint))#71 AS agg1#77, cast((avg(UnscaledValue(agg2#24))#72 / 100.0) as decimal(11,6)) AS agg2#78, cast((avg(UnscaledValue(agg3#25))#73 / 100.0) as decimal(11,6)) AS agg3#79, cast((avg(UnscaledValue(agg4#26))#74 / 100.0) as decimal(11,6)) AS agg4#80] + +(55) Scan parquet default.store_sales +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(56) ColumnarToRow [codegen id : 17] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] + +(57) Filter [codegen id : 17] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Condition : (((isnotnull(ss_cdemo_sk#3) AND 
isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#4)) AND isnotnull(ss_item_sk#2)) + +(58) ReusedExchange [Reuses operator id: 8] +Output [1]: [cd_demo_sk#9] + +(59) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ss_cdemo_sk#3] +Right keys [1]: [cd_demo_sk#9] +Join condition: None + +(60) Project [codegen id : 17] +Output [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, cd_demo_sk#9] + +(61) ReusedExchange [Reuses operator id: 15] +Output [1]: [d_date_sk#14] + +(62) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#14] +Join condition: None + +(63) Project [codegen id : 17] +Output [6]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, d_date_sk#14] + +(64) ReusedExchange [Reuses operator id: 46] +Output [1]: [s_store_sk#17] + +(65) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#17] +Join condition: None + +(66) Project [codegen id : 17] +Output [5]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [7]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, s_store_sk#17] + +(67) Scan parquet default.item +Output [1]: [i_item_sk#20] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(68) ColumnarToRow [codegen id : 16] +Input [1]: [i_item_sk#20] + +(69) Filter [codegen id : 16] +Input [1]: [i_item_sk#20] +Condition : 
isnotnull(i_item_sk#20) + +(70) BroadcastExchange +Input [1]: [i_item_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#81] + +(71) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#20] +Join condition: None + +(72) Project [codegen id : 17] +Output [4]: [ss_quantity#5 AS agg1#23, ss_list_price#6 AS agg2#24, ss_coupon_amt#8 AS agg3#25, ss_sales_price#7 AS agg4#26] +Input [6]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_sk#20] + +(73) HashAggregate [codegen id : 17] +Input [4]: [agg1#23, agg2#24, agg3#25, agg4#26] +Keys: [] +Functions [4]: [partial_avg(cast(agg1#23 as bigint)), partial_avg(UnscaledValue(agg2#24)), partial_avg(UnscaledValue(agg3#25)), partial_avg(UnscaledValue(agg4#26))] +Aggregate Attributes [8]: [sum#82, count#83, sum#84, count#85, sum#86, count#87, sum#88, count#89] +Results [8]: [sum#90, count#91, sum#92, count#93, sum#94, count#95, sum#96, count#97] + +(74) Exchange +Input [8]: [sum#90, count#91, sum#92, count#93, sum#94, count#95, sum#96, count#97] +Arguments: SinglePartition, true, [id=#98] + +(75) HashAggregate [codegen id : 18] +Input [8]: [sum#90, count#91, sum#92, count#93, sum#94, count#95, sum#96, count#97] +Keys: [] +Functions [4]: [avg(cast(agg1#23 as bigint)), avg(UnscaledValue(agg2#24)), avg(UnscaledValue(agg3#25)), avg(UnscaledValue(agg4#26))] +Aggregate Attributes [4]: [avg(cast(agg1#23 as bigint))#99, avg(UnscaledValue(agg2#24))#100, avg(UnscaledValue(agg3#25))#101, avg(UnscaledValue(agg4#26))#102] +Results [7]: [null AS i_item_id#103, null AS s_state#104, 1 AS g_state#105, avg(cast(agg1#23 as bigint))#99 AS agg1#106, cast((avg(UnscaledValue(agg2#24))#100 / 100.0) as decimal(11,6)) AS agg2#107, cast((avg(UnscaledValue(agg3#25))#101 / 100.0) as decimal(11,6)) AS agg3#108, cast((avg(UnscaledValue(agg4#26))#102 / 100.0) as decimal(11,6)) AS agg4#109] + +(76) Union + +(77) TakeOrderedAndProject 
+Input [7]: [i_item_id#21, s_state#18, g_state#48, agg1#49, agg2#50, agg3#51, agg4#52] +Arguments: 100, [i_item_id#21 ASC NULLS FIRST, s_state#18 ASC NULLS FIRST], [i_item_id#21, s_state#18, g_state#48, agg1#49, agg2#50, agg3#51, agg4#52] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q27a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q27a/simplified.txt new file mode 100644 index 0000000000000..7bfdbae974b51 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q27a/simplified.txt @@ -0,0 +1,113 @@ +TakeOrderedAndProject [agg1,agg2,agg3,agg4,g_state,i_item_id,s_state] + Union + WholeStageCodegen (6) + HashAggregate [count,count,count,count,i_item_id,s_state,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,avg(UnscaledValue(agg2)),avg(UnscaledValue(agg3)),avg(UnscaledValue(agg4)),avg(cast(agg1 as bigint)),count,count,count,count,g_state,sum,sum,sum,sum] + InputAdapter + Exchange [i_item_id,s_state] #1 + WholeStageCodegen (5) + HashAggregate [agg1,agg2,agg3,agg4,i_item_id,s_state] [count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] + Project [i_item_id,s_state,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [s_state,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] + Filter [ss_cdemo_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #2 
+ WholeStageCodegen (1) + Project [cd_demo_sk] + Filter [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [s_state,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_state,s_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_id,i_item_sk] + WholeStageCodegen (12) + HashAggregate [count,count,count,count,i_item_id,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,avg(UnscaledValue(agg2)),avg(UnscaledValue(agg3)),avg(UnscaledValue(agg4)),avg(cast(agg1 as bigint)),count,count,count,count,g_state,s_state,sum,sum,sum,sum] + InputAdapter + Exchange [i_item_id] #6 + WholeStageCodegen (11) + HashAggregate [agg1,agg2,agg3,agg4,i_item_id] [count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] + Project [i_item_id,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] + Filter [ss_cdemo_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales 
[ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + ReusedExchange [cd_demo_sk] #2 + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (9) + Project [s_store_sk] + Filter [s_state,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_state,s_store_sk] + InputAdapter + ReusedExchange [i_item_id,i_item_sk] #5 + WholeStageCodegen (18) + HashAggregate [count,count,count,count,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,avg(UnscaledValue(agg2)),avg(UnscaledValue(agg3)),avg(UnscaledValue(agg4)),avg(cast(agg1 as bigint)),count,count,count,count,g_state,i_item_id,s_state,sum,sum,sum,sum] + InputAdapter + Exchange #8 + WholeStageCodegen (17) + HashAggregate [agg1,agg2,agg3,agg4] [count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] + Project [ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] + Filter [ss_cdemo_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + ReusedExchange [cd_demo_sk] #2 + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [s_store_sk] #7 + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (16) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk] diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/explain.txt new file mode 100644 index 0000000000000..45e1768b05c2a --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/explain.txt @@ -0,0 +1,218 @@ +== Physical Plan == +* Sort (39) ++- Exchange (38) + +- * Project (37) + +- * SortMergeJoin Inner (36) + :- * Sort (30) + : +- Exchange (29) + : +- * Filter (28) + : +- * HashAggregate (27) + : +- Exchange (26) + : +- * HashAggregate (25) + : +- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.store (11) + : +- BroadcastExchange (22) + : +- * Project (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.household_demographics (18) + +- * Sort (35) + +- Exchange (34) + +- * Filter (33) + +- * ColumnarToRow (32) + +- Scan parquet default.customer (31) + + +(1) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) 
ColumnarToRow [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] + +(3) Filter [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] +Condition : (((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#4)) AND isnotnull(ss_hdemo_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#6, d_year#7, d_dom#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [Or(And(GreaterThanOrEqual(d_dom,1),LessThanOrEqual(d_dom,3)),And(GreaterThanOrEqual(d_dom,25),LessThanOrEqual(d_dom,28))), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] +Condition : (((((d_dom#8 >= 1) AND (d_dom#8 <= 3)) OR ((d_dom#8 >= 25) AND (d_dom#8 <= 28))) AND d_year#7 IN (1999,2000,2001)) AND isnotnull(d_date_sk#6)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] + +(8) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] +Input [6]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, d_date_sk#6] + +(11) Scan parquet default.store +Output [2]: [s_store_sk#10, s_county#11] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_county), EqualTo(s_county,Williamson County), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#10, s_county#11] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#10, s_county#11] +Condition : ((isnotnull(s_county#11) AND (s_county#11 = Williamson County)) AND isnotnull(s_store_sk#10)) + +(14) Project [codegen id : 2] +Output [1]: [s_store_sk#10] +Input [2]: [s_store_sk#10, s_county#11] + +(15) BroadcastExchange +Input [1]: [s_store_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#10] +Join condition: None + +(17) Project [codegen id : 4] +Output [3]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_ticket_number#5] +Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, s_store_sk#10] + +(18) Scan parquet default.household_demographics +Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/household_demographics] +PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] + +(20) Filter [codegen id : 3] +Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] +Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND 
(hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.2)) AND isnotnull(hd_demo_sk#13)) + +(21) Project [codegen id : 3] +Output [1]: [hd_demo_sk#13] +Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] + +(22) BroadcastExchange +Input [1]: [hd_demo_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_hdemo_sk#3] +Right keys [1]: [hd_demo_sk#13] +Join condition: None + +(24) Project [codegen id : 4] +Output [2]: [ss_customer_sk#2, ss_ticket_number#5] +Input [4]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_ticket_number#5, hd_demo_sk#13] + +(25) HashAggregate [codegen id : 4] +Input [2]: [ss_customer_sk#2, ss_ticket_number#5] +Keys [2]: [ss_ticket_number#5, ss_customer_sk#2] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#18] +Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] + +(26) Exchange +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] + +(27) HashAggregate [codegen id : 5] +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] +Keys [2]: [ss_ticket_number#5, ss_customer_sk#2] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#21] +Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count(1)#21 AS cnt#22] + +(28) Filter [codegen id : 5] +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] +Condition : ((cnt#22 >= 15) AND (cnt#22 <= 20)) + +(29) Exchange +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] +Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#23] + +(30) Sort [codegen id : 6] +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] +Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 + +(31) Scan parquet default.customer 
+Output [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 7] +Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] + +(33) Filter [codegen id : 7] +Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] +Condition : isnotnull(c_customer_sk#24) + +(34) Exchange +Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] +Arguments: hashpartitioning(c_customer_sk#24, 5), true, [id=#29] + +(35) Sort [codegen id : 8] +Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] +Arguments: [c_customer_sk#24 ASC NULLS FIRST], false, 0 + +(36) SortMergeJoin [codegen id : 9] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#24] +Join condition: None + +(37) Project [codegen id : 9] +Output [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] +Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] + +(38) Exchange +Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] +Arguments: rangepartitioning(c_last_name#27 ASC NULLS FIRST, c_first_name#26 ASC NULLS FIRST, c_salutation#25 ASC NULLS FIRST, c_preferred_cust_flag#28 DESC NULLS LAST, ss_ticket_number#5 ASC NULLS FIRST, 5), true, [id=#30] + +(39) Sort [codegen id : 10] +Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] 
+Arguments: [c_last_name#27 ASC NULLS FIRST, c_first_name#26 ASC NULLS FIRST, c_salutation#25 ASC NULLS FIRST, c_preferred_cust_flag#28 DESC NULLS LAST, ss_ticket_number#5 ASC NULLS FIRST], true, 0 + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/simplified.txt new file mode 100644 index 0000000000000..cd212364ff4f7 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/simplified.txt @@ -0,0 +1,63 @@ +WholeStageCodegen (10) + Sort [c_first_name,c_last_name,c_preferred_cust_flag,c_salutation,ss_ticket_number] + InputAdapter + Exchange [c_first_name,c_last_name,c_preferred_cust_flag,c_salutation,ss_ticket_number] #1 + WholeStageCodegen (9) + Project [c_first_name,c_last_name,c_preferred_cust_flag,c_salutation,cnt,ss_ticket_number] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (6) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #2 + WholeStageCodegen (5) + Filter [cnt] + HashAggregate [count,ss_customer_sk,ss_ticket_number] [cnt,count,count(1)] + InputAdapter + Exchange [ss_customer_sk,ss_ticket_number] #3 + WholeStageCodegen (4) + HashAggregate [ss_customer_sk,ss_ticket_number] [count,count] + Project [ss_customer_sk,ss_ticket_number] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_ticket_number] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_dom,d_year] + ColumnarToRow + 
InputAdapter + Scan parquet default.date_dim [d_date_sk,d_dom,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Project [s_store_sk] + Filter [s_county,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_county,s_store_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Project [hd_demo_sk] + Filter [hd_buy_potential,hd_demo_sk,hd_dep_count,hd_vehicle_count] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_buy_potential,hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + WholeStageCodegen (8) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #7 + WholeStageCodegen (7) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_preferred_cust_flag,c_salutation] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/explain.txt new file mode 100644 index 0000000000000..d6dcdb2ecdb8d --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/explain.txt @@ -0,0 +1,203 @@ +== Physical Plan == +* Sort (36) ++- Exchange (35) + +- * Project (34) + +- * BroadcastHashJoin Inner BuildRight (33) + :- * Filter (28) + : +- * HashAggregate (27) + : +- Exchange (26) + : +- * HashAggregate (25) + : +- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + 
: : +- * ColumnarToRow (12) + : : +- Scan parquet default.store (11) + : +- BroadcastExchange (22) + : +- * Project (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.household_demographics (18) + +- BroadcastExchange (32) + +- * Filter (31) + +- * ColumnarToRow (30) + +- Scan parquet default.customer (29) + + +(1) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] + +(3) Filter [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] +Condition : (((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#4)) AND isnotnull(ss_hdemo_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#6, d_year#7, d_dom#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [Or(And(GreaterThanOrEqual(d_dom,1),LessThanOrEqual(d_dom,3)),And(GreaterThanOrEqual(d_dom,25),LessThanOrEqual(d_dom,28))), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] +Condition : (((((d_dom#8 >= 1) AND (d_dom#8 <= 3)) OR ((d_dom#8 >= 25) AND (d_dom#8 <= 28))) AND d_year#7 IN (1999,2000,2001)) AND isnotnull(d_date_sk#6)) + +(7) Project [codegen 
id : 1] +Output [1]: [d_date_sk#6] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] + +(8) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] +Input [6]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, d_date_sk#6] + +(11) Scan parquet default.store +Output [2]: [s_store_sk#10, s_county#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_county), EqualTo(s_county,Williamson County), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#10, s_county#11] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#10, s_county#11] +Condition : ((isnotnull(s_county#11) AND (s_county#11 = Williamson County)) AND isnotnull(s_store_sk#10)) + +(14) Project [codegen id : 2] +Output [1]: [s_store_sk#10] +Input [2]: [s_store_sk#10, s_county#11] + +(15) BroadcastExchange +Input [1]: [s_store_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#10] +Join condition: None + +(17) Project [codegen id : 4] +Output [3]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_ticket_number#5] +Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, s_store_sk#10] + +(18) Scan parquet default.household_demographics +Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/household_demographics] +PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] + +(20) Filter [codegen id : 3] +Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] +Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.2)) AND isnotnull(hd_demo_sk#13)) + +(21) Project [codegen id : 3] +Output [1]: [hd_demo_sk#13] +Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] + +(22) BroadcastExchange +Input [1]: [hd_demo_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_hdemo_sk#3] +Right keys [1]: [hd_demo_sk#13] +Join condition: None + +(24) Project [codegen id : 4] +Output [2]: [ss_customer_sk#2, ss_ticket_number#5] +Input [4]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_ticket_number#5, hd_demo_sk#13] + +(25) HashAggregate [codegen id : 4] +Input [2]: [ss_customer_sk#2, ss_ticket_number#5] +Keys [2]: [ss_ticket_number#5, ss_customer_sk#2] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#18] +Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] + +(26) Exchange +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] + +(27) HashAggregate [codegen id : 6] +Input [3]: 
[ss_ticket_number#5, ss_customer_sk#2, count#19] +Keys [2]: [ss_ticket_number#5, ss_customer_sk#2] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#21] +Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count(1)#21 AS cnt#22] + +(28) Filter [codegen id : 6] +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] +Condition : ((cnt#22 >= 15) AND (cnt#22 <= 20)) + +(29) Scan parquet default.customer +Output [5]: [c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 5] +Input [5]: [c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] + +(31) Filter [codegen id : 5] +Input [5]: [c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] +Condition : isnotnull(c_customer_sk#23) + +(32) BroadcastExchange +Input [5]: [c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#28] + +(33) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#23] +Join condition: None + +(34) Project [codegen id : 6] +Output [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] +Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] + +(35) Exchange +Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] +Arguments: rangepartitioning(c_last_name#26 ASC NULLS FIRST, c_first_name#25 ASC NULLS FIRST, 
c_salutation#24 ASC NULLS FIRST, c_preferred_cust_flag#27 DESC NULLS LAST, ss_ticket_number#5 ASC NULLS FIRST, 5), true, [id=#29] + +(36) Sort [codegen id : 7] +Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] +Arguments: [c_last_name#26 ASC NULLS FIRST, c_first_name#25 ASC NULLS FIRST, c_salutation#24 ASC NULLS FIRST, c_preferred_cust_flag#27 DESC NULLS LAST, ss_ticket_number#5 ASC NULLS FIRST], true, 0 + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/simplified.txt new file mode 100644 index 0000000000000..0bdcd83bf2968 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/simplified.txt @@ -0,0 +1,54 @@ +WholeStageCodegen (7) + Sort [c_first_name,c_last_name,c_preferred_cust_flag,c_salutation,ss_ticket_number] + InputAdapter + Exchange [c_first_name,c_last_name,c_preferred_cust_flag,c_salutation,ss_ticket_number] #1 + WholeStageCodegen (6) + Project [c_first_name,c_last_name,c_preferred_cust_flag,c_salutation,cnt,ss_ticket_number] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [cnt] + HashAggregate [count,ss_customer_sk,ss_ticket_number] [cnt,count,count(1)] + InputAdapter + Exchange [ss_customer_sk,ss_ticket_number] #2 + WholeStageCodegen (4) + HashAggregate [ss_customer_sk,ss_ticket_number] [count,count] + Project [ss_customer_sk,ss_ticket_number] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_ticket_number] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + 
InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_dom,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_dom,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [s_store_sk] + Filter [s_county,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_county,s_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [hd_demo_sk] + Filter [hd_buy_potential,hd_demo_sk,hd_dep_count,hd_vehicle_count] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_buy_potential,hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_preferred_cust_flag,c_salutation] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35.sf100/explain.txt new file mode 100644 index 0000000000000..25b48af7f658f --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35.sf100/explain.txt @@ -0,0 +1,329 @@ +== Physical Plan == +TakeOrderedAndProject (60) ++- * HashAggregate (59) + +- Exchange (58) + +- * HashAggregate (57) + +- * Project (56) + +- * SortMergeJoin Inner (55) + :- * Sort (49) + : +- Exchange (48) + : +- * Project (47) + : +- * SortMergeJoin Inner (46) + : :- * Sort (40) + : : +- Exchange (39) + : : +- * Project (38) + : : +- * Filter (37) + : : +- SortMergeJoin ExistenceJoin(exists#1) (36) + : : :- SortMergeJoin ExistenceJoin(exists#2) (27) + : : : :- SortMergeJoin LeftSemi (18) + : : : : :- * Sort (5) + : : : : : +- Exchange (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.customer (1) + : : : : +- * Sort (17) + : : : : +- 
Exchange (16) + : : : : +- * Project (15) + : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : :- * Filter (8) + : : : : : +- * ColumnarToRow (7) + : : : : : +- Scan parquet default.store_sales (6) + : : : : +- BroadcastExchange (13) + : : : : +- * Project (12) + : : : : +- * Filter (11) + : : : : +- * ColumnarToRow (10) + : : : : +- Scan parquet default.date_dim (9) + : : : +- * Sort (26) + : : : +- Exchange (25) + : : : +- * Project (24) + : : : +- * BroadcastHashJoin Inner BuildRight (23) + : : : :- * Filter (21) + : : : : +- * ColumnarToRow (20) + : : : : +- Scan parquet default.web_sales (19) + : : : +- ReusedExchange (22) + : : +- * Sort (35) + : : +- Exchange (34) + : : +- * Project (33) + : : +- * BroadcastHashJoin Inner BuildRight (32) + : : :- * Filter (30) + : : : +- * ColumnarToRow (29) + : : : +- Scan parquet default.catalog_sales (28) + : : +- ReusedExchange (31) + : +- * Sort (45) + : +- Exchange (44) + : +- * Filter (43) + : +- * ColumnarToRow (42) + : +- Scan parquet default.customer_address (41) + +- * Sort (54) + +- Exchange (53) + +- * Filter (52) + +- * ColumnarToRow (51) + +- Scan parquet default.customer_demographics (50) + + +(1) Scan parquet default.customer +Output [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] + +(3) Filter [codegen id : 1] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Condition : (isnotnull(c_current_addr_sk#5) AND isnotnull(c_current_cdemo_sk#4)) + +(4) Exchange +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Arguments: hashpartitioning(c_customer_sk#3, 5), 
true, [id=#6] + +(5) Sort [codegen id : 2] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Arguments: [c_customer_sk#3 ASC NULLS FIRST], false, 0 + +(6) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#7, ss_customer_sk#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 4] +Input [2]: [ss_sold_date_sk#7, ss_customer_sk#8] + +(8) Filter [codegen id : 4] +Input [2]: [ss_sold_date_sk#7, ss_customer_sk#8] +Condition : isnotnull(ss_sold_date_sk#7) + +(9) Scan parquet default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_qoy#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_qoy), EqualTo(d_year,2002), LessThan(d_qoy,4), IsNotNull(d_date_sk)] +ReadSchema: struct + +(10) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#9, d_year#10, d_qoy#11] + +(11) Filter [codegen id : 3] +Input [3]: [d_date_sk#9, d_year#10, d_qoy#11] +Condition : ((((isnotnull(d_year#10) AND isnotnull(d_qoy#11)) AND (d_year#10 = 2002)) AND (d_qoy#11 < 4)) AND isnotnull(d_date_sk#9)) + +(12) Project [codegen id : 3] +Output [1]: [d_date_sk#9] +Input [3]: [d_date_sk#9, d_year#10, d_qoy#11] + +(13) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(15) Project [codegen id : 4] +Output [1]: [ss_customer_sk#8] +Input [3]: [ss_sold_date_sk#7, ss_customer_sk#8, d_date_sk#9] + +(16) Exchange +Input [1]: [ss_customer_sk#8] +Arguments: 
hashpartitioning(ss_customer_sk#8, 5), true, [id=#13] + +(17) Sort [codegen id : 5] +Input [1]: [ss_customer_sk#8] +Arguments: [ss_customer_sk#8 ASC NULLS FIRST], false, 0 + +(18) SortMergeJoin +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ss_customer_sk#8] +Join condition: None + +(19) Scan parquet default.web_sales +Output [2]: [ws_sold_date_sk#14, ws_bill_customer_sk#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 7] +Input [2]: [ws_sold_date_sk#14, ws_bill_customer_sk#15] + +(21) Filter [codegen id : 7] +Input [2]: [ws_sold_date_sk#14, ws_bill_customer_sk#15] +Condition : isnotnull(ws_sold_date_sk#14) + +(22) ReusedExchange [Reuses operator id: 13] +Output [1]: [d_date_sk#9] + +(23) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ws_sold_date_sk#14] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(24) Project [codegen id : 7] +Output [1]: [ws_bill_customer_sk#15] +Input [3]: [ws_sold_date_sk#14, ws_bill_customer_sk#15, d_date_sk#9] + +(25) Exchange +Input [1]: [ws_bill_customer_sk#15] +Arguments: hashpartitioning(ws_bill_customer_sk#15, 5), true, [id=#16] + +(26) Sort [codegen id : 8] +Input [1]: [ws_bill_customer_sk#15] +Arguments: [ws_bill_customer_sk#15 ASC NULLS FIRST], false, 0 + +(27) SortMergeJoin +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ws_bill_customer_sk#15] +Join condition: None + +(28) Scan parquet default.catalog_sales +Output [2]: [cs_sold_date_sk#17, cs_ship_customer_sk#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 10] +Input [2]: [cs_sold_date_sk#17, 
cs_ship_customer_sk#18] + +(30) Filter [codegen id : 10] +Input [2]: [cs_sold_date_sk#17, cs_ship_customer_sk#18] +Condition : isnotnull(cs_sold_date_sk#17) + +(31) ReusedExchange [Reuses operator id: 13] +Output [1]: [d_date_sk#9] + +(32) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_sold_date_sk#17] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(33) Project [codegen id : 10] +Output [1]: [cs_ship_customer_sk#18] +Input [3]: [cs_sold_date_sk#17, cs_ship_customer_sk#18, d_date_sk#9] + +(34) Exchange +Input [1]: [cs_ship_customer_sk#18] +Arguments: hashpartitioning(cs_ship_customer_sk#18, 5), true, [id=#19] + +(35) Sort [codegen id : 11] +Input [1]: [cs_ship_customer_sk#18] +Arguments: [cs_ship_customer_sk#18 ASC NULLS FIRST], false, 0 + +(36) SortMergeJoin +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [cs_ship_customer_sk#18] +Join condition: None + +(37) Filter [codegen id : 12] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] +Condition : (exists#2 OR exists#1) + +(38) Project [codegen id : 12] +Output [2]: [c_current_cdemo_sk#4, c_current_addr_sk#5] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] + +(39) Exchange +Input [2]: [c_current_cdemo_sk#4, c_current_addr_sk#5] +Arguments: hashpartitioning(c_current_addr_sk#5, 5), true, [id=#20] + +(40) Sort [codegen id : 13] +Input [2]: [c_current_cdemo_sk#4, c_current_addr_sk#5] +Arguments: [c_current_addr_sk#5 ASC NULLS FIRST], false, 0 + +(41) Scan parquet default.customer_address +Output [2]: [ca_address_sk#21, ca_state#22] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(42) ColumnarToRow [codegen id : 14] +Input [2]: [ca_address_sk#21, ca_state#22] + +(43) Filter [codegen id : 14] +Input [2]: 
[ca_address_sk#21, ca_state#22] +Condition : isnotnull(ca_address_sk#21) + +(44) Exchange +Input [2]: [ca_address_sk#21, ca_state#22] +Arguments: hashpartitioning(ca_address_sk#21, 5), true, [id=#23] + +(45) Sort [codegen id : 15] +Input [2]: [ca_address_sk#21, ca_state#22] +Arguments: [ca_address_sk#21 ASC NULLS FIRST], false, 0 + +(46) SortMergeJoin [codegen id : 16] +Left keys [1]: [c_current_addr_sk#5] +Right keys [1]: [ca_address_sk#21] +Join condition: None + +(47) Project [codegen id : 16] +Output [2]: [c_current_cdemo_sk#4, ca_state#22] +Input [4]: [c_current_cdemo_sk#4, c_current_addr_sk#5, ca_address_sk#21, ca_state#22] + +(48) Exchange +Input [2]: [c_current_cdemo_sk#4, ca_state#22] +Arguments: hashpartitioning(c_current_cdemo_sk#4, 5), true, [id=#24] + +(49) Sort [codegen id : 17] +Input [2]: [c_current_cdemo_sk#4, ca_state#22] +Arguments: [c_current_cdemo_sk#4 ASC NULLS FIRST], false, 0 + +(50) Scan parquet default.customer_demographics +Output [6]: [cd_demo_sk#25, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(51) ColumnarToRow [codegen id : 18] +Input [6]: [cd_demo_sk#25, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] + +(52) Filter [codegen id : 18] +Input [6]: [cd_demo_sk#25, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Condition : isnotnull(cd_demo_sk#25) + +(53) Exchange +Input [6]: [cd_demo_sk#25, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Arguments: hashpartitioning(cd_demo_sk#25, 5), true, [id=#31] + +(54) Sort [codegen id : 19] +Input [6]: [cd_demo_sk#25, 
cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Arguments: [cd_demo_sk#25 ASC NULLS FIRST], false, 0 + +(55) SortMergeJoin [codegen id : 20] +Left keys [1]: [c_current_cdemo_sk#4] +Right keys [1]: [cd_demo_sk#25] +Join condition: None + +(56) Project [codegen id : 20] +Output [6]: [ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Input [8]: [c_current_cdemo_sk#4, ca_state#22, cd_demo_sk#25, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] + +(57) HashAggregate [codegen id : 20] +Input [6]: [ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Keys [6]: [ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Functions [10]: [partial_count(1), partial_avg(cast(cd_dep_count#28 as bigint)), partial_max(cd_dep_count#28), partial_sum(cast(cd_dep_count#28 as bigint)), partial_avg(cast(cd_dep_employed_count#29 as bigint)), partial_max(cd_dep_employed_count#29), partial_sum(cast(cd_dep_employed_count#29 as bigint)), partial_avg(cast(cd_dep_college_count#30 as bigint)), partial_max(cd_dep_college_count#30), partial_sum(cast(cd_dep_college_count#30 as bigint))] +Aggregate Attributes [13]: [count#32, sum#33, count#34, max#35, sum#36, sum#37, count#38, max#39, sum#40, sum#41, count#42, max#43, sum#44] +Results [19]: [ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, count#45, sum#46, count#47, max#48, sum#49, sum#50, count#51, max#52, sum#53, sum#54, count#55, max#56, sum#57] + +(58) Exchange +Input [19]: [ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, count#45, sum#46, count#47, max#48, sum#49, sum#50, count#51, max#52, sum#53, sum#54, 
count#55, max#56, sum#57] +Arguments: hashpartitioning(ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, 5), true, [id=#58] + +(59) HashAggregate [codegen id : 21] +Input [19]: [ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, count#45, sum#46, count#47, max#48, sum#49, sum#50, count#51, max#52, sum#53, sum#54, count#55, max#56, sum#57] +Keys [6]: [ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Functions [10]: [count(1), avg(cast(cd_dep_count#28 as bigint)), max(cd_dep_count#28), sum(cast(cd_dep_count#28 as bigint)), avg(cast(cd_dep_employed_count#29 as bigint)), max(cd_dep_employed_count#29), sum(cast(cd_dep_employed_count#29 as bigint)), avg(cast(cd_dep_college_count#30 as bigint)), max(cd_dep_college_count#30), sum(cast(cd_dep_college_count#30 as bigint))] +Aggregate Attributes [10]: [count(1)#59, avg(cast(cd_dep_count#28 as bigint))#60, max(cd_dep_count#28)#61, sum(cast(cd_dep_count#28 as bigint))#62, avg(cast(cd_dep_employed_count#29 as bigint))#63, max(cd_dep_employed_count#29)#64, sum(cast(cd_dep_employed_count#29 as bigint))#65, avg(cast(cd_dep_college_count#30 as bigint))#66, max(cd_dep_college_count#30)#67, sum(cast(cd_dep_college_count#30 as bigint))#68] +Results [18]: [ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, count(1)#59 AS cnt1#69, avg(cast(cd_dep_count#28 as bigint))#60 AS avg(cd_dep_count)#70, max(cd_dep_count#28)#61 AS max(cd_dep_count)#71, sum(cast(cd_dep_count#28 as bigint))#62 AS sum(cd_dep_count)#72, cd_dep_employed_count#29, count(1)#59 AS cnt2#73, avg(cast(cd_dep_employed_count#29 as bigint))#63 AS avg(cd_dep_employed_count)#74, max(cd_dep_employed_count#29)#64 AS max(cd_dep_employed_count)#75, sum(cast(cd_dep_employed_count#29 as bigint))#65 AS sum(cd_dep_employed_count)#76, cd_dep_college_count#30, count(1)#59 AS 
cnt3#77, avg(cast(cd_dep_college_count#30 as bigint))#66 AS avg(cd_dep_college_count)#78, max(cd_dep_college_count#30)#67 AS max(cd_dep_college_count)#79, sum(cast(cd_dep_college_count#30 as bigint))#68 AS sum(cd_dep_college_count)#80] + +(60) TakeOrderedAndProject +Input [18]: [ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cnt1#69, avg(cd_dep_count)#70, max(cd_dep_count)#71, sum(cd_dep_count)#72, cd_dep_employed_count#29, cnt2#73, avg(cd_dep_employed_count)#74, max(cd_dep_employed_count)#75, sum(cd_dep_employed_count)#76, cd_dep_college_count#30, cnt3#77, avg(cd_dep_college_count)#78, max(cd_dep_college_count)#79, sum(cd_dep_college_count)#80] +Arguments: 100, [ca_state#22 ASC NULLS FIRST, cd_gender#26 ASC NULLS FIRST, cd_marital_status#27 ASC NULLS FIRST, cd_dep_count#28 ASC NULLS FIRST, cd_dep_employed_count#29 ASC NULLS FIRST, cd_dep_college_count#30 ASC NULLS FIRST], [ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cnt1#69, avg(cd_dep_count)#70, max(cd_dep_count)#71, sum(cd_dep_count)#72, cd_dep_employed_count#29, cnt2#73, avg(cd_dep_employed_count)#74, max(cd_dep_employed_count)#75, sum(cd_dep_employed_count)#76, cd_dep_college_count#30, cnt3#77, avg(cd_dep_college_count)#78, max(cd_dep_college_count)#79, sum(cd_dep_college_count)#80] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35.sf100/simplified.txt new file mode 100644 index 0000000000000..3e21f6dae18d6 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35.sf100/simplified.txt @@ -0,0 +1,103 @@ +TakeOrderedAndProject 
[avg(cd_dep_college_count),avg(cd_dep_count),avg(cd_dep_employed_count),ca_state,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status,cnt1,cnt2,cnt3,max(cd_dep_college_count),max(cd_dep_count),max(cd_dep_employed_count),sum(cd_dep_college_count),sum(cd_dep_count),sum(cd_dep_employed_count)] + WholeStageCodegen (21) + HashAggregate [ca_state,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status,count,count,count,count,max,max,max,sum,sum,sum,sum,sum,sum] [avg(cast(cd_dep_college_count as bigint)),avg(cast(cd_dep_count as bigint)),avg(cast(cd_dep_employed_count as bigint)),avg(cd_dep_college_count),avg(cd_dep_count),avg(cd_dep_employed_count),cnt1,cnt2,cnt3,count,count,count,count,count(1),max,max,max,max(cd_dep_college_count),max(cd_dep_college_count),max(cd_dep_count),max(cd_dep_count),max(cd_dep_employed_count),max(cd_dep_employed_count),sum,sum,sum,sum,sum,sum,sum(cast(cd_dep_college_count as bigint)),sum(cast(cd_dep_count as bigint)),sum(cast(cd_dep_employed_count as bigint)),sum(cd_dep_college_count),sum(cd_dep_count),sum(cd_dep_employed_count)] + InputAdapter + Exchange [ca_state,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status] #1 + WholeStageCodegen (20) + HashAggregate [ca_state,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status] [count,count,count,count,count,count,count,count,max,max,max,max,max,max,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [ca_state,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status] + SortMergeJoin [c_current_cdemo_sk,cd_demo_sk] + InputAdapter + WholeStageCodegen (17) + Sort [c_current_cdemo_sk] + InputAdapter + Exchange [c_current_cdemo_sk] #2 + WholeStageCodegen (16) + Project [c_current_cdemo_sk,ca_state] + SortMergeJoin [c_current_addr_sk,ca_address_sk] + InputAdapter + WholeStageCodegen (13) + Sort [c_current_addr_sk] + InputAdapter + Exchange 
[c_current_addr_sk] #3 + WholeStageCodegen (12) + Project [c_current_addr_sk,c_current_cdemo_sk] + Filter [exists,exists] + InputAdapter + SortMergeJoin [c_customer_sk,cs_ship_customer_sk] + SortMergeJoin [c_customer_sk,ws_bill_customer_sk] + SortMergeJoin [c_customer_sk,ss_customer_sk] + WholeStageCodegen (2) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #4 + WholeStageCodegen (1) + Filter [c_current_addr_sk,c_current_cdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + WholeStageCodegen (5) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #5 + WholeStageCodegen (4) + Project [ss_customer_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date_sk,d_qoy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] + WholeStageCodegen (8) + Sort [ws_bill_customer_sk] + InputAdapter + Exchange [ws_bill_customer_sk] #7 + WholeStageCodegen (7) + Project [ws_bill_customer_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #6 + WholeStageCodegen (11) + Sort [cs_ship_customer_sk] + InputAdapter + Exchange [cs_ship_customer_sk] #8 + WholeStageCodegen (10) + Project [cs_ship_customer_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #6 + InputAdapter + WholeStageCodegen (15) + Sort [ca_address_sk] + InputAdapter + Exchange [ca_address_sk] #9 + WholeStageCodegen (14) 
+ Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state] + InputAdapter + WholeStageCodegen (19) + Sort [cd_demo_sk] + InputAdapter + Exchange [cd_demo_sk] #10 + WholeStageCodegen (18) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35/explain.txt new file mode 100644 index 0000000000000..a6341c55f0457 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35/explain.txt @@ -0,0 +1,274 @@ +== Physical Plan == +TakeOrderedAndProject (49) ++- * HashAggregate (48) + +- Exchange (47) + +- * HashAggregate (46) + +- * Project (45) + +- * BroadcastHashJoin Inner BuildRight (44) + :- * Project (39) + : +- * BroadcastHashJoin Inner BuildRight (38) + : :- * Project (33) + : : +- * Filter (32) + : : +- * BroadcastHashJoin ExistenceJoin(exists#1) BuildRight (31) + : : :- * BroadcastHashJoin ExistenceJoin(exists#2) BuildRight (23) + : : : :- * BroadcastHashJoin LeftSemi BuildRight (15) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.customer (1) + : : : : +- BroadcastExchange (14) + : : : : +- * Project (13) + : : : : +- * BroadcastHashJoin Inner BuildRight (12) + : : : : :- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.store_sales (4) + : : : : +- BroadcastExchange (11) + : : : : +- * Project (10) + : : : : +- * Filter (9) + : : : : +- * ColumnarToRow (8) + : : : : +- Scan parquet default.date_dim (7) + : : : +- BroadcastExchange (22) + : : : +- * Project (21) + : : : +- * BroadcastHashJoin Inner BuildRight (20) + : : : :- * Filter (18) + : : : : +- * ColumnarToRow (17) + : : : : +- 
Scan parquet default.web_sales (16) + : : : +- ReusedExchange (19) + : : +- BroadcastExchange (30) + : : +- * Project (29) + : : +- * BroadcastHashJoin Inner BuildRight (28) + : : :- * Filter (26) + : : : +- * ColumnarToRow (25) + : : : +- Scan parquet default.catalog_sales (24) + : : +- ReusedExchange (27) + : +- BroadcastExchange (37) + : +- * Filter (36) + : +- * ColumnarToRow (35) + : +- Scan parquet default.customer_address (34) + +- BroadcastExchange (43) + +- * Filter (42) + +- * ColumnarToRow (41) + +- Scan parquet default.customer_demographics (40) + + +(1) Scan parquet default.customer +Output [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 9] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] + +(3) Filter [codegen id : 9] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Condition : (isnotnull(c_current_addr_sk#5) AND isnotnull(c_current_cdemo_sk#4)) + +(4) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#6, ss_customer_sk#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 2] +Input [2]: [ss_sold_date_sk#6, ss_customer_sk#7] + +(6) Filter [codegen id : 2] +Input [2]: [ss_sold_date_sk#6, ss_customer_sk#7] +Condition : isnotnull(ss_sold_date_sk#6) + +(7) Scan parquet default.date_dim +Output [3]: [d_date_sk#8, d_year#9, d_qoy#10] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_qoy), EqualTo(d_year,2002), LessThan(d_qoy,4), IsNotNull(d_date_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] + +(9) Filter [codegen id : 1] +Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] +Condition : ((((isnotnull(d_year#9) AND isnotnull(d_qoy#10)) AND (d_year#9 = 2002)) AND (d_qoy#10 < 4)) AND isnotnull(d_date_sk#8)) + +(10) Project [codegen id : 1] +Output [1]: [d_date_sk#8] +Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] + +(11) BroadcastExchange +Input [1]: [d_date_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] + +(12) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#6] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(13) Project [codegen id : 2] +Output [1]: [ss_customer_sk#7] +Input [3]: [ss_sold_date_sk#6, ss_customer_sk#7, d_date_sk#8] + +(14) BroadcastExchange +Input [1]: [ss_customer_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(15) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ss_customer_sk#7] +Join condition: None + +(16) Scan parquet default.web_sales +Output [2]: [ws_sold_date_sk#13, ws_bill_customer_sk#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 4] +Input [2]: [ws_sold_date_sk#13, ws_bill_customer_sk#14] + +(18) Filter [codegen id : 4] +Input [2]: [ws_sold_date_sk#13, ws_bill_customer_sk#14] +Condition : isnotnull(ws_sold_date_sk#13) + +(19) ReusedExchange [Reuses operator id: 11] +Output [1]: [d_date_sk#8] + 
+(20) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_sold_date_sk#13] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(21) Project [codegen id : 4] +Output [1]: [ws_bill_customer_sk#14] +Input [3]: [ws_sold_date_sk#13, ws_bill_customer_sk#14, d_date_sk#8] + +(22) BroadcastExchange +Input [1]: [ws_bill_customer_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] + +(23) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ws_bill_customer_sk#14] +Join condition: None + +(24) Scan parquet default.catalog_sales +Output [2]: [cs_sold_date_sk#16, cs_ship_customer_sk#17] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 6] +Input [2]: [cs_sold_date_sk#16, cs_ship_customer_sk#17] + +(26) Filter [codegen id : 6] +Input [2]: [cs_sold_date_sk#16, cs_ship_customer_sk#17] +Condition : isnotnull(cs_sold_date_sk#16) + +(27) ReusedExchange [Reuses operator id: 11] +Output [1]: [d_date_sk#8] + +(28) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_sold_date_sk#16] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(29) Project [codegen id : 6] +Output [1]: [cs_ship_customer_sk#17] +Input [3]: [cs_sold_date_sk#16, cs_ship_customer_sk#17, d_date_sk#8] + +(30) BroadcastExchange +Input [1]: [cs_ship_customer_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] + +(31) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [cs_ship_customer_sk#17] +Join condition: None + +(32) Filter [codegen id : 9] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] +Condition : (exists#2 OR exists#1) + +(33) Project [codegen id : 9] +Output [2]: 
[c_current_cdemo_sk#4, c_current_addr_sk#5] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] + +(34) Scan parquet default.customer_address +Output [2]: [ca_address_sk#19, ca_state#20] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 7] +Input [2]: [ca_address_sk#19, ca_state#20] + +(36) Filter [codegen id : 7] +Input [2]: [ca_address_sk#19, ca_state#20] +Condition : isnotnull(ca_address_sk#19) + +(37) BroadcastExchange +Input [2]: [ca_address_sk#19, ca_state#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#21] + +(38) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_addr_sk#5] +Right keys [1]: [ca_address_sk#19] +Join condition: None + +(39) Project [codegen id : 9] +Output [2]: [c_current_cdemo_sk#4, ca_state#20] +Input [4]: [c_current_cdemo_sk#4, c_current_addr_sk#5, ca_address_sk#19, ca_state#20] + +(40) Scan parquet default.customer_demographics +Output [6]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 8] +Input [6]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] + +(42) Filter [codegen id : 8] +Input [6]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Condition : isnotnull(cd_demo_sk#22) + +(43) BroadcastExchange +Input [6]: [cd_demo_sk#22, 
cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#28] + +(44) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_cdemo_sk#4] +Right keys [1]: [cd_demo_sk#22] +Join condition: None + +(45) Project [codegen id : 9] +Output [6]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Input [8]: [c_current_cdemo_sk#4, ca_state#20, cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] + +(46) HashAggregate [codegen id : 9] +Input [6]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Keys [6]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Functions [10]: [partial_count(1), partial_avg(cast(cd_dep_count#25 as bigint)), partial_max(cd_dep_count#25), partial_sum(cast(cd_dep_count#25 as bigint)), partial_avg(cast(cd_dep_employed_count#26 as bigint)), partial_max(cd_dep_employed_count#26), partial_sum(cast(cd_dep_employed_count#26 as bigint)), partial_avg(cast(cd_dep_college_count#27 as bigint)), partial_max(cd_dep_college_count#27), partial_sum(cast(cd_dep_college_count#27 as bigint))] +Aggregate Attributes [13]: [count#29, sum#30, count#31, max#32, sum#33, sum#34, count#35, max#36, sum#37, sum#38, count#39, max#40, sum#41] +Results [19]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#42, sum#43, count#44, max#45, sum#46, sum#47, count#48, max#49, sum#50, sum#51, count#52, max#53, sum#54] + +(47) Exchange +Input [19]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#42, sum#43, count#44, max#45, 
sum#46, sum#47, count#48, max#49, sum#50, sum#51, count#52, max#53, sum#54] +Arguments: hashpartitioning(ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, 5), true, [id=#55] + +(48) HashAggregate [codegen id : 10] +Input [19]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#42, sum#43, count#44, max#45, sum#46, sum#47, count#48, max#49, sum#50, sum#51, count#52, max#53, sum#54] +Keys [6]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Functions [10]: [count(1), avg(cast(cd_dep_count#25 as bigint)), max(cd_dep_count#25), sum(cast(cd_dep_count#25 as bigint)), avg(cast(cd_dep_employed_count#26 as bigint)), max(cd_dep_employed_count#26), sum(cast(cd_dep_employed_count#26 as bigint)), avg(cast(cd_dep_college_count#27 as bigint)), max(cd_dep_college_count#27), sum(cast(cd_dep_college_count#27 as bigint))] +Aggregate Attributes [10]: [count(1)#56, avg(cast(cd_dep_count#25 as bigint))#57, max(cd_dep_count#25)#58, sum(cast(cd_dep_count#25 as bigint))#59, avg(cast(cd_dep_employed_count#26 as bigint))#60, max(cd_dep_employed_count#26)#61, sum(cast(cd_dep_employed_count#26 as bigint))#62, avg(cast(cd_dep_college_count#27 as bigint))#63, max(cd_dep_college_count#27)#64, sum(cast(cd_dep_college_count#27 as bigint))#65] +Results [18]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, count(1)#56 AS cnt1#66, avg(cast(cd_dep_count#25 as bigint))#57 AS avg(cd_dep_count)#67, max(cd_dep_count#25)#58 AS max(cd_dep_count)#68, sum(cast(cd_dep_count#25 as bigint))#59 AS sum(cd_dep_count)#69, cd_dep_employed_count#26, count(1)#56 AS cnt2#70, avg(cast(cd_dep_employed_count#26 as bigint))#60 AS avg(cd_dep_employed_count)#71, max(cd_dep_employed_count#26)#61 AS max(cd_dep_employed_count)#72, sum(cast(cd_dep_employed_count#26 as bigint))#62 AS 
sum(cd_dep_employed_count)#73, cd_dep_college_count#27, count(1)#56 AS cnt3#74, avg(cast(cd_dep_college_count#27 as bigint))#63 AS avg(cd_dep_college_count)#75, max(cd_dep_college_count#27)#64 AS max(cd_dep_college_count)#76, sum(cast(cd_dep_college_count#27 as bigint))#65 AS sum(cd_dep_college_count)#77] + +(49) TakeOrderedAndProject +Input [18]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cnt1#66, avg(cd_dep_count)#67, max(cd_dep_count)#68, sum(cd_dep_count)#69, cd_dep_employed_count#26, cnt2#70, avg(cd_dep_employed_count)#71, max(cd_dep_employed_count)#72, sum(cd_dep_employed_count)#73, cd_dep_college_count#27, cnt3#74, avg(cd_dep_college_count)#75, max(cd_dep_college_count)#76, sum(cd_dep_college_count)#77] +Arguments: 100, [ca_state#20 ASC NULLS FIRST, cd_gender#23 ASC NULLS FIRST, cd_marital_status#24 ASC NULLS FIRST, cd_dep_count#25 ASC NULLS FIRST, cd_dep_employed_count#26 ASC NULLS FIRST, cd_dep_college_count#27 ASC NULLS FIRST], [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cnt1#66, avg(cd_dep_count)#67, max(cd_dep_count)#68, sum(cd_dep_count)#69, cd_dep_employed_count#26, cnt2#70, avg(cd_dep_employed_count)#71, max(cd_dep_employed_count)#72, sum(cd_dep_employed_count)#73, cd_dep_college_count#27, cnt3#74, avg(cd_dep_college_count)#75, max(cd_dep_college_count)#76, sum(cd_dep_college_count)#77] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35/simplified.txt new file mode 100644 index 0000000000000..2da6d615fe17d --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35/simplified.txt @@ -0,0 +1,73 @@ +TakeOrderedAndProject 
[avg(cd_dep_college_count),avg(cd_dep_count),avg(cd_dep_employed_count),ca_state,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status,cnt1,cnt2,cnt3,max(cd_dep_college_count),max(cd_dep_count),max(cd_dep_employed_count),sum(cd_dep_college_count),sum(cd_dep_count),sum(cd_dep_employed_count)] + WholeStageCodegen (10) + HashAggregate [ca_state,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status,count,count,count,count,max,max,max,sum,sum,sum,sum,sum,sum] [avg(cast(cd_dep_college_count as bigint)),avg(cast(cd_dep_count as bigint)),avg(cast(cd_dep_employed_count as bigint)),avg(cd_dep_college_count),avg(cd_dep_count),avg(cd_dep_employed_count),cnt1,cnt2,cnt3,count,count,count,count,count(1),max,max,max,max(cd_dep_college_count),max(cd_dep_college_count),max(cd_dep_count),max(cd_dep_count),max(cd_dep_employed_count),max(cd_dep_employed_count),sum,sum,sum,sum,sum,sum,sum(cast(cd_dep_college_count as bigint)),sum(cast(cd_dep_count as bigint)),sum(cast(cd_dep_employed_count as bigint)),sum(cd_dep_college_count),sum(cd_dep_count),sum(cd_dep_employed_count)] + InputAdapter + Exchange [ca_state,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status] #1 + WholeStageCodegen (9) + HashAggregate [ca_state,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status] [count,count,count,count,count,count,count,count,max,max,max,max,max,max,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [ca_state,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk,ca_state] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_addr_sk,c_current_cdemo_sk] + Filter [exists,exists] + BroadcastHashJoin [c_customer_sk,cs_ship_customer_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + 
Filter [c_current_addr_sk,c_current_cdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (2) + Project [ss_customer_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_qoy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (4) + Project [ws_bill_customer_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (6) + Project [cs_ship_customer_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (8) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a.sf100/explain.txt new file mode 100644 index 0000000000000..9e668c7015769 --- /dev/null +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a.sf100/explain.txt @@ -0,0 +1,311 @@ +== Physical Plan == +TakeOrderedAndProject (57) ++- * HashAggregate (56) + +- Exchange (55) + +- * HashAggregate (54) + +- * Project (53) + +- * SortMergeJoin Inner (52) + :- * Sort (46) + : +- Exchange (45) + : +- * Project (44) + : +- * SortMergeJoin Inner (43) + : :- * Sort (37) + : : +- Exchange (36) + : : +- * Project (35) + : : +- SortMergeJoin LeftSemi (34) + : : :- SortMergeJoin LeftSemi (18) + : : : :- * Sort (5) + : : : : +- Exchange (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.customer (1) + : : : +- * Sort (17) + : : : +- Exchange (16) + : : : +- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Filter (8) + : : : : +- * ColumnarToRow (7) + : : : : +- Scan parquet default.store_sales (6) + : : : +- BroadcastExchange (13) + : : : +- * Project (12) + : : : +- * Filter (11) + : : : +- * ColumnarToRow (10) + : : : +- Scan parquet default.date_dim (9) + : : +- * Sort (33) + : : +- Exchange (32) + : : +- Union (31) + : : :- * Project (24) + : : : +- * BroadcastHashJoin Inner BuildRight (23) + : : : :- * Filter (21) + : : : : +- * ColumnarToRow (20) + : : : : +- Scan parquet default.web_sales (19) + : : : +- ReusedExchange (22) + : : +- * Project (30) + : : +- * BroadcastHashJoin Inner BuildRight (29) + : : :- * Filter (27) + : : : +- * ColumnarToRow (26) + : : : +- Scan parquet default.catalog_sales (25) + : : +- ReusedExchange (28) + : +- * Sort (42) + : +- Exchange (41) + : +- * Filter (40) + : +- * ColumnarToRow (39) + : +- Scan parquet default.customer_address (38) + +- * Sort (51) + +- Exchange (50) + +- * Filter (49) + +- * ColumnarToRow (48) + +- Scan parquet default.customer_demographics (47) + + +(1) Scan parquet default.customer +Output [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(3) Filter [codegen id : 1] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Condition : (isnotnull(c_current_addr_sk#3) AND isnotnull(c_current_cdemo_sk#2)) + +(4) Exchange +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Arguments: hashpartitioning(c_customer_sk#1, 5), true, [id=#4] + +(5) Sort [codegen id : 2] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Arguments: [c_customer_sk#1 ASC NULLS FIRST], false, 0 + +(6) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#5, ss_customer_sk#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 4] +Input [2]: [ss_sold_date_sk#5, ss_customer_sk#6] + +(8) Filter [codegen id : 4] +Input [2]: [ss_sold_date_sk#5, ss_customer_sk#6] +Condition : isnotnull(ss_sold_date_sk#5) + +(9) Scan parquet default.date_dim +Output [3]: [d_date_sk#7, d_year#8, d_qoy#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_qoy), EqualTo(d_year,1999), LessThan(d_qoy,4), IsNotNull(d_date_sk)] +ReadSchema: struct + +(10) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#7, d_year#8, d_qoy#9] + +(11) Filter [codegen id : 3] +Input [3]: [d_date_sk#7, d_year#8, d_qoy#9] +Condition : ((((isnotnull(d_year#8) AND 
isnotnull(d_qoy#9)) AND (d_year#8 = 1999)) AND (d_qoy#9 < 4)) AND isnotnull(d_date_sk#7)) + +(12) Project [codegen id : 3] +Output [1]: [d_date_sk#7] +Input [3]: [d_date_sk#7, d_year#8, d_qoy#9] + +(13) BroadcastExchange +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(15) Project [codegen id : 4] +Output [1]: [ss_customer_sk#6] +Input [3]: [ss_sold_date_sk#5, ss_customer_sk#6, d_date_sk#7] + +(16) Exchange +Input [1]: [ss_customer_sk#6] +Arguments: hashpartitioning(ss_customer_sk#6, 5), true, [id=#11] + +(17) Sort [codegen id : 5] +Input [1]: [ss_customer_sk#6] +Arguments: [ss_customer_sk#6 ASC NULLS FIRST], false, 0 + +(18) SortMergeJoin +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#6] +Join condition: None + +(19) Scan parquet default.web_sales +Output [2]: [ws_sold_date_sk#12, ws_bill_customer_sk#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 7] +Input [2]: [ws_sold_date_sk#12, ws_bill_customer_sk#13] + +(21) Filter [codegen id : 7] +Input [2]: [ws_sold_date_sk#12, ws_bill_customer_sk#13] +Condition : isnotnull(ws_sold_date_sk#12) + +(22) ReusedExchange [Reuses operator id: 13] +Output [1]: [d_date_sk#7] + +(23) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ws_sold_date_sk#12] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(24) Project [codegen id : 7] +Output [1]: [ws_bill_customer_sk#13 AS customsk#14] +Input [3]: [ws_sold_date_sk#12, ws_bill_customer_sk#13, d_date_sk#7] + +(25) Scan parquet default.catalog_sales +Output [2]: [cs_sold_date_sk#15, cs_ship_customer_sk#16] +Batched: true 
+Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(26) ColumnarToRow [codegen id : 9] +Input [2]: [cs_sold_date_sk#15, cs_ship_customer_sk#16] + +(27) Filter [codegen id : 9] +Input [2]: [cs_sold_date_sk#15, cs_ship_customer_sk#16] +Condition : isnotnull(cs_sold_date_sk#15) + +(28) ReusedExchange [Reuses operator id: 13] +Output [1]: [d_date_sk#7] + +(29) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [cs_sold_date_sk#15] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(30) Project [codegen id : 9] +Output [1]: [cs_ship_customer_sk#16 AS customsk#17] +Input [3]: [cs_sold_date_sk#15, cs_ship_customer_sk#16, d_date_sk#7] + +(31) Union + +(32) Exchange +Input [1]: [customsk#14] +Arguments: hashpartitioning(customsk#14, 5), true, [id=#18] + +(33) Sort [codegen id : 10] +Input [1]: [customsk#14] +Arguments: [customsk#14 ASC NULLS FIRST], false, 0 + +(34) SortMergeJoin +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [customsk#14] +Join condition: None + +(35) Project [codegen id : 11] +Output [2]: [c_current_cdemo_sk#2, c_current_addr_sk#3] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(36) Exchange +Input [2]: [c_current_cdemo_sk#2, c_current_addr_sk#3] +Arguments: hashpartitioning(c_current_addr_sk#3, 5), true, [id=#19] + +(37) Sort [codegen id : 12] +Input [2]: [c_current_cdemo_sk#2, c_current_addr_sk#3] +Arguments: [c_current_addr_sk#3 ASC NULLS FIRST], false, 0 + +(38) Scan parquet default.customer_address +Output [2]: [ca_address_sk#20, ca_state#21] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(39) ColumnarToRow [codegen id : 13] +Input 
[2]: [ca_address_sk#20, ca_state#21] + +(40) Filter [codegen id : 13] +Input [2]: [ca_address_sk#20, ca_state#21] +Condition : isnotnull(ca_address_sk#20) + +(41) Exchange +Input [2]: [ca_address_sk#20, ca_state#21] +Arguments: hashpartitioning(ca_address_sk#20, 5), true, [id=#22] + +(42) Sort [codegen id : 14] +Input [2]: [ca_address_sk#20, ca_state#21] +Arguments: [ca_address_sk#20 ASC NULLS FIRST], false, 0 + +(43) SortMergeJoin [codegen id : 15] +Left keys [1]: [c_current_addr_sk#3] +Right keys [1]: [ca_address_sk#20] +Join condition: None + +(44) Project [codegen id : 15] +Output [2]: [c_current_cdemo_sk#2, ca_state#21] +Input [4]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#20, ca_state#21] + +(45) Exchange +Input [2]: [c_current_cdemo_sk#2, ca_state#21] +Arguments: hashpartitioning(c_current_cdemo_sk#2, 5), true, [id=#23] + +(46) Sort [codegen id : 16] +Input [2]: [c_current_cdemo_sk#2, ca_state#21] +Arguments: [c_current_cdemo_sk#2 ASC NULLS FIRST], false, 0 + +(47) Scan parquet default.customer_demographics +Output [6]: [cd_demo_sk#24, cd_gender#25, cd_marital_status#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 17] +Input [6]: [cd_demo_sk#24, cd_gender#25, cd_marital_status#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] + +(49) Filter [codegen id : 17] +Input [6]: [cd_demo_sk#24, cd_gender#25, cd_marital_status#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] +Condition : isnotnull(cd_demo_sk#24) + +(50) Exchange +Input [6]: [cd_demo_sk#24, cd_gender#25, cd_marital_status#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] +Arguments: hashpartitioning(cd_demo_sk#24, 5), 
true, [id=#30] + +(51) Sort [codegen id : 18] +Input [6]: [cd_demo_sk#24, cd_gender#25, cd_marital_status#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] +Arguments: [cd_demo_sk#24 ASC NULLS FIRST], false, 0 + +(52) SortMergeJoin [codegen id : 19] +Left keys [1]: [c_current_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#24] +Join condition: None + +(53) Project [codegen id : 19] +Output [6]: [ca_state#21, cd_gender#25, cd_marital_status#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] +Input [8]: [c_current_cdemo_sk#2, ca_state#21, cd_demo_sk#24, cd_gender#25, cd_marital_status#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] + +(54) HashAggregate [codegen id : 19] +Input [6]: [ca_state#21, cd_gender#25, cd_marital_status#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] +Keys [6]: [ca_state#21, cd_gender#25, cd_marital_status#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] +Functions [10]: [partial_count(1), partial_avg(cast(cd_dep_count#27 as bigint)), partial_max(cd_dep_count#27), partial_sum(cast(cd_dep_count#27 as bigint)), partial_avg(cast(cd_dep_employed_count#28 as bigint)), partial_max(cd_dep_employed_count#28), partial_sum(cast(cd_dep_employed_count#28 as bigint)), partial_avg(cast(cd_dep_college_count#29 as bigint)), partial_max(cd_dep_college_count#29), partial_sum(cast(cd_dep_college_count#29 as bigint))] +Aggregate Attributes [13]: [count#31, sum#32, count#33, max#34, sum#35, sum#36, count#37, max#38, sum#39, sum#40, count#41, max#42, sum#43] +Results [19]: [ca_state#21, cd_gender#25, cd_marital_status#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29, count#44, sum#45, count#46, max#47, sum#48, sum#49, count#50, max#51, sum#52, sum#53, count#54, max#55, sum#56] + +(55) Exchange +Input [19]: [ca_state#21, cd_gender#25, cd_marital_status#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29, count#44, 
sum#45, count#46, max#47, sum#48, sum#49, count#50, max#51, sum#52, sum#53, count#54, max#55, sum#56] +Arguments: hashpartitioning(ca_state#21, cd_gender#25, cd_marital_status#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29, 5), true, [id=#57] + +(56) HashAggregate [codegen id : 20] +Input [19]: [ca_state#21, cd_gender#25, cd_marital_status#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29, count#44, sum#45, count#46, max#47, sum#48, sum#49, count#50, max#51, sum#52, sum#53, count#54, max#55, sum#56] +Keys [6]: [ca_state#21, cd_gender#25, cd_marital_status#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] +Functions [10]: [count(1), avg(cast(cd_dep_count#27 as bigint)), max(cd_dep_count#27), sum(cast(cd_dep_count#27 as bigint)), avg(cast(cd_dep_employed_count#28 as bigint)), max(cd_dep_employed_count#28), sum(cast(cd_dep_employed_count#28 as bigint)), avg(cast(cd_dep_college_count#29 as bigint)), max(cd_dep_college_count#29), sum(cast(cd_dep_college_count#29 as bigint))] +Aggregate Attributes [10]: [count(1)#58, avg(cast(cd_dep_count#27 as bigint))#59, max(cd_dep_count#27)#60, sum(cast(cd_dep_count#27 as bigint))#61, avg(cast(cd_dep_employed_count#28 as bigint))#62, max(cd_dep_employed_count#28)#63, sum(cast(cd_dep_employed_count#28 as bigint))#64, avg(cast(cd_dep_college_count#29 as bigint))#65, max(cd_dep_college_count#29)#66, sum(cast(cd_dep_college_count#29 as bigint))#67] +Results [18]: [ca_state#21, cd_gender#25, cd_marital_status#26, cd_dep_count#27, count(1)#58 AS cnt1#68, avg(cast(cd_dep_count#27 as bigint))#59 AS avg(cd_dep_count)#69, max(cd_dep_count#27)#60 AS max(cd_dep_count)#70, sum(cast(cd_dep_count#27 as bigint))#61 AS sum(cd_dep_count)#71, cd_dep_employed_count#28, count(1)#58 AS cnt2#72, avg(cast(cd_dep_employed_count#28 as bigint))#62 AS avg(cd_dep_employed_count)#73, max(cd_dep_employed_count#28)#63 AS max(cd_dep_employed_count)#74, sum(cast(cd_dep_employed_count#28 as bigint))#64 
AS sum(cd_dep_employed_count)#75, cd_dep_college_count#29, count(1)#58 AS cnt3#76, avg(cast(cd_dep_college_count#29 as bigint))#65 AS avg(cd_dep_college_count)#77, max(cd_dep_college_count#29)#66 AS max(cd_dep_college_count)#78, sum(cast(cd_dep_college_count#29 as bigint))#67 AS sum(cd_dep_college_count)#79] + +(57) TakeOrderedAndProject +Input [18]: [ca_state#21, cd_gender#25, cd_marital_status#26, cd_dep_count#27, cnt1#68, avg(cd_dep_count)#69, max(cd_dep_count)#70, sum(cd_dep_count)#71, cd_dep_employed_count#28, cnt2#72, avg(cd_dep_employed_count)#73, max(cd_dep_employed_count)#74, sum(cd_dep_employed_count)#75, cd_dep_college_count#29, cnt3#76, avg(cd_dep_college_count)#77, max(cd_dep_college_count)#78, sum(cd_dep_college_count)#79] +Arguments: 100, [ca_state#21 ASC NULLS FIRST, cd_gender#25 ASC NULLS FIRST, cd_marital_status#26 ASC NULLS FIRST, cd_dep_count#27 ASC NULLS FIRST, cd_dep_employed_count#28 ASC NULLS FIRST, cd_dep_college_count#29 ASC NULLS FIRST], [ca_state#21, cd_gender#25, cd_marital_status#26, cd_dep_count#27, cnt1#68, avg(cd_dep_count)#69, max(cd_dep_count)#70, sum(cd_dep_count)#71, cd_dep_employed_count#28, cnt2#72, avg(cd_dep_employed_count)#73, max(cd_dep_employed_count)#74, sum(cd_dep_employed_count)#75, cd_dep_college_count#29, cnt3#76, avg(cd_dep_college_count)#77, max(cd_dep_college_count)#78, sum(cd_dep_college_count)#79] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a.sf100/simplified.txt new file mode 100644 index 0000000000000..2840f275ff2b7 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a.sf100/simplified.txt @@ -0,0 +1,98 @@ +TakeOrderedAndProject 
[avg(cd_dep_college_count),avg(cd_dep_count),avg(cd_dep_employed_count),ca_state,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status,cnt1,cnt2,cnt3,max(cd_dep_college_count),max(cd_dep_count),max(cd_dep_employed_count),sum(cd_dep_college_count),sum(cd_dep_count),sum(cd_dep_employed_count)] + WholeStageCodegen (20) + HashAggregate [ca_state,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status,count,count,count,count,max,max,max,sum,sum,sum,sum,sum,sum] [avg(cast(cd_dep_college_count as bigint)),avg(cast(cd_dep_count as bigint)),avg(cast(cd_dep_employed_count as bigint)),avg(cd_dep_college_count),avg(cd_dep_count),avg(cd_dep_employed_count),cnt1,cnt2,cnt3,count,count,count,count,count(1),max,max,max,max(cd_dep_college_count),max(cd_dep_college_count),max(cd_dep_count),max(cd_dep_count),max(cd_dep_employed_count),max(cd_dep_employed_count),sum,sum,sum,sum,sum,sum,sum(cast(cd_dep_college_count as bigint)),sum(cast(cd_dep_count as bigint)),sum(cast(cd_dep_employed_count as bigint)),sum(cd_dep_college_count),sum(cd_dep_count),sum(cd_dep_employed_count)] + InputAdapter + Exchange [ca_state,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status] #1 + WholeStageCodegen (19) + HashAggregate [ca_state,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status] [count,count,count,count,count,count,count,count,max,max,max,max,max,max,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [ca_state,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status] + SortMergeJoin [c_current_cdemo_sk,cd_demo_sk] + InputAdapter + WholeStageCodegen (16) + Sort [c_current_cdemo_sk] + InputAdapter + Exchange [c_current_cdemo_sk] #2 + WholeStageCodegen (15) + Project [c_current_cdemo_sk,ca_state] + SortMergeJoin [c_current_addr_sk,ca_address_sk] + InputAdapter + WholeStageCodegen (12) + Sort [c_current_addr_sk] + InputAdapter + Exchange 
[c_current_addr_sk] #3 + WholeStageCodegen (11) + Project [c_current_addr_sk,c_current_cdemo_sk] + InputAdapter + SortMergeJoin [c_customer_sk,customsk] + SortMergeJoin [c_customer_sk,ss_customer_sk] + WholeStageCodegen (2) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #4 + WholeStageCodegen (1) + Filter [c_current_addr_sk,c_current_cdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + WholeStageCodegen (5) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #5 + WholeStageCodegen (4) + Project [ss_customer_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date_sk,d_qoy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] + WholeStageCodegen (10) + Sort [customsk] + InputAdapter + Exchange [customsk] #7 + Union + WholeStageCodegen (7) + Project [ws_bill_customer_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #6 + WholeStageCodegen (9) + Project [cs_ship_customer_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #6 + InputAdapter + WholeStageCodegen (14) + Sort [ca_address_sk] + InputAdapter + Exchange [ca_address_sk] #8 + WholeStageCodegen (13) + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state] + InputAdapter + WholeStageCodegen (18) + Sort [cd_demo_sk] + InputAdapter + Exchange 
[cd_demo_sk] #9 + WholeStageCodegen (17) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a/explain.txt new file mode 100644 index 0000000000000..69ad4ba7290bf --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a/explain.txt @@ -0,0 +1,261 @@ +== Physical Plan == +TakeOrderedAndProject (47) ++- * HashAggregate (46) + +- Exchange (45) + +- * HashAggregate (44) + +- * Project (43) + +- * BroadcastHashJoin Inner BuildRight (42) + :- * Project (37) + : +- * BroadcastHashJoin Inner BuildRight (36) + : :- * Project (31) + : : +- * BroadcastHashJoin LeftSemi BuildRight (30) + : : :- * BroadcastHashJoin LeftSemi BuildRight (15) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.customer (1) + : : : +- BroadcastExchange (14) + : : : +- * Project (13) + : : : +- * BroadcastHashJoin Inner BuildRight (12) + : : : :- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.store_sales (4) + : : : +- BroadcastExchange (11) + : : : +- * Project (10) + : : : +- * Filter (9) + : : : +- * ColumnarToRow (8) + : : : +- Scan parquet default.date_dim (7) + : : +- BroadcastExchange (29) + : : +- Union (28) + : : :- * Project (21) + : : : +- * BroadcastHashJoin Inner BuildRight (20) + : : : :- * Filter (18) + : : : : +- * ColumnarToRow (17) + : : : : +- Scan parquet default.web_sales (16) + : : : +- ReusedExchange (19) + : : +- * Project (27) + : : +- * BroadcastHashJoin Inner BuildRight (26) + : : :- * Filter (24) + : : : +- * ColumnarToRow (23) + : : : +- Scan parquet default.catalog_sales (22) + : : +- ReusedExchange (25) + : +- BroadcastExchange (35) + : +- * Filter 
(34) + : +- * ColumnarToRow (33) + : +- Scan parquet default.customer_address (32) + +- BroadcastExchange (41) + +- * Filter (40) + +- * ColumnarToRow (39) + +- Scan parquet default.customer_demographics (38) + + +(1) Scan parquet default.customer +Output [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 9] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(3) Filter [codegen id : 9] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Condition : (isnotnull(c_current_addr_sk#3) AND isnotnull(c_current_cdemo_sk#2)) + +(4) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#4, ss_customer_sk#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 2] +Input [2]: [ss_sold_date_sk#4, ss_customer_sk#5] + +(6) Filter [codegen id : 2] +Input [2]: [ss_sold_date_sk#4, ss_customer_sk#5] +Condition : isnotnull(ss_sold_date_sk#4) + +(7) Scan parquet default.date_dim +Output [3]: [d_date_sk#6, d_year#7, d_qoy#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_qoy), EqualTo(d_year,1999), LessThan(d_qoy,4), IsNotNull(d_date_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_qoy#8] + +(9) Filter [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_qoy#8] +Condition : 
((((isnotnull(d_year#7) AND isnotnull(d_qoy#8)) AND (d_year#7 = 1999)) AND (d_qoy#8 < 4)) AND isnotnull(d_date_sk#6)) + +(10) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [3]: [d_date_sk#6, d_year#7, d_qoy#8] + +(11) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + +(12) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#4] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(13) Project [codegen id : 2] +Output [1]: [ss_customer_sk#5] +Input [3]: [ss_sold_date_sk#4, ss_customer_sk#5, d_date_sk#6] + +(14) BroadcastExchange +Input [1]: [ss_customer_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] + +(15) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#5] +Join condition: None + +(16) Scan parquet default.web_sales +Output [2]: [ws_sold_date_sk#11, ws_bill_customer_sk#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 4] +Input [2]: [ws_sold_date_sk#11, ws_bill_customer_sk#12] + +(18) Filter [codegen id : 4] +Input [2]: [ws_sold_date_sk#11, ws_bill_customer_sk#12] +Condition : isnotnull(ws_sold_date_sk#11) + +(19) ReusedExchange [Reuses operator id: 11] +Output [1]: [d_date_sk#6] + +(20) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_sold_date_sk#11] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(21) Project [codegen id : 4] +Output [1]: [ws_bill_customer_sk#12 AS customsk#13] +Input [3]: [ws_sold_date_sk#11, ws_bill_customer_sk#12, d_date_sk#6] + +(22) Scan parquet default.catalog_sales +Output [2]: [cs_sold_date_sk#14, cs_ship_customer_sk#15] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 6] +Input [2]: [cs_sold_date_sk#14, cs_ship_customer_sk#15] + +(24) Filter [codegen id : 6] +Input [2]: [cs_sold_date_sk#14, cs_ship_customer_sk#15] +Condition : isnotnull(cs_sold_date_sk#14) + +(25) ReusedExchange [Reuses operator id: 11] +Output [1]: [d_date_sk#6] + +(26) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_sold_date_sk#14] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(27) Project [codegen id : 6] +Output [1]: [cs_ship_customer_sk#15 AS customsk#16] +Input [3]: [cs_sold_date_sk#14, cs_ship_customer_sk#15, d_date_sk#6] + +(28) Union + +(29) BroadcastExchange +Input [1]: [customsk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] + +(30) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [customsk#13] +Join condition: None + +(31) Project [codegen id : 9] +Output [2]: [c_current_cdemo_sk#2, c_current_addr_sk#3] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(32) Scan parquet default.customer_address +Output [2]: [ca_address_sk#18, ca_state#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(33) ColumnarToRow [codegen id : 7] +Input [2]: [ca_address_sk#18, ca_state#19] + +(34) Filter [codegen id : 7] +Input [2]: [ca_address_sk#18, ca_state#19] +Condition : isnotnull(ca_address_sk#18) + +(35) BroadcastExchange +Input [2]: [ca_address_sk#18, ca_state#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#20] + +(36) BroadcastHashJoin [codegen id : 9] +Left keys 
[1]: [c_current_addr_sk#3] +Right keys [1]: [ca_address_sk#18] +Join condition: None + +(37) Project [codegen id : 9] +Output [2]: [c_current_cdemo_sk#2, ca_state#19] +Input [4]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#18, ca_state#19] + +(38) Scan parquet default.customer_demographics +Output [6]: [cd_demo_sk#21, cd_gender#22, cd_marital_status#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(39) ColumnarToRow [codegen id : 8] +Input [6]: [cd_demo_sk#21, cd_gender#22, cd_marital_status#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] + +(40) Filter [codegen id : 8] +Input [6]: [cd_demo_sk#21, cd_gender#22, cd_marital_status#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] +Condition : isnotnull(cd_demo_sk#21) + +(41) BroadcastExchange +Input [6]: [cd_demo_sk#21, cd_gender#22, cd_marital_status#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#27] + +(42) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#21] +Join condition: None + +(43) Project [codegen id : 9] +Output [6]: [ca_state#19, cd_gender#22, cd_marital_status#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] +Input [8]: [c_current_cdemo_sk#2, ca_state#19, cd_demo_sk#21, cd_gender#22, cd_marital_status#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] + +(44) HashAggregate [codegen id : 9] +Input [6]: [ca_state#19, cd_gender#22, cd_marital_status#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] +Keys [6]: [ca_state#19, cd_gender#22, 
cd_marital_status#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] +Functions [10]: [partial_count(1), partial_avg(cast(cd_dep_count#24 as bigint)), partial_max(cd_dep_count#24), partial_sum(cast(cd_dep_count#24 as bigint)), partial_avg(cast(cd_dep_employed_count#25 as bigint)), partial_max(cd_dep_employed_count#25), partial_sum(cast(cd_dep_employed_count#25 as bigint)), partial_avg(cast(cd_dep_college_count#26 as bigint)), partial_max(cd_dep_college_count#26), partial_sum(cast(cd_dep_college_count#26 as bigint))] +Aggregate Attributes [13]: [count#28, sum#29, count#30, max#31, sum#32, sum#33, count#34, max#35, sum#36, sum#37, count#38, max#39, sum#40] +Results [19]: [ca_state#19, cd_gender#22, cd_marital_status#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26, count#41, sum#42, count#43, max#44, sum#45, sum#46, count#47, max#48, sum#49, sum#50, count#51, max#52, sum#53] + +(45) Exchange +Input [19]: [ca_state#19, cd_gender#22, cd_marital_status#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26, count#41, sum#42, count#43, max#44, sum#45, sum#46, count#47, max#48, sum#49, sum#50, count#51, max#52, sum#53] +Arguments: hashpartitioning(ca_state#19, cd_gender#22, cd_marital_status#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26, 5), true, [id=#54] + +(46) HashAggregate [codegen id : 10] +Input [19]: [ca_state#19, cd_gender#22, cd_marital_status#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26, count#41, sum#42, count#43, max#44, sum#45, sum#46, count#47, max#48, sum#49, sum#50, count#51, max#52, sum#53] +Keys [6]: [ca_state#19, cd_gender#22, cd_marital_status#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] +Functions [10]: [count(1), avg(cast(cd_dep_count#24 as bigint)), max(cd_dep_count#24), sum(cast(cd_dep_count#24 as bigint)), avg(cast(cd_dep_employed_count#25 as bigint)), max(cd_dep_employed_count#25), 
sum(cast(cd_dep_employed_count#25 as bigint)), avg(cast(cd_dep_college_count#26 as bigint)), max(cd_dep_college_count#26), sum(cast(cd_dep_college_count#26 as bigint))] +Aggregate Attributes [10]: [count(1)#55, avg(cast(cd_dep_count#24 as bigint))#56, max(cd_dep_count#24)#57, sum(cast(cd_dep_count#24 as bigint))#58, avg(cast(cd_dep_employed_count#25 as bigint))#59, max(cd_dep_employed_count#25)#60, sum(cast(cd_dep_employed_count#25 as bigint))#61, avg(cast(cd_dep_college_count#26 as bigint))#62, max(cd_dep_college_count#26)#63, sum(cast(cd_dep_college_count#26 as bigint))#64] +Results [18]: [ca_state#19, cd_gender#22, cd_marital_status#23, cd_dep_count#24, count(1)#55 AS cnt1#65, avg(cast(cd_dep_count#24 as bigint))#56 AS avg(cd_dep_count)#66, max(cd_dep_count#24)#57 AS max(cd_dep_count)#67, sum(cast(cd_dep_count#24 as bigint))#58 AS sum(cd_dep_count)#68, cd_dep_employed_count#25, count(1)#55 AS cnt2#69, avg(cast(cd_dep_employed_count#25 as bigint))#59 AS avg(cd_dep_employed_count)#70, max(cd_dep_employed_count#25)#60 AS max(cd_dep_employed_count)#71, sum(cast(cd_dep_employed_count#25 as bigint))#61 AS sum(cd_dep_employed_count)#72, cd_dep_college_count#26, count(1)#55 AS cnt3#73, avg(cast(cd_dep_college_count#26 as bigint))#62 AS avg(cd_dep_college_count)#74, max(cd_dep_college_count#26)#63 AS max(cd_dep_college_count)#75, sum(cast(cd_dep_college_count#26 as bigint))#64 AS sum(cd_dep_college_count)#76] + +(47) TakeOrderedAndProject +Input [18]: [ca_state#19, cd_gender#22, cd_marital_status#23, cd_dep_count#24, cnt1#65, avg(cd_dep_count)#66, max(cd_dep_count)#67, sum(cd_dep_count)#68, cd_dep_employed_count#25, cnt2#69, avg(cd_dep_employed_count)#70, max(cd_dep_employed_count)#71, sum(cd_dep_employed_count)#72, cd_dep_college_count#26, cnt3#73, avg(cd_dep_college_count)#74, max(cd_dep_college_count)#75, sum(cd_dep_college_count)#76] +Arguments: 100, [ca_state#19 ASC NULLS FIRST, cd_gender#22 ASC NULLS FIRST, cd_marital_status#23 ASC NULLS FIRST, cd_dep_count#24 ASC 
NULLS FIRST, cd_dep_employed_count#25 ASC NULLS FIRST, cd_dep_college_count#26 ASC NULLS FIRST], [ca_state#19, cd_gender#22, cd_marital_status#23, cd_dep_count#24, cnt1#65, avg(cd_dep_count)#66, max(cd_dep_count)#67, sum(cd_dep_count)#68, cd_dep_employed_count#25, cnt2#69, avg(cd_dep_employed_count)#70, max(cd_dep_employed_count)#71, sum(cd_dep_employed_count)#72, cd_dep_college_count#26, cnt3#73, avg(cd_dep_college_count)#74, max(cd_dep_college_count)#75, sum(cd_dep_college_count)#76] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a/simplified.txt new file mode 100644 index 0000000000000..054296501acf2 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a/simplified.txt @@ -0,0 +1,70 @@ +TakeOrderedAndProject [avg(cd_dep_college_count),avg(cd_dep_count),avg(cd_dep_employed_count),ca_state,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status,cnt1,cnt2,cnt3,max(cd_dep_college_count),max(cd_dep_count),max(cd_dep_employed_count),sum(cd_dep_college_count),sum(cd_dep_count),sum(cd_dep_employed_count)] + WholeStageCodegen (10) + HashAggregate [ca_state,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status,count,count,count,count,max,max,max,sum,sum,sum,sum,sum,sum] [avg(cast(cd_dep_college_count as bigint)),avg(cast(cd_dep_count as bigint)),avg(cast(cd_dep_employed_count as bigint)),avg(cd_dep_college_count),avg(cd_dep_count),avg(cd_dep_employed_count),cnt1,cnt2,cnt3,count,count,count,count,count(1),max,max,max,max(cd_dep_college_count),max(cd_dep_college_count),max(cd_dep_count),max(cd_dep_count),max(cd_dep_employed_count),max(cd_dep_employed_count),sum,sum,sum,sum,sum,sum,sum(cast(cd_dep_college_count as bigint)),sum(cast(cd_dep_count as bigint)),sum(cast(cd_dep_employed_count as 
bigint)),sum(cd_dep_college_count),sum(cd_dep_count),sum(cd_dep_employed_count)] + InputAdapter + Exchange [ca_state,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status] #1 + WholeStageCodegen (9) + HashAggregate [ca_state,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status] [count,count,count,count,count,count,count,count,max,max,max,max,max,max,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [ca_state,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk,ca_state] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_addr_sk,c_current_cdemo_sk] + BroadcastHashJoin [c_customer_sk,customsk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_current_addr_sk,c_current_cdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (2) + Project [ss_customer_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_qoy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] + InputAdapter + BroadcastExchange #4 + Union + WholeStageCodegen (4) + Project [ws_bill_customer_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + WholeStageCodegen (6) + Project [cs_ship_customer_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet 
default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (7) + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (8) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.sf100/explain.txt new file mode 100644 index 0000000000000..9d1194dcd7550 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.sf100/explain.txt @@ -0,0 +1,289 @@ +== Physical Plan == +TakeOrderedAndProject (49) ++- * Project (48) + +- Window (47) + +- * Sort (46) + +- Exchange (45) + +- * HashAggregate (44) + +- Exchange (43) + +- * HashAggregate (42) + +- Union (41) + :- * HashAggregate (35) + : +- Exchange (34) + : +- * HashAggregate (33) + : +- Union (32) + : :- * HashAggregate (26) + : : +- Exchange (25) + : : +- * HashAggregate (24) + : : +- * Project (23) + : : +- * BroadcastHashJoin Inner BuildRight (22) + : : :- * Project (17) + : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.date_dim (4) + : : : +- BroadcastExchange (15) + : : : +- * Project (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet default.store (11) + : : +- 
BroadcastExchange (21) + : : +- * Filter (20) + : : +- * ColumnarToRow (19) + : : +- Scan parquet default.item (18) + : +- * HashAggregate (31) + : +- Exchange (30) + : +- * HashAggregate (29) + : +- * HashAggregate (28) + : +- ReusedExchange (27) + +- * HashAggregate (40) + +- Exchange (39) + +- * HashAggregate (38) + +- * HashAggregate (37) + +- ReusedExchange (36) + + +(1) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5] + +(3) Filter [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5] +Condition : ((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_store_sk#3)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#6, d_year#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#6, d_year#7] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#6, d_year#7] +Condition : ((isnotnull(d_year#7) AND (d_year#7 = 2001)) AND isnotnull(d_date_sk#6)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [2]: [d_date_sk#6, d_year#7] + +(8) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] + 
+(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5, d_date_sk#6] + +(11) Scan parquet default.store +Output [2]: [s_store_sk#9, s_state#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#9, s_state#10] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#9, s_state#10] +Condition : ((isnotnull(s_state#10) AND (s_state#10 = TN)) AND isnotnull(s_store_sk#9)) + +(14) Project [codegen id : 2] +Output [1]: [s_store_sk#9] +Input [2]: [s_store_sk#9, s_state#10] + +(15) BroadcastExchange +Input [1]: [s_store_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#9] +Join condition: None + +(17) Project [codegen id : 4] +Output [3]: [ss_item_sk#2, ss_ext_sales_price#4, ss_net_profit#5] +Input [5]: [ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5, s_store_sk#9] + +(18) Scan parquet default.item +Output [3]: [i_item_sk#12, i_class#13, i_category#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [3]: [i_item_sk#12, i_class#13, i_category#14] + +(20) Filter [codegen id : 3] +Input [3]: [i_item_sk#12, 
i_class#13, i_category#14] +Condition : isnotnull(i_item_sk#12) + +(21) BroadcastExchange +Input [3]: [i_item_sk#12, i_class#13, i_category#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#15] + +(22) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#12] +Join condition: None + +(23) Project [codegen id : 4] +Output [4]: [ss_ext_sales_price#4, ss_net_profit#5, i_class#13, i_category#14] +Input [6]: [ss_item_sk#2, ss_ext_sales_price#4, ss_net_profit#5, i_item_sk#12, i_class#13, i_category#14] + +(24) HashAggregate [codegen id : 4] +Input [4]: [ss_ext_sales_price#4, ss_net_profit#5, i_class#13, i_category#14] +Keys [2]: [i_category#14, i_class#13] +Functions [2]: [partial_sum(UnscaledValue(ss_net_profit#5)), partial_sum(UnscaledValue(ss_ext_sales_price#4))] +Aggregate Attributes [2]: [sum#16, sum#17] +Results [4]: [i_category#14, i_class#13, sum#18, sum#19] + +(25) Exchange +Input [4]: [i_category#14, i_class#13, sum#18, sum#19] +Arguments: hashpartitioning(i_category#14, i_class#13, 5), true, [id=#20] + +(26) HashAggregate [codegen id : 5] +Input [4]: [i_category#14, i_class#13, sum#18, sum#19] +Keys [2]: [i_category#14, i_class#13] +Functions [2]: [sum(UnscaledValue(ss_net_profit#5)), sum(UnscaledValue(ss_ext_sales_price#4))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#5))#21, sum(UnscaledValue(ss_ext_sales_price#4))#22] +Results [6]: [cast(CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_net_profit#5))#21,17,2)) / promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#22,17,2))), DecimalType(37,20), true) as decimal(38,20)) AS gross_margin#23, i_category#14, i_class#13, 0 AS t_category#24, 0 AS t_class#25, 0 AS lochierarchy#26] + +(27) ReusedExchange [Reuses operator id: 25] +Output [4]: [i_category#14, i_class#13, sum#27, sum#28] + +(28) HashAggregate [codegen id : 10] +Input [4]: [i_category#14, i_class#13, sum#27, 
sum#28] +Keys [2]: [i_category#14, i_class#13] +Functions [2]: [sum(UnscaledValue(ss_net_profit#5)), sum(UnscaledValue(ss_ext_sales_price#4))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#5))#29, sum(UnscaledValue(ss_ext_sales_price#4))#30] +Results [3]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#5))#29,17,2) AS ss_net_profit#31, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#30,17,2) AS ss_ext_sales_price#32, i_category#14] + +(29) HashAggregate [codegen id : 10] +Input [3]: [ss_net_profit#31, ss_ext_sales_price#32, i_category#14] +Keys [1]: [i_category#14] +Functions [2]: [partial_sum(ss_net_profit#31), partial_sum(ss_ext_sales_price#32)] +Aggregate Attributes [4]: [sum#33, isEmpty#34, sum#35, isEmpty#36] +Results [5]: [i_category#14, sum#37, isEmpty#38, sum#39, isEmpty#40] + +(30) Exchange +Input [5]: [i_category#14, sum#37, isEmpty#38, sum#39, isEmpty#40] +Arguments: hashpartitioning(i_category#14, 5), true, [id=#41] + +(31) HashAggregate [codegen id : 11] +Input [5]: [i_category#14, sum#37, isEmpty#38, sum#39, isEmpty#40] +Keys [1]: [i_category#14] +Functions [2]: [sum(ss_net_profit#31), sum(ss_ext_sales_price#32)] +Aggregate Attributes [2]: [sum(ss_net_profit#31)#42, sum(ss_ext_sales_price#32)#43] +Results [6]: [cast(CheckOverflow((promote_precision(sum(ss_net_profit#31)#42) / promote_precision(sum(ss_ext_sales_price#32)#43)), DecimalType(38,11), true) as decimal(38,20)) AS gross_margin#44, i_category#14, null AS i_class#45, 0 AS t_category#46, 1 AS t_class#47, 1 AS lochierarchy#48] + +(32) Union + +(33) HashAggregate [codegen id : 12] +Input [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] +Keys [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] +Functions: [] +Aggregate Attributes: [] +Results [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] + +(34) Exchange +Input [6]: [gross_margin#23, i_category#14, 
i_class#13, t_category#24, t_class#25, lochierarchy#26] +Arguments: hashpartitioning(gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26, 5), true, [id=#49] + +(35) HashAggregate [codegen id : 13] +Input [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] +Keys [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] +Functions: [] +Aggregate Attributes: [] +Results [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] + +(36) ReusedExchange [Reuses operator id: 25] +Output [4]: [i_category#14, i_class#13, sum#50, sum#51] + +(37) HashAggregate [codegen id : 18] +Input [4]: [i_category#14, i_class#13, sum#50, sum#51] +Keys [2]: [i_category#14, i_class#13] +Functions [2]: [sum(UnscaledValue(ss_net_profit#5)), sum(UnscaledValue(ss_ext_sales_price#4))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#5))#52, sum(UnscaledValue(ss_ext_sales_price#4))#53] +Results [2]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#5))#52,17,2) AS ss_net_profit#31, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#53,17,2) AS ss_ext_sales_price#32] + +(38) HashAggregate [codegen id : 18] +Input [2]: [ss_net_profit#31, ss_ext_sales_price#32] +Keys: [] +Functions [2]: [partial_sum(ss_net_profit#31), partial_sum(ss_ext_sales_price#32)] +Aggregate Attributes [4]: [sum#54, isEmpty#55, sum#56, isEmpty#57] +Results [4]: [sum#58, isEmpty#59, sum#60, isEmpty#61] + +(39) Exchange +Input [4]: [sum#58, isEmpty#59, sum#60, isEmpty#61] +Arguments: SinglePartition, true, [id=#62] + +(40) HashAggregate [codegen id : 19] +Input [4]: [sum#58, isEmpty#59, sum#60, isEmpty#61] +Keys: [] +Functions [2]: [sum(ss_net_profit#31), sum(ss_ext_sales_price#32)] +Aggregate Attributes [2]: [sum(ss_net_profit#31)#63, sum(ss_ext_sales_price#32)#64] +Results [6]: [cast(CheckOverflow((promote_precision(sum(ss_net_profit#31)#63) / 
promote_precision(sum(ss_ext_sales_price#32)#64)), DecimalType(38,11), true) as decimal(38,20)) AS gross_margin#65, null AS i_category#66, null AS i_class#67, 1 AS t_category#68, 1 AS t_class#69, 2 AS lochierarchy#70] + +(41) Union + +(42) HashAggregate [codegen id : 20] +Input [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] +Keys [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] +Functions: [] +Aggregate Attributes: [] +Results [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] + +(43) Exchange +Input [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] +Arguments: hashpartitioning(gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26, 5), true, [id=#71] + +(44) HashAggregate [codegen id : 21] +Input [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] +Keys [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] +Functions: [] +Aggregate Attributes: [] +Results [5]: [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, CASE WHEN (t_class#25 = 0) THEN i_category#14 END AS _w0#72] + +(45) Exchange +Input [5]: [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, _w0#72] +Arguments: hashpartitioning(lochierarchy#26, _w0#72, 5), true, [id=#73] + +(46) Sort [codegen id : 22] +Input [5]: [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, _w0#72] +Arguments: [lochierarchy#26 ASC NULLS FIRST, _w0#72 ASC NULLS FIRST, gross_margin#23 ASC NULLS FIRST], false, 0 + +(47) Window +Input [5]: [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, _w0#72] +Arguments: [rank(gross_margin#23) windowspecdefinition(lochierarchy#26, _w0#72, gross_margin#23 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#74], 
[lochierarchy#26, _w0#72], [gross_margin#23 ASC NULLS FIRST] + +(48) Project [codegen id : 23] +Output [5]: [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, rank_within_parent#74] +Input [6]: [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, _w0#72, rank_within_parent#74] + +(49) TakeOrderedAndProject +Input [5]: [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, rank_within_parent#74] +Arguments: 100, [lochierarchy#26 DESC NULLS LAST, CASE WHEN (lochierarchy#26 = 0) THEN i_category#14 END ASC NULLS FIRST, rank_within_parent#74 ASC NULLS FIRST], [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, rank_within_parent#74] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.sf100/simplified.txt new file mode 100644 index 0000000000000..2338ff8061e19 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.sf100/simplified.txt @@ -0,0 +1,82 @@ +TakeOrderedAndProject [gross_margin,i_category,i_class,lochierarchy,rank_within_parent] + WholeStageCodegen (23) + Project [gross_margin,i_category,i_class,lochierarchy,rank_within_parent] + InputAdapter + Window [_w0,gross_margin,lochierarchy] + WholeStageCodegen (22) + Sort [_w0,gross_margin,lochierarchy] + InputAdapter + Exchange [_w0,lochierarchy] #1 + WholeStageCodegen (21) + HashAggregate [gross_margin,i_category,i_class,lochierarchy,t_category,t_class] [_w0] + InputAdapter + Exchange [gross_margin,i_category,i_class,lochierarchy,t_category,t_class] #2 + WholeStageCodegen (20) + HashAggregate [gross_margin,i_category,i_class,lochierarchy,t_category,t_class] + InputAdapter + Union + WholeStageCodegen (13) + HashAggregate [gross_margin,i_category,i_class,lochierarchy,t_category,t_class] + InputAdapter + Exchange [gross_margin,i_category,i_class,lochierarchy,t_category,t_class] #3 + WholeStageCodegen (12) + 
HashAggregate [gross_margin,i_category,i_class,lochierarchy,t_category,t_class] + InputAdapter + Union + WholeStageCodegen (5) + HashAggregate [i_category,i_class,sum,sum] [gross_margin,lochierarchy,sum,sum,sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit)),t_category,t_class] + InputAdapter + Exchange [i_category,i_class] #4 + WholeStageCodegen (4) + HashAggregate [i_category,i_class,ss_ext_sales_price,ss_net_profit] [sum,sum,sum,sum] + Project [i_category,i_class,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_ext_sales_price,ss_item_sk,ss_net_profit] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + Project [s_store_sk] + Filter [s_state,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_state,s_store_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_class,i_item_sk] + WholeStageCodegen (11) + HashAggregate [i_category,isEmpty,isEmpty,sum,sum] [gross_margin,i_class,isEmpty,isEmpty,lochierarchy,sum,sum,sum(ss_ext_sales_price),sum(ss_net_profit),t_category,t_class] + InputAdapter + Exchange [i_category] #8 + WholeStageCodegen (10) + HashAggregate [i_category,ss_ext_sales_price,ss_net_profit] [isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum] + HashAggregate [i_category,i_class,sum,sum] 
[ss_ext_sales_price,ss_net_profit,sum,sum,sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit))] + InputAdapter + ReusedExchange [i_category,i_class,sum,sum] #4 + WholeStageCodegen (19) + HashAggregate [isEmpty,isEmpty,sum,sum] [gross_margin,i_category,i_class,isEmpty,isEmpty,lochierarchy,sum,sum,sum(ss_ext_sales_price),sum(ss_net_profit),t_category,t_class] + InputAdapter + Exchange #9 + WholeStageCodegen (18) + HashAggregate [ss_ext_sales_price,ss_net_profit] [isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum] + HashAggregate [i_category,i_class,sum,sum] [ss_ext_sales_price,ss_net_profit,sum,sum,sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit))] + InputAdapter + ReusedExchange [i_category,i_class,sum,sum] #4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a/explain.txt new file mode 100644 index 0000000000000..5d1e7206bdef0 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a/explain.txt @@ -0,0 +1,289 @@ +== Physical Plan == +TakeOrderedAndProject (49) ++- * Project (48) + +- Window (47) + +- * Sort (46) + +- Exchange (45) + +- * HashAggregate (44) + +- Exchange (43) + +- * HashAggregate (42) + +- Union (41) + :- * HashAggregate (35) + : +- Exchange (34) + : +- * HashAggregate (33) + : +- Union (32) + : :- * HashAggregate (26) + : : +- Exchange (25) + : : +- * HashAggregate (24) + : : +- * Project (23) + : : +- * BroadcastHashJoin Inner BuildRight (22) + : : :- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan 
parquet default.date_dim (4) + : : : +- BroadcastExchange (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet default.item (11) + : : +- BroadcastExchange (21) + : : +- * Project (20) + : : +- * Filter (19) + : : +- * ColumnarToRow (18) + : : +- Scan parquet default.store (17) + : +- * HashAggregate (31) + : +- Exchange (30) + : +- * HashAggregate (29) + : +- * HashAggregate (28) + : +- ReusedExchange (27) + +- * HashAggregate (40) + +- Exchange (39) + +- * HashAggregate (38) + +- * HashAggregate (37) + +- ReusedExchange (36) + + +(1) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5] + +(3) Filter [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5] +Condition : ((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_store_sk#3)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#6, d_year#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#6, d_year#7] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#6, d_year#7] +Condition : ((isnotnull(d_year#7) AND (d_year#7 = 2001)) AND isnotnull(d_date_sk#6)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input 
[2]: [d_date_sk#6, d_year#7] + +(8) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5, d_date_sk#6] + +(11) Scan parquet default.item +Output [3]: [i_item_sk#9, i_class#10, i_category#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [i_item_sk#9, i_class#10, i_category#11] + +(13) Filter [codegen id : 2] +Input [3]: [i_item_sk#9, i_class#10, i_category#11] +Condition : isnotnull(i_item_sk#9) + +(14) BroadcastExchange +Input [3]: [i_item_sk#9, i_class#10, i_category#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#9] +Join condition: None + +(16) Project [codegen id : 4] +Output [5]: [ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5, i_class#10, i_category#11] +Input [7]: [ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5, i_item_sk#9, i_class#10, i_category#11] + +(17) Scan parquet default.store +Output [2]: [s_store_sk#13, s_state#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] 
+Input [2]: [s_store_sk#13, s_state#14] + +(19) Filter [codegen id : 3] +Input [2]: [s_store_sk#13, s_state#14] +Condition : ((isnotnull(s_state#14) AND (s_state#14 = TN)) AND isnotnull(s_store_sk#13)) + +(20) Project [codegen id : 3] +Output [1]: [s_store_sk#13] +Input [2]: [s_store_sk#13, s_state#14] + +(21) BroadcastExchange +Input [1]: [s_store_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] + +(22) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#13] +Join condition: None + +(23) Project [codegen id : 4] +Output [4]: [ss_ext_sales_price#4, ss_net_profit#5, i_class#10, i_category#11] +Input [6]: [ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5, i_class#10, i_category#11, s_store_sk#13] + +(24) HashAggregate [codegen id : 4] +Input [4]: [ss_ext_sales_price#4, ss_net_profit#5, i_class#10, i_category#11] +Keys [2]: [i_category#11, i_class#10] +Functions [2]: [partial_sum(UnscaledValue(ss_net_profit#5)), partial_sum(UnscaledValue(ss_ext_sales_price#4))] +Aggregate Attributes [2]: [sum#16, sum#17] +Results [4]: [i_category#11, i_class#10, sum#18, sum#19] + +(25) Exchange +Input [4]: [i_category#11, i_class#10, sum#18, sum#19] +Arguments: hashpartitioning(i_category#11, i_class#10, 5), true, [id=#20] + +(26) HashAggregate [codegen id : 5] +Input [4]: [i_category#11, i_class#10, sum#18, sum#19] +Keys [2]: [i_category#11, i_class#10] +Functions [2]: [sum(UnscaledValue(ss_net_profit#5)), sum(UnscaledValue(ss_ext_sales_price#4))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#5))#21, sum(UnscaledValue(ss_ext_sales_price#4))#22] +Results [6]: [cast(CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_net_profit#5))#21,17,2)) / promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#22,17,2))), DecimalType(37,20), true) as decimal(38,20)) AS gross_margin#23, i_category#11, i_class#10, 0 AS t_category#24, 0 AS t_class#25, 0 
AS lochierarchy#26] + +(27) ReusedExchange [Reuses operator id: 25] +Output [4]: [i_category#11, i_class#10, sum#27, sum#28] + +(28) HashAggregate [codegen id : 10] +Input [4]: [i_category#11, i_class#10, sum#27, sum#28] +Keys [2]: [i_category#11, i_class#10] +Functions [2]: [sum(UnscaledValue(ss_net_profit#5)), sum(UnscaledValue(ss_ext_sales_price#4))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#5))#29, sum(UnscaledValue(ss_ext_sales_price#4))#30] +Results [3]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#5))#29,17,2) AS ss_net_profit#31, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#30,17,2) AS ss_ext_sales_price#32, i_category#11] + +(29) HashAggregate [codegen id : 10] +Input [3]: [ss_net_profit#31, ss_ext_sales_price#32, i_category#11] +Keys [1]: [i_category#11] +Functions [2]: [partial_sum(ss_net_profit#31), partial_sum(ss_ext_sales_price#32)] +Aggregate Attributes [4]: [sum#33, isEmpty#34, sum#35, isEmpty#36] +Results [5]: [i_category#11, sum#37, isEmpty#38, sum#39, isEmpty#40] + +(30) Exchange +Input [5]: [i_category#11, sum#37, isEmpty#38, sum#39, isEmpty#40] +Arguments: hashpartitioning(i_category#11, 5), true, [id=#41] + +(31) HashAggregate [codegen id : 11] +Input [5]: [i_category#11, sum#37, isEmpty#38, sum#39, isEmpty#40] +Keys [1]: [i_category#11] +Functions [2]: [sum(ss_net_profit#31), sum(ss_ext_sales_price#32)] +Aggregate Attributes [2]: [sum(ss_net_profit#31)#42, sum(ss_ext_sales_price#32)#43] +Results [6]: [cast(CheckOverflow((promote_precision(sum(ss_net_profit#31)#42) / promote_precision(sum(ss_ext_sales_price#32)#43)), DecimalType(38,11), true) as decimal(38,20)) AS gross_margin#44, i_category#11, null AS i_class#45, 0 AS t_category#46, 1 AS t_class#47, 1 AS lochierarchy#48] + +(32) Union + +(33) HashAggregate [codegen id : 12] +Input [6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] +Keys [6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, 
lochierarchy#26] +Functions: [] +Aggregate Attributes: [] +Results [6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] + +(34) Exchange +Input [6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] +Arguments: hashpartitioning(gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26, 5), true, [id=#49] + +(35) HashAggregate [codegen id : 13] +Input [6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] +Keys [6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] +Functions: [] +Aggregate Attributes: [] +Results [6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] + +(36) ReusedExchange [Reuses operator id: 25] +Output [4]: [i_category#11, i_class#10, sum#50, sum#51] + +(37) HashAggregate [codegen id : 18] +Input [4]: [i_category#11, i_class#10, sum#50, sum#51] +Keys [2]: [i_category#11, i_class#10] +Functions [2]: [sum(UnscaledValue(ss_net_profit#5)), sum(UnscaledValue(ss_ext_sales_price#4))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#5))#52, sum(UnscaledValue(ss_ext_sales_price#4))#53] +Results [2]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#5))#52,17,2) AS ss_net_profit#31, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#53,17,2) AS ss_ext_sales_price#32] + +(38) HashAggregate [codegen id : 18] +Input [2]: [ss_net_profit#31, ss_ext_sales_price#32] +Keys: [] +Functions [2]: [partial_sum(ss_net_profit#31), partial_sum(ss_ext_sales_price#32)] +Aggregate Attributes [4]: [sum#54, isEmpty#55, sum#56, isEmpty#57] +Results [4]: [sum#58, isEmpty#59, sum#60, isEmpty#61] + +(39) Exchange +Input [4]: [sum#58, isEmpty#59, sum#60, isEmpty#61] +Arguments: SinglePartition, true, [id=#62] + +(40) HashAggregate [codegen id : 19] +Input [4]: [sum#58, isEmpty#59, sum#60, isEmpty#61] +Keys: [] +Functions [2]: [sum(ss_net_profit#31), 
sum(ss_ext_sales_price#32)] +Aggregate Attributes [2]: [sum(ss_net_profit#31)#63, sum(ss_ext_sales_price#32)#64] +Results [6]: [cast(CheckOverflow((promote_precision(sum(ss_net_profit#31)#63) / promote_precision(sum(ss_ext_sales_price#32)#64)), DecimalType(38,11), true) as decimal(38,20)) AS gross_margin#65, null AS i_category#66, null AS i_class#67, 1 AS t_category#68, 1 AS t_class#69, 2 AS lochierarchy#70] + +(41) Union + +(42) HashAggregate [codegen id : 20] +Input [6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] +Keys [6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] +Functions: [] +Aggregate Attributes: [] +Results [6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] + +(43) Exchange +Input [6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] +Arguments: hashpartitioning(gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26, 5), true, [id=#71] + +(44) HashAggregate [codegen id : 21] +Input [6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] +Keys [6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] +Functions: [] +Aggregate Attributes: [] +Results [5]: [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, CASE WHEN (t_class#25 = 0) THEN i_category#11 END AS _w0#72] + +(45) Exchange +Input [5]: [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, _w0#72] +Arguments: hashpartitioning(lochierarchy#26, _w0#72, 5), true, [id=#73] + +(46) Sort [codegen id : 22] +Input [5]: [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, _w0#72] +Arguments: [lochierarchy#26 ASC NULLS FIRST, _w0#72 ASC NULLS FIRST, gross_margin#23 ASC NULLS FIRST], false, 0 + +(47) Window +Input [5]: [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, _w0#72] +Arguments: 
[rank(gross_margin#23) windowspecdefinition(lochierarchy#26, _w0#72, gross_margin#23 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#74], [lochierarchy#26, _w0#72], [gross_margin#23 ASC NULLS FIRST] + +(48) Project [codegen id : 23] +Output [5]: [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, rank_within_parent#74] +Input [6]: [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, _w0#72, rank_within_parent#74] + +(49) TakeOrderedAndProject +Input [5]: [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, rank_within_parent#74] +Arguments: 100, [lochierarchy#26 DESC NULLS LAST, CASE WHEN (lochierarchy#26 = 0) THEN i_category#11 END ASC NULLS FIRST, rank_within_parent#74 ASC NULLS FIRST], [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, rank_within_parent#74] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a/simplified.txt new file mode 100644 index 0000000000000..9cc1e03f3ec4f --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a/simplified.txt @@ -0,0 +1,82 @@ +TakeOrderedAndProject [gross_margin,i_category,i_class,lochierarchy,rank_within_parent] + WholeStageCodegen (23) + Project [gross_margin,i_category,i_class,lochierarchy,rank_within_parent] + InputAdapter + Window [_w0,gross_margin,lochierarchy] + WholeStageCodegen (22) + Sort [_w0,gross_margin,lochierarchy] + InputAdapter + Exchange [_w0,lochierarchy] #1 + WholeStageCodegen (21) + HashAggregate [gross_margin,i_category,i_class,lochierarchy,t_category,t_class] [_w0] + InputAdapter + Exchange [gross_margin,i_category,i_class,lochierarchy,t_category,t_class] #2 + WholeStageCodegen (20) + HashAggregate [gross_margin,i_category,i_class,lochierarchy,t_category,t_class] + InputAdapter + Union + WholeStageCodegen (13) + HashAggregate 
[gross_margin,i_category,i_class,lochierarchy,t_category,t_class] + InputAdapter + Exchange [gross_margin,i_category,i_class,lochierarchy,t_category,t_class] #3 + WholeStageCodegen (12) + HashAggregate [gross_margin,i_category,i_class,lochierarchy,t_category,t_class] + InputAdapter + Union + WholeStageCodegen (5) + HashAggregate [i_category,i_class,sum,sum] [gross_margin,lochierarchy,sum,sum,sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit)),t_category,t_class] + InputAdapter + Exchange [i_category,i_class] #4 + WholeStageCodegen (4) + HashAggregate [i_category,i_class,ss_ext_sales_price,ss_net_profit] [sum,sum,sum,sum] + Project [i_category,i_class,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [i_category,i_class,ss_ext_sales_price,ss_net_profit,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_class,i_item_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (3) + Project [s_store_sk] + Filter [s_state,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_state,s_store_sk] + WholeStageCodegen (11) + HashAggregate [i_category,isEmpty,isEmpty,sum,sum] [gross_margin,i_class,isEmpty,isEmpty,lochierarchy,sum,sum,sum(ss_ext_sales_price),sum(ss_net_profit),t_category,t_class] + InputAdapter + Exchange [i_category] #8 + 
WholeStageCodegen (10) + HashAggregate [i_category,ss_ext_sales_price,ss_net_profit] [isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum] + HashAggregate [i_category,i_class,sum,sum] [ss_ext_sales_price,ss_net_profit,sum,sum,sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit))] + InputAdapter + ReusedExchange [i_category,i_class,sum,sum] #4 + WholeStageCodegen (19) + HashAggregate [isEmpty,isEmpty,sum,sum] [gross_margin,i_category,i_class,isEmpty,isEmpty,lochierarchy,sum,sum,sum(ss_ext_sales_price),sum(ss_net_profit),t_category,t_class] + InputAdapter + Exchange #9 + WholeStageCodegen (18) + HashAggregate [ss_ext_sales_price,ss_net_profit] [isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum] + HashAggregate [i_category,i_class,sum,sum] [ss_ext_sales_price,ss_net_profit,sum,sum,sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit))] + InputAdapter + ReusedExchange [i_category,i_class,sum,sum] #4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.sf100/explain.txt new file mode 100644 index 0000000000000..7151c1cb4db4d --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.sf100/explain.txt @@ -0,0 +1,313 @@ +== Physical Plan == +TakeOrderedAndProject (58) ++- * Project (57) + +- * SortMergeJoin Inner (56) + :- * Project (48) + : +- * SortMergeJoin Inner (47) + : :- * Sort (37) + : : +- Exchange (36) + : : +- * Filter (35) + : : +- Window (34) + : : +- * Sort (33) + : : +- Exchange (32) + : : +- * Project (31) + : : +- Window (30) + : : +- * Sort (29) + : : +- Exchange (28) + : : +- * HashAggregate (27) + : : +- Exchange (26) + : : +- * HashAggregate (25) + : : +- * Project (24) + : : +- * SortMergeJoin Inner (23) + : : :- * Sort (17) + : : : +- Exchange (16) + : : : +- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : : : : 
+- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.date_dim (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.store (10) + : : +- * Sort (22) + : : +- Exchange (21) + : : +- * Filter (20) + : : +- * ColumnarToRow (19) + : : +- Scan parquet default.item (18) + : +- * Sort (46) + : +- Exchange (45) + : +- * Project (44) + : +- * Filter (43) + : +- Window (42) + : +- * Sort (41) + : +- Exchange (40) + : +- * HashAggregate (39) + : +- ReusedExchange (38) + +- * Sort (55) + +- Exchange (54) + +- * Project (53) + +- * Filter (52) + +- Window (51) + +- * Sort (50) + +- ReusedExchange (49) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_sales_price#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_sales_price#4] + +(3) Filter [codegen id : 3] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_sales_price#4] +Condition : ((isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#3)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#5, d_year#6, d_moy#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: 
[Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Condition : ((((d_year#6 = 1999) OR ((d_year#6 = 1998) AND (d_moy#7 = 12))) OR ((d_year#6 = 2000) AND (d_moy#7 = 1))) AND isnotnull(d_date_sk#5)) + +(7) BroadcastExchange +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#8] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(9) Project [codegen id : 3] +Output [5]: [ss_item_sk#2, ss_store_sk#3, ss_sales_price#4, d_year#6, d_moy#7] +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_sales_price#4, d_date_sk#5, d_year#6, d_moy#7] + +(10) Scan parquet default.store +Output [3]: [s_store_sk#9, s_store_name#10, s_company_name#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_company_name), IsNotNull(s_store_name)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [s_store_sk#9, s_store_name#10, s_company_name#11] + +(12) Filter [codegen id : 2] +Input [3]: [s_store_sk#9, s_store_name#10, s_company_name#11] +Condition : ((isnotnull(s_store_sk#9) AND isnotnull(s_company_name#11)) AND isnotnull(s_store_name#10)) + +(13) BroadcastExchange +Input [3]: [s_store_sk#9, s_store_name#10, s_company_name#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#9] +Join condition: None + 
+(15) Project [codegen id : 3] +Output [6]: [ss_item_sk#2, ss_sales_price#4, d_year#6, d_moy#7, s_store_name#10, s_company_name#11] +Input [8]: [ss_item_sk#2, ss_store_sk#3, ss_sales_price#4, d_year#6, d_moy#7, s_store_sk#9, s_store_name#10, s_company_name#11] + +(16) Exchange +Input [6]: [ss_item_sk#2, ss_sales_price#4, d_year#6, d_moy#7, s_store_name#10, s_company_name#11] +Arguments: hashpartitioning(ss_item_sk#2, 5), true, [id=#13] + +(17) Sort [codegen id : 4] +Input [6]: [ss_item_sk#2, ss_sales_price#4, d_year#6, d_moy#7, s_store_name#10, s_company_name#11] +Arguments: [ss_item_sk#2 ASC NULLS FIRST], false, 0 + +(18) Scan parquet default.item +Output [3]: [i_item_sk#14, i_brand#15, i_category#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand), IsNotNull(i_category)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 5] +Input [3]: [i_item_sk#14, i_brand#15, i_category#16] + +(20) Filter [codegen id : 5] +Input [3]: [i_item_sk#14, i_brand#15, i_category#16] +Condition : ((isnotnull(i_item_sk#14) AND isnotnull(i_brand#15)) AND isnotnull(i_category#16)) + +(21) Exchange +Input [3]: [i_item_sk#14, i_brand#15, i_category#16] +Arguments: hashpartitioning(i_item_sk#14, 5), true, [id=#17] + +(22) Sort [codegen id : 6] +Input [3]: [i_item_sk#14, i_brand#15, i_category#16] +Arguments: [i_item_sk#14 ASC NULLS FIRST], false, 0 + +(23) SortMergeJoin [codegen id : 7] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#14] +Join condition: None + +(24) Project [codegen id : 7] +Output [7]: [i_brand#15, i_category#16, ss_sales_price#4, d_year#6, d_moy#7, s_store_name#10, s_company_name#11] +Input [9]: [ss_item_sk#2, ss_sales_price#4, d_year#6, d_moy#7, s_store_name#10, s_company_name#11, i_item_sk#14, i_brand#15, i_category#16] + +(25) HashAggregate [codegen id : 7] +Input [7]: 
[i_brand#15, i_category#16, ss_sales_price#4, d_year#6, d_moy#7, s_store_name#10, s_company_name#11] +Keys [6]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#4))] +Aggregate Attributes [1]: [sum#18] +Results [7]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, sum#19] + +(26) Exchange +Input [7]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, sum#19] +Arguments: hashpartitioning(i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, 5), true, [id=#20] + +(27) HashAggregate [codegen id : 8] +Input [7]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, sum#19] +Keys [6]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7] +Functions [1]: [sum(UnscaledValue(ss_sales_price#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#4))#21] +Results [8]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, MakeDecimal(sum(UnscaledValue(ss_sales_price#4))#21,17,2) AS sum_sales#22, MakeDecimal(sum(UnscaledValue(ss_sales_price#4))#21,17,2) AS _w0#23] + +(28) Exchange +Input [8]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, sum_sales#22, _w0#23] +Arguments: hashpartitioning(i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, 5), true, [id=#24] + +(29) Sort [codegen id : 9] +Input [8]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, sum_sales#22, _w0#23] +Arguments: [i_category#16 ASC NULLS FIRST, i_brand#15 ASC NULLS FIRST, s_store_name#10 ASC NULLS FIRST, s_company_name#11 ASC NULLS FIRST, d_year#6 ASC NULLS FIRST], false, 0 + +(30) Window +Input [8]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, sum_sales#22, _w0#23] +Arguments: [avg(_w0#23) 
windowspecdefinition(i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#25], [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6] + +(31) Project [codegen id : 10] +Output [8]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, sum_sales#22, avg_monthly_sales#25] +Input [9]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, sum_sales#22, _w0#23, avg_monthly_sales#25] + +(32) Exchange +Input [8]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, sum_sales#22, avg_monthly_sales#25] +Arguments: hashpartitioning(i_category#16, i_brand#15, s_store_name#10, s_company_name#11, 5), true, [id=#26] + +(33) Sort [codegen id : 11] +Input [8]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, sum_sales#22, avg_monthly_sales#25] +Arguments: [i_category#16 ASC NULLS FIRST, i_brand#15 ASC NULLS FIRST, s_store_name#10 ASC NULLS FIRST, s_company_name#11 ASC NULLS FIRST, d_year#6 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST], false, 0 + +(34) Window +Input [8]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, sum_sales#22, avg_monthly_sales#25] +Arguments: [rank(d_year#6, d_moy#7) windowspecdefinition(i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#27], [i_category#16, i_brand#15, s_store_name#10, s_company_name#11], [d_year#6 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST] + +(35) Filter [codegen id : 12] +Input [9]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, sum_sales#22, avg_monthly_sales#25, rn#27] +Condition : (((((isnotnull(avg_monthly_sales#25) AND isnotnull(d_year#6)) AND (d_year#6 = 1999)) AND 
(avg_monthly_sales#25 > 0.000000)) AND (CASE WHEN (avg_monthly_sales#25 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#22 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6), true))) / promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(38,16), true) ELSE null END > 0.1000000000000000)) AND isnotnull(rn#27)) + +(36) Exchange +Input [9]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, sum_sales#22, avg_monthly_sales#25, rn#27] +Arguments: hashpartitioning(i_category#16, i_brand#15, s_store_name#10, s_company_name#11, rn#27, 5), true, [id=#28] + +(37) Sort [codegen id : 13] +Input [9]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, sum_sales#22, avg_monthly_sales#25, rn#27] +Arguments: [i_category#16 ASC NULLS FIRST, i_brand#15 ASC NULLS FIRST, s_store_name#10 ASC NULLS FIRST, s_company_name#11 ASC NULLS FIRST, rn#27 ASC NULLS FIRST], false, 0 + +(38) ReusedExchange [Reuses operator id: 26] +Output [7]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum#35] + +(39) HashAggregate [codegen id : 21] +Input [7]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum#35] +Keys [6]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34] +Functions [1]: [sum(UnscaledValue(ss_sales_price#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#4))#36] +Results [7]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, MakeDecimal(sum(UnscaledValue(ss_sales_price#4))#36,17,2) AS sum_sales#37] + +(40) Exchange +Input [7]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum_sales#37] +Arguments: hashpartitioning(i_category#29, i_brand#30, s_store_name#31, s_company_name#32, 5), true, [id=#38] + +(41) 
Sort [codegen id : 22] +Input [7]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum_sales#37] +Arguments: [i_category#29 ASC NULLS FIRST, i_brand#30 ASC NULLS FIRST, s_store_name#31 ASC NULLS FIRST, s_company_name#32 ASC NULLS FIRST, d_year#33 ASC NULLS FIRST, d_moy#34 ASC NULLS FIRST], false, 0 + +(42) Window +Input [7]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum_sales#37] +Arguments: [rank(d_year#33, d_moy#34) windowspecdefinition(i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33 ASC NULLS FIRST, d_moy#34 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#39], [i_category#29, i_brand#30, s_store_name#31, s_company_name#32], [d_year#33 ASC NULLS FIRST, d_moy#34 ASC NULLS FIRST] + +(43) Filter [codegen id : 23] +Input [8]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum_sales#37, rn#39] +Condition : isnotnull(rn#39) + +(44) Project [codegen id : 23] +Output [6]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, sum_sales#37, rn#39] +Input [8]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum_sales#37, rn#39] + +(45) Exchange +Input [6]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, sum_sales#37, rn#39] +Arguments: hashpartitioning(i_category#29, i_brand#30, s_store_name#31, s_company_name#32, (rn#39 + 1), 5), true, [id=#40] + +(46) Sort [codegen id : 24] +Input [6]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, sum_sales#37, rn#39] +Arguments: [i_category#29 ASC NULLS FIRST, i_brand#30 ASC NULLS FIRST, s_store_name#31 ASC NULLS FIRST, s_company_name#32 ASC NULLS FIRST, (rn#39 + 1) ASC NULLS FIRST], false, 0 + +(47) SortMergeJoin [codegen id : 25] +Left keys [5]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, rn#27] +Right keys [5]: [i_category#29, i_brand#30, 
s_store_name#31, s_company_name#32, (rn#39 + 1)] +Join condition: None + +(48) Project [codegen id : 25] +Output [10]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, sum_sales#22, avg_monthly_sales#25, rn#27, sum_sales#37] +Input [15]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, sum_sales#22, avg_monthly_sales#25, rn#27, i_category#29, i_brand#30, s_store_name#31, s_company_name#32, sum_sales#37, rn#39] + +(49) ReusedExchange [Reuses operator id: 40] +Output [7]: [i_category#41, i_brand#42, s_store_name#43, s_company_name#44, d_year#45, d_moy#46, sum_sales#47] + +(50) Sort [codegen id : 34] +Input [7]: [i_category#41, i_brand#42, s_store_name#43, s_company_name#44, d_year#45, d_moy#46, sum_sales#47] +Arguments: [i_category#41 ASC NULLS FIRST, i_brand#42 ASC NULLS FIRST, s_store_name#43 ASC NULLS FIRST, s_company_name#44 ASC NULLS FIRST, d_year#45 ASC NULLS FIRST, d_moy#46 ASC NULLS FIRST], false, 0 + +(51) Window +Input [7]: [i_category#41, i_brand#42, s_store_name#43, s_company_name#44, d_year#45, d_moy#46, sum_sales#47] +Arguments: [rank(d_year#45, d_moy#46) windowspecdefinition(i_category#41, i_brand#42, s_store_name#43, s_company_name#44, d_year#45 ASC NULLS FIRST, d_moy#46 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#48], [i_category#41, i_brand#42, s_store_name#43, s_company_name#44], [d_year#45 ASC NULLS FIRST, d_moy#46 ASC NULLS FIRST] + +(52) Filter [codegen id : 35] +Input [8]: [i_category#41, i_brand#42, s_store_name#43, s_company_name#44, d_year#45, d_moy#46, sum_sales#47, rn#48] +Condition : isnotnull(rn#48) + +(53) Project [codegen id : 35] +Output [6]: [i_category#41, i_brand#42, s_store_name#43, s_company_name#44, sum_sales#47, rn#48] +Input [8]: [i_category#41, i_brand#42, s_store_name#43, s_company_name#44, d_year#45, d_moy#46, sum_sales#47, rn#48] + +(54) Exchange +Input [6]: [i_category#41, i_brand#42, s_store_name#43, 
s_company_name#44, sum_sales#47, rn#48] +Arguments: hashpartitioning(i_category#41, i_brand#42, s_store_name#43, s_company_name#44, (rn#48 - 1), 5), true, [id=#49] + +(55) Sort [codegen id : 36] +Input [6]: [i_category#41, i_brand#42, s_store_name#43, s_company_name#44, sum_sales#47, rn#48] +Arguments: [i_category#41 ASC NULLS FIRST, i_brand#42 ASC NULLS FIRST, s_store_name#43 ASC NULLS FIRST, s_company_name#44 ASC NULLS FIRST, (rn#48 - 1) ASC NULLS FIRST], false, 0 + +(56) SortMergeJoin [codegen id : 37] +Left keys [5]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, rn#27] +Right keys [5]: [i_category#41, i_brand#42, s_store_name#43, s_company_name#44, (rn#48 - 1)] +Join condition: None + +(57) Project [codegen id : 37] +Output [7]: [i_category#16, d_year#6, d_moy#7, avg_monthly_sales#25, sum_sales#22, sum_sales#37 AS psum#50, sum_sales#47 AS nsum#51] +Input [16]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#6, d_moy#7, sum_sales#22, avg_monthly_sales#25, rn#27, sum_sales#37, i_category#41, i_brand#42, s_store_name#43, s_company_name#44, sum_sales#47, rn#48] + +(58) TakeOrderedAndProject +Input [7]: [i_category#16, d_year#6, d_moy#7, avg_monthly_sales#25, sum_sales#22, psum#50, nsum#51] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#22 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6), true) ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST], [i_category#16, d_year#6, d_moy#7, avg_monthly_sales#25, sum_sales#22, psum#50, nsum#51] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.sf100/simplified.txt new file mode 100644 index 0000000000000..e91829b2812f8 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.sf100/simplified.txt @@ -0,0 +1,107 @@ +TakeOrderedAndProject 
[avg_monthly_sales,d_moy,d_year,i_category,nsum,psum,sum_sales] + WholeStageCodegen (37) + Project [avg_monthly_sales,d_moy,d_year,i_category,sum_sales,sum_sales,sum_sales] + SortMergeJoin [i_brand,i_brand,i_category,i_category,rn,rn,s_company_name,s_company_name,s_store_name,s_store_name] + InputAdapter + WholeStageCodegen (25) + Project [avg_monthly_sales,d_moy,d_year,i_brand,i_category,rn,s_company_name,s_store_name,sum_sales,sum_sales] + SortMergeJoin [i_brand,i_brand,i_category,i_category,rn,rn,s_company_name,s_company_name,s_store_name,s_store_name] + InputAdapter + WholeStageCodegen (13) + Sort [i_brand,i_category,rn,s_company_name,s_store_name] + InputAdapter + Exchange [i_brand,i_category,rn,s_company_name,s_store_name] #1 + WholeStageCodegen (12) + Filter [avg_monthly_sales,d_year,rn,sum_sales] + InputAdapter + Window [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] + WholeStageCodegen (11) + Sort [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] + InputAdapter + Exchange [i_brand,i_category,s_company_name,s_store_name] #2 + WholeStageCodegen (10) + Project [avg_monthly_sales,d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum_sales] + InputAdapter + Window [_w0,d_year,i_brand,i_category,s_company_name,s_store_name] + WholeStageCodegen (9) + Sort [d_year,i_brand,i_category,s_company_name,s_store_name] + InputAdapter + Exchange [d_year,i_brand,i_category,s_company_name,s_store_name] #3 + WholeStageCodegen (8) + HashAggregate [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum] [_w0,sum,sum(UnscaledValue(ss_sales_price)),sum_sales] + InputAdapter + Exchange [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] #4 + WholeStageCodegen (7) + HashAggregate [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,ss_sales_price] [sum,sum] + Project [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,ss_sales_price] + SortMergeJoin [i_item_sk,ss_item_sk] + InputAdapter + WholeStageCodegen (4) 
+ Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #5 + WholeStageCodegen (3) + Project [d_moy,d_year,s_company_name,s_store_name,ss_item_sk,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_moy,d_year,ss_item_sk,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (1) + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (2) + Filter [s_company_name,s_store_name,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_company_name,s_store_name,s_store_sk] + InputAdapter + WholeStageCodegen (6) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #8 + WholeStageCodegen (5) + Filter [i_brand,i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_category,i_item_sk] + InputAdapter + WholeStageCodegen (24) + Sort [i_brand,i_category,rn,s_company_name,s_store_name] + InputAdapter + Exchange [i_brand,i_category,rn,s_company_name,s_store_name] #9 + WholeStageCodegen (23) + Project [i_brand,i_category,rn,s_company_name,s_store_name,sum_sales] + Filter [rn] + InputAdapter + Window [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] + WholeStageCodegen (22) + Sort [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] + InputAdapter + Exchange [i_brand,i_category,s_company_name,s_store_name] #10 + WholeStageCodegen (21) + HashAggregate [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum] [sum,sum(UnscaledValue(ss_sales_price)),sum_sales] + InputAdapter + ReusedExchange [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum] #4 + InputAdapter + WholeStageCodegen (36) + Sort 
[i_brand,i_category,rn,s_company_name,s_store_name] + InputAdapter + Exchange [i_brand,i_category,rn,s_company_name,s_store_name] #11 + WholeStageCodegen (35) + Project [i_brand,i_category,rn,s_company_name,s_store_name,sum_sales] + Filter [rn] + InputAdapter + Window [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] + WholeStageCodegen (34) + Sort [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] + InputAdapter + ReusedExchange [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum_sales] #10 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47/explain.txt new file mode 100644 index 0000000000000..769051bfa32c9 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47/explain.txt @@ -0,0 +1,278 @@ +== Physical Plan == +TakeOrderedAndProject (51) ++- * Project (50) + +- * BroadcastHashJoin Inner BuildRight (49) + :- * Project (42) + : +- * BroadcastHashJoin Inner BuildRight (41) + : :- * Filter (32) + : : +- Window (31) + : : +- * Sort (30) + : : +- Exchange (29) + : : +- * Project (28) + : : +- Window (27) + : : +- * Sort (26) + : : +- Exchange (25) + : : +- * HashAggregate (24) + : : +- Exchange (23) + : : +- * HashAggregate (22) + : : +- * Project (21) + : : +- * BroadcastHashJoin Inner BuildRight (20) + : : :- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.item (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.store_sales (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.date_dim (10) + : : +- BroadcastExchange (19) + : : +- * 
Filter (18) + : : +- * ColumnarToRow (17) + : : +- Scan parquet default.store (16) + : +- BroadcastExchange (40) + : +- * Project (39) + : +- * Filter (38) + : +- Window (37) + : +- * Sort (36) + : +- Exchange (35) + : +- * HashAggregate (34) + : +- ReusedExchange (33) + +- BroadcastExchange (48) + +- * Project (47) + +- * Filter (46) + +- Window (45) + +- * Sort (44) + +- ReusedExchange (43) + + +(1) Scan parquet default.item +Output [3]: [i_item_sk#1, i_brand#2, i_category#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category), IsNotNull(i_brand)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] + +(3) Filter [codegen id : 4] +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] +Condition : ((isnotnull(i_item_sk#1) AND isnotnull(i_category#3)) AND isnotnull(i_brand#2)) + +(4) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#4, ss_item_sk#5, ss_store_sk#6, ss_sales_price#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [ss_sold_date_sk#4, ss_item_sk#5, ss_store_sk#6, ss_sales_price#7] + +(6) Filter [codegen id : 1] +Input [4]: [ss_sold_date_sk#4, ss_item_sk#5, ss_store_sk#6, ss_sales_price#7] +Condition : ((isnotnull(ss_item_sk#5) AND isnotnull(ss_sold_date_sk#4)) AND isnotnull(ss_store_sk#6)) + +(7) BroadcastExchange +Input [4]: [ss_sold_date_sk#4, ss_item_sk#5, ss_store_sk#6, ss_sales_price#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#8] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys 
[1]: [i_item_sk#1] +Right keys [1]: [ss_item_sk#5] +Join condition: None + +(9) Project [codegen id : 4] +Output [5]: [i_brand#2, i_category#3, ss_sold_date_sk#4, ss_store_sk#6, ss_sales_price#7] +Input [7]: [i_item_sk#1, i_brand#2, i_category#3, ss_sold_date_sk#4, ss_item_sk#5, ss_store_sk#6, ss_sales_price#7] + +(10) Scan parquet default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_moy#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] + +(12) Filter [codegen id : 2] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Condition : ((((d_year#10 = 1999) OR ((d_year#10 = 1998) AND (d_moy#11 = 12))) OR ((d_year#10 = 2000) AND (d_moy#11 = 1))) AND isnotnull(d_date_sk#9)) + +(13) BroadcastExchange +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#4] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(15) Project [codegen id : 4] +Output [6]: [i_brand#2, i_category#3, ss_store_sk#6, ss_sales_price#7, d_year#10, d_moy#11] +Input [8]: [i_brand#2, i_category#3, ss_sold_date_sk#4, ss_store_sk#6, ss_sales_price#7, d_date_sk#9, d_year#10, d_moy#11] + +(16) Scan parquet default.store +Output [3]: [s_store_sk#13, s_store_name#14, s_company_name#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_company_name)] +ReadSchema: struct + +(17) 
ColumnarToRow [codegen id : 3] +Input [3]: [s_store_sk#13, s_store_name#14, s_company_name#15] + +(18) Filter [codegen id : 3] +Input [3]: [s_store_sk#13, s_store_name#14, s_company_name#15] +Condition : ((isnotnull(s_store_sk#13) AND isnotnull(s_store_name#14)) AND isnotnull(s_company_name#15)) + +(19) BroadcastExchange +Input [3]: [s_store_sk#13, s_store_name#14, s_company_name#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#16] + +(20) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#6] +Right keys [1]: [s_store_sk#13] +Join condition: None + +(21) Project [codegen id : 4] +Output [7]: [i_brand#2, i_category#3, ss_sales_price#7, d_year#10, d_moy#11, s_store_name#14, s_company_name#15] +Input [9]: [i_brand#2, i_category#3, ss_store_sk#6, ss_sales_price#7, d_year#10, d_moy#11, s_store_sk#13, s_store_name#14, s_company_name#15] + +(22) HashAggregate [codegen id : 4] +Input [7]: [i_brand#2, i_category#3, ss_sales_price#7, d_year#10, d_moy#11, s_store_name#14, s_company_name#15] +Keys [6]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [1]: [sum#17] +Results [7]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum#18] + +(23) Exchange +Input [7]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum#18] +Arguments: hashpartitioning(i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, 5), true, [id=#19] + +(24) HashAggregate [codegen id : 5] +Input [7]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum#18] +Keys [6]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11] +Functions [1]: [sum(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#7))#20] +Results [8]: [i_category#3, 
i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, MakeDecimal(sum(UnscaledValue(ss_sales_price#7))#20,17,2) AS sum_sales#21, MakeDecimal(sum(UnscaledValue(ss_sales_price#7))#20,17,2) AS _w0#22] + +(25) Exchange +Input [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, _w0#22] +Arguments: hashpartitioning(i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, 5), true, [id=#23] + +(26) Sort [codegen id : 6] +Input [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, _w0#22] +Arguments: [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, s_store_name#14 ASC NULLS FIRST, s_company_name#15 ASC NULLS FIRST, d_year#10 ASC NULLS FIRST], false, 0 + +(27) Window +Input [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, _w0#22] +Arguments: [avg(_w0#22) windowspecdefinition(i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#24], [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10] + +(28) Project [codegen id : 7] +Output [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, avg_monthly_sales#24] +Input [9]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, _w0#22, avg_monthly_sales#24] + +(29) Exchange +Input [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, avg_monthly_sales#24] +Arguments: hashpartitioning(i_category#3, i_brand#2, s_store_name#14, s_company_name#15, 5), true, [id=#25] + +(30) Sort [codegen id : 8] +Input [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, avg_monthly_sales#24] +Arguments: [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS 
FIRST, s_store_name#14 ASC NULLS FIRST, s_company_name#15 ASC NULLS FIRST, d_year#10 ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST], false, 0 + +(31) Window +Input [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, avg_monthly_sales#24] +Arguments: [rank(d_year#10, d_moy#11) windowspecdefinition(i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10 ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#26], [i_category#3, i_brand#2, s_store_name#14, s_company_name#15], [d_year#10 ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST] + +(32) Filter [codegen id : 23] +Input [9]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, avg_monthly_sales#24, rn#26] +Condition : (((((isnotnull(d_year#10) AND isnotnull(avg_monthly_sales#24)) AND (d_year#10 = 1999)) AND (avg_monthly_sales#24 > 0.000000)) AND (CASE WHEN (avg_monthly_sales#24 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6), true))) / promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(38,16), true) ELSE null END > 0.1000000000000000)) AND isnotnull(rn#26)) + +(33) ReusedExchange [Reuses operator id: 23] +Output [7]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum#33] + +(34) HashAggregate [codegen id : 13] +Input [7]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum#33] +Keys [6]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32] +Functions [1]: [sum(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#7))#34] +Results [7]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, 
MakeDecimal(sum(UnscaledValue(ss_sales_price#7))#34,17,2) AS sum_sales#35] + +(35) Exchange +Input [7]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum_sales#35] +Arguments: hashpartitioning(i_category#27, i_brand#28, s_store_name#29, s_company_name#30, 5), true, [id=#36] + +(36) Sort [codegen id : 14] +Input [7]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum_sales#35] +Arguments: [i_category#27 ASC NULLS FIRST, i_brand#28 ASC NULLS FIRST, s_store_name#29 ASC NULLS FIRST, s_company_name#30 ASC NULLS FIRST, d_year#31 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST], false, 0 + +(37) Window +Input [7]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum_sales#35] +Arguments: [rank(d_year#31, d_moy#32) windowspecdefinition(i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#37], [i_category#27, i_brand#28, s_store_name#29, s_company_name#30], [d_year#31 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST] + +(38) Filter [codegen id : 15] +Input [8]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum_sales#35, rn#37] +Condition : isnotnull(rn#37) + +(39) Project [codegen id : 15] +Output [6]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, sum_sales#35, rn#37] +Input [8]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum_sales#35, rn#37] + +(40) BroadcastExchange +Input [6]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, sum_sales#35, rn#37] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, true] + 1)),false), [id=#38] + +(41) BroadcastHashJoin [codegen id : 23] +Left keys [5]: [i_category#3, i_brand#2, 
s_store_name#14, s_company_name#15, rn#26] +Right keys [5]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, (rn#37 + 1)] +Join condition: None + +(42) Project [codegen id : 23] +Output [10]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, avg_monthly_sales#24, rn#26, sum_sales#35] +Input [15]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, avg_monthly_sales#24, rn#26, i_category#27, i_brand#28, s_store_name#29, s_company_name#30, sum_sales#35, rn#37] + +(43) ReusedExchange [Reuses operator id: 35] +Output [7]: [i_category#39, i_brand#40, s_store_name#41, s_company_name#42, d_year#43, d_moy#44, sum_sales#45] + +(44) Sort [codegen id : 21] +Input [7]: [i_category#39, i_brand#40, s_store_name#41, s_company_name#42, d_year#43, d_moy#44, sum_sales#45] +Arguments: [i_category#39 ASC NULLS FIRST, i_brand#40 ASC NULLS FIRST, s_store_name#41 ASC NULLS FIRST, s_company_name#42 ASC NULLS FIRST, d_year#43 ASC NULLS FIRST, d_moy#44 ASC NULLS FIRST], false, 0 + +(45) Window +Input [7]: [i_category#39, i_brand#40, s_store_name#41, s_company_name#42, d_year#43, d_moy#44, sum_sales#45] +Arguments: [rank(d_year#43, d_moy#44) windowspecdefinition(i_category#39, i_brand#40, s_store_name#41, s_company_name#42, d_year#43 ASC NULLS FIRST, d_moy#44 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#46], [i_category#39, i_brand#40, s_store_name#41, s_company_name#42], [d_year#43 ASC NULLS FIRST, d_moy#44 ASC NULLS FIRST] + +(46) Filter [codegen id : 22] +Input [8]: [i_category#39, i_brand#40, s_store_name#41, s_company_name#42, d_year#43, d_moy#44, sum_sales#45, rn#46] +Condition : isnotnull(rn#46) + +(47) Project [codegen id : 22] +Output [6]: [i_category#39, i_brand#40, s_store_name#41, s_company_name#42, sum_sales#45, rn#46] +Input [8]: [i_category#39, i_brand#40, s_store_name#41, s_company_name#42, d_year#43, d_moy#44, 
sum_sales#45, rn#46] + +(48) BroadcastExchange +Input [6]: [i_category#39, i_brand#40, s_store_name#41, s_company_name#42, sum_sales#45, rn#46] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, true] - 1)),false), [id=#47] + +(49) BroadcastHashJoin [codegen id : 23] +Left keys [5]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, rn#26] +Right keys [5]: [i_category#39, i_brand#40, s_store_name#41, s_company_name#42, (rn#46 - 1)] +Join condition: None + +(50) Project [codegen id : 23] +Output [7]: [i_category#3, d_year#10, d_moy#11, avg_monthly_sales#24, sum_sales#21, sum_sales#35 AS psum#48, sum_sales#45 AS nsum#49] +Input [16]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, avg_monthly_sales#24, rn#26, sum_sales#35, i_category#39, i_brand#40, s_store_name#41, s_company_name#42, sum_sales#45, rn#46] + +(51) TakeOrderedAndProject +Input [7]: [i_category#3, d_year#10, d_moy#11, avg_monthly_sales#24, sum_sales#21, psum#48, nsum#49] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6), true) ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST], [i_category#3, d_year#10, d_moy#11, avg_monthly_sales#24, sum_sales#21, psum#48, nsum#49] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47/simplified.txt new file mode 100644 index 0000000000000..98c0f46ab2b71 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47/simplified.txt @@ -0,0 +1,84 @@ +TakeOrderedAndProject [avg_monthly_sales,d_moy,d_year,i_category,nsum,psum,sum_sales] + WholeStageCodegen (23) + Project [avg_monthly_sales,d_moy,d_year,i_category,sum_sales,sum_sales,sum_sales] + 
BroadcastHashJoin [i_brand,i_brand,i_category,i_category,rn,rn,s_company_name,s_company_name,s_store_name,s_store_name] + Project [avg_monthly_sales,d_moy,d_year,i_brand,i_category,rn,s_company_name,s_store_name,sum_sales,sum_sales] + BroadcastHashJoin [i_brand,i_brand,i_category,i_category,rn,rn,s_company_name,s_company_name,s_store_name,s_store_name] + Filter [avg_monthly_sales,d_year,rn,sum_sales] + InputAdapter + Window [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] + WholeStageCodegen (8) + Sort [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] + InputAdapter + Exchange [i_brand,i_category,s_company_name,s_store_name] #1 + WholeStageCodegen (7) + Project [avg_monthly_sales,d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum_sales] + InputAdapter + Window [_w0,d_year,i_brand,i_category,s_company_name,s_store_name] + WholeStageCodegen (6) + Sort [d_year,i_brand,i_category,s_company_name,s_store_name] + InputAdapter + Exchange [d_year,i_brand,i_category,s_company_name,s_store_name] #2 + WholeStageCodegen (5) + HashAggregate [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum] [_w0,sum,sum(UnscaledValue(ss_sales_price)),sum_sales] + InputAdapter + Exchange [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] #3 + WholeStageCodegen (4) + HashAggregate [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,ss_sales_price] [sum,sum] + Project [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_moy,d_year,i_brand,i_category,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand,i_category,ss_sales_price,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [i_brand,i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_category,i_item_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Filter 
[ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Filter [s_company_name,s_store_name,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_company_name,s_store_name,s_store_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (15) + Project [i_brand,i_category,rn,s_company_name,s_store_name,sum_sales] + Filter [rn] + InputAdapter + Window [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] + WholeStageCodegen (14) + Sort [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] + InputAdapter + Exchange [i_brand,i_category,s_company_name,s_store_name] #8 + WholeStageCodegen (13) + HashAggregate [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum] [sum,sum(UnscaledValue(ss_sales_price)),sum_sales] + InputAdapter + ReusedExchange [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum] #3 + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (22) + Project [i_brand,i_category,rn,s_company_name,s_store_name,sum_sales] + Filter [rn] + InputAdapter + Window [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] + WholeStageCodegen (21) + Sort [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] + InputAdapter + ReusedExchange [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum_sales] #8 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49.sf100/explain.txt new file mode 100644 index 0000000000000..0662a51d8f9cd --- /dev/null +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49.sf100/explain.txt @@ -0,0 +1,478 @@ +== Physical Plan == +TakeOrderedAndProject (87) ++- * HashAggregate (86) + +- Exchange (85) + +- * HashAggregate (84) + +- Union (83) + :- * Project (30) + : +- * Filter (29) + : +- Window (28) + : +- * Sort (27) + : +- Window (26) + : +- * Sort (25) + : +- Exchange (24) + : +- * HashAggregate (23) + : +- Exchange (22) + : +- * HashAggregate (21) + : +- * Project (20) + : +- * SortMergeJoin Inner (19) + : :- * Sort (13) + : : +- Exchange (12) + : : +- * Project (11) + : : +- * BroadcastHashJoin Inner BuildRight (10) + : : :- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.web_sales (1) + : : +- BroadcastExchange (9) + : : +- * Project (8) + : : +- * Filter (7) + : : +- * ColumnarToRow (6) + : : +- Scan parquet default.date_dim (5) + : +- * Sort (18) + : +- Exchange (17) + : +- * Filter (16) + : +- * ColumnarToRow (15) + : +- Scan parquet default.web_returns (14) + :- * Project (56) + : +- * Filter (55) + : +- Window (54) + : +- * Sort (53) + : +- Window (52) + : +- * Sort (51) + : +- Exchange (50) + : +- * HashAggregate (49) + : +- Exchange (48) + : +- * HashAggregate (47) + : +- * Project (46) + : +- * SortMergeJoin Inner (45) + : :- * Sort (39) + : : +- Exchange (38) + : : +- * Project (37) + : : +- * BroadcastHashJoin Inner BuildRight (36) + : : :- * Project (34) + : : : +- * Filter (33) + : : : +- * ColumnarToRow (32) + : : : +- Scan parquet default.catalog_sales (31) + : : +- ReusedExchange (35) + : +- * Sort (44) + : +- Exchange (43) + : +- * Filter (42) + : +- * ColumnarToRow (41) + : +- Scan parquet default.catalog_returns (40) + +- * Project (82) + +- * Filter (81) + +- Window (80) + +- * Sort (79) + +- Window (78) + +- * Sort (77) + +- Exchange (76) + +- * HashAggregate (75) + +- Exchange (74) + +- * HashAggregate (73) + +- * Project (72) + +- * SortMergeJoin Inner (71) + :- * Sort (65) + : +- 
Exchange (64) + : +- * Project (63) + : +- * BroadcastHashJoin Inner BuildRight (62) + : :- * Project (60) + : : +- * Filter (59) + : : +- * ColumnarToRow (58) + : : +- Scan parquet default.store_sales (57) + : +- ReusedExchange (61) + +- * Sort (70) + +- Exchange (69) + +- * Filter (68) + +- * ColumnarToRow (67) + +- Scan parquet default.store_returns (66) + + +(1) Scan parquet default.web_sales +Output [6]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5, ws_net_profit#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_quantity), IsNotNull(ws_net_paid), IsNotNull(ws_net_profit), GreaterThan(ws_net_profit,1.00), GreaterThan(ws_net_paid,0.00), GreaterThan(ws_quantity,0), IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [6]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5, ws_net_profit#6] + +(3) Filter [codegen id : 2] +Input [6]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5, ws_net_profit#6] +Condition : ((((((((isnotnull(ws_quantity#4) AND isnotnull(ws_net_paid#5)) AND isnotnull(ws_net_profit#6)) AND (ws_net_profit#6 > 1.00)) AND (ws_net_paid#5 > 0.00)) AND (ws_quantity#4 > 0)) AND isnotnull(ws_item_sk#2)) AND isnotnull(ws_order_number#3)) AND isnotnull(ws_sold_date_sk#1)) + +(4) Project [codegen id : 2] +Output [5]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5] +Input [6]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5, ws_net_profit#6] + +(5) Scan parquet default.date_dim +Output [3]: [d_date_sk#7, d_year#8, d_moy#9] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,12), IsNotNull(d_date_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#7, d_year#8, d_moy#9] + +(7) Filter [codegen id : 1] +Input [3]: [d_date_sk#7, d_year#8, d_moy#9] +Condition : ((((isnotnull(d_year#8) AND isnotnull(d_moy#9)) AND (d_year#8 = 2001)) AND (d_moy#9 = 12)) AND isnotnull(d_date_sk#7)) + +(8) Project [codegen id : 1] +Output [1]: [d_date_sk#7] +Input [3]: [d_date_sk#7, d_year#8, d_moy#9] + +(9) BroadcastExchange +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] + +(10) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ws_sold_date_sk#1] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(11) Project [codegen id : 2] +Output [4]: [ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5] +Input [6]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5, d_date_sk#7] + +(12) Exchange +Input [4]: [ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5] +Arguments: hashpartitioning(cast(ws_order_number#3 as bigint), cast(ws_item_sk#2 as bigint), 5), true, [id=#11] + +(13) Sort [codegen id : 3] +Input [4]: [ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5] +Arguments: [cast(ws_order_number#3 as bigint) ASC NULLS FIRST, cast(ws_item_sk#2 as bigint) ASC NULLS FIRST], false, 0 + +(14) Scan parquet default.web_returns +Output [4]: [wr_item_sk#12, wr_order_number#13, wr_return_quantity#14, wr_return_amt#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/web_returns] +PushedFilters: [IsNotNull(wr_return_amt), GreaterThan(wr_return_amt,10000.00), 
IsNotNull(wr_item_sk), IsNotNull(wr_order_number)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 4] +Input [4]: [wr_item_sk#12, wr_order_number#13, wr_return_quantity#14, wr_return_amt#15] + +(16) Filter [codegen id : 4] +Input [4]: [wr_item_sk#12, wr_order_number#13, wr_return_quantity#14, wr_return_amt#15] +Condition : (((isnotnull(wr_return_amt#15) AND (wr_return_amt#15 > 10000.00)) AND isnotnull(wr_item_sk#12)) AND isnotnull(wr_order_number#13)) + +(17) Exchange +Input [4]: [wr_item_sk#12, wr_order_number#13, wr_return_quantity#14, wr_return_amt#15] +Arguments: hashpartitioning(wr_order_number#13, wr_item_sk#12, 5), true, [id=#16] + +(18) Sort [codegen id : 5] +Input [4]: [wr_item_sk#12, wr_order_number#13, wr_return_quantity#14, wr_return_amt#15] +Arguments: [wr_order_number#13 ASC NULLS FIRST, wr_item_sk#12 ASC NULLS FIRST], false, 0 + +(19) SortMergeJoin [codegen id : 6] +Left keys [2]: [cast(ws_order_number#3 as bigint), cast(ws_item_sk#2 as bigint)] +Right keys [2]: [wr_order_number#13, wr_item_sk#12] +Join condition: None + +(20) Project [codegen id : 6] +Output [5]: [ws_item_sk#2, ws_quantity#4, ws_net_paid#5, wr_return_quantity#14, wr_return_amt#15] +Input [8]: [ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5, wr_item_sk#12, wr_order_number#13, wr_return_quantity#14, wr_return_amt#15] + +(21) HashAggregate [codegen id : 6] +Input [5]: [ws_item_sk#2, ws_quantity#4, ws_net_paid#5, wr_return_quantity#14, wr_return_amt#15] +Keys [1]: [ws_item_sk#2] +Functions [4]: [partial_sum(cast(coalesce(wr_return_quantity#14, 0) as bigint)), partial_sum(cast(coalesce(ws_quantity#4, 0) as bigint)), partial_sum(coalesce(cast(wr_return_amt#15 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ws_net_paid#5 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#17, sum#18, sum#19, isEmpty#20, sum#21, isEmpty#22] +Results [7]: [ws_item_sk#2, sum#23, sum#24, sum#25, isEmpty#26, sum#27, isEmpty#28] + +(22) Exchange +Input [7]: [ws_item_sk#2, 
sum#23, sum#24, sum#25, isEmpty#26, sum#27, isEmpty#28] +Arguments: hashpartitioning(ws_item_sk#2, 5), true, [id=#29] + +(23) HashAggregate [codegen id : 7] +Input [7]: [ws_item_sk#2, sum#23, sum#24, sum#25, isEmpty#26, sum#27, isEmpty#28] +Keys [1]: [ws_item_sk#2] +Functions [4]: [sum(cast(coalesce(wr_return_quantity#14, 0) as bigint)), sum(cast(coalesce(ws_quantity#4, 0) as bigint)), sum(coalesce(cast(wr_return_amt#15 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#5 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(cast(coalesce(wr_return_quantity#14, 0) as bigint))#30, sum(cast(coalesce(ws_quantity#4, 0) as bigint))#31, sum(coalesce(cast(wr_return_amt#15 as decimal(12,2)), 0.00))#32, sum(coalesce(cast(ws_net_paid#5 as decimal(12,2)), 0.00))#33] +Results [3]: [ws_item_sk#2 AS item#34, CheckOverflow((promote_precision(cast(sum(cast(coalesce(wr_return_quantity#14, 0) as bigint))#30 as decimal(15,4))) / promote_precision(cast(sum(cast(coalesce(ws_quantity#4, 0) as bigint))#31 as decimal(15,4)))), DecimalType(35,20), true) AS return_ratio#35, CheckOverflow((promote_precision(cast(sum(coalesce(cast(wr_return_amt#15 as decimal(12,2)), 0.00))#32 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ws_net_paid#5 as decimal(12,2)), 0.00))#33 as decimal(15,4)))), DecimalType(35,20), true) AS currency_ratio#36] + +(24) Exchange +Input [3]: [item#34, return_ratio#35, currency_ratio#36] +Arguments: SinglePartition, true, [id=#37] + +(25) Sort [codegen id : 8] +Input [3]: [item#34, return_ratio#35, currency_ratio#36] +Arguments: [return_ratio#35 ASC NULLS FIRST], false, 0 + +(26) Window +Input [3]: [item#34, return_ratio#35, currency_ratio#36] +Arguments: [rank(return_ratio#35) windowspecdefinition(return_ratio#35 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#38], [return_ratio#35 ASC NULLS FIRST] + +(27) Sort [codegen id : 9] +Input [4]: [item#34, return_ratio#35, currency_ratio#36, 
return_rank#38] +Arguments: [currency_ratio#36 ASC NULLS FIRST], false, 0 + +(28) Window +Input [4]: [item#34, return_ratio#35, currency_ratio#36, return_rank#38] +Arguments: [rank(currency_ratio#36) windowspecdefinition(currency_ratio#36 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#39], [currency_ratio#36 ASC NULLS FIRST] + +(29) Filter [codegen id : 10] +Input [5]: [item#34, return_ratio#35, currency_ratio#36, return_rank#38, currency_rank#39] +Condition : ((return_rank#38 <= 10) OR (currency_rank#39 <= 10)) + +(30) Project [codegen id : 10] +Output [5]: [web AS channel#40, item#34, return_ratio#35, return_rank#38, currency_rank#39] +Input [5]: [item#34, return_ratio#35, currency_ratio#36, return_rank#38, currency_rank#39] + +(31) Scan parquet default.catalog_sales +Output [6]: [cs_sold_date_sk#41, cs_item_sk#42, cs_order_number#43, cs_quantity#44, cs_net_paid#45, cs_net_profit#46] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_net_paid), IsNotNull(cs_quantity), IsNotNull(cs_net_profit), GreaterThan(cs_net_profit,1.00), GreaterThan(cs_net_paid,0.00), GreaterThan(cs_quantity,0), IsNotNull(cs_item_sk), IsNotNull(cs_order_number), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 12] +Input [6]: [cs_sold_date_sk#41, cs_item_sk#42, cs_order_number#43, cs_quantity#44, cs_net_paid#45, cs_net_profit#46] + +(33) Filter [codegen id : 12] +Input [6]: [cs_sold_date_sk#41, cs_item_sk#42, cs_order_number#43, cs_quantity#44, cs_net_paid#45, cs_net_profit#46] +Condition : ((((((((isnotnull(cs_net_paid#45) AND isnotnull(cs_quantity#44)) AND isnotnull(cs_net_profit#46)) AND (cs_net_profit#46 > 1.00)) AND (cs_net_paid#45 > 0.00)) AND (cs_quantity#44 > 0)) AND isnotnull(cs_item_sk#42)) AND isnotnull(cs_order_number#43)) AND 
isnotnull(cs_sold_date_sk#41)) + +(34) Project [codegen id : 12] +Output [5]: [cs_sold_date_sk#41, cs_item_sk#42, cs_order_number#43, cs_quantity#44, cs_net_paid#45] +Input [6]: [cs_sold_date_sk#41, cs_item_sk#42, cs_order_number#43, cs_quantity#44, cs_net_paid#45, cs_net_profit#46] + +(35) ReusedExchange [Reuses operator id: 9] +Output [1]: [d_date_sk#7] + +(36) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [cs_sold_date_sk#41] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(37) Project [codegen id : 12] +Output [4]: [cs_item_sk#42, cs_order_number#43, cs_quantity#44, cs_net_paid#45] +Input [6]: [cs_sold_date_sk#41, cs_item_sk#42, cs_order_number#43, cs_quantity#44, cs_net_paid#45, d_date_sk#7] + +(38) Exchange +Input [4]: [cs_item_sk#42, cs_order_number#43, cs_quantity#44, cs_net_paid#45] +Arguments: hashpartitioning(cs_order_number#43, cs_item_sk#42, 5), true, [id=#47] + +(39) Sort [codegen id : 13] +Input [4]: [cs_item_sk#42, cs_order_number#43, cs_quantity#44, cs_net_paid#45] +Arguments: [cs_order_number#43 ASC NULLS FIRST, cs_item_sk#42 ASC NULLS FIRST], false, 0 + +(40) Scan parquet default.catalog_returns +Output [4]: [cr_item_sk#48, cr_order_number#49, cr_return_quantity#50, cr_return_amount#51] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/catalog_returns] +PushedFilters: [IsNotNull(cr_return_amount), GreaterThan(cr_return_amount,10000.00), IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 14] +Input [4]: [cr_item_sk#48, cr_order_number#49, cr_return_quantity#50, cr_return_amount#51] + +(42) Filter [codegen id : 14] +Input [4]: [cr_item_sk#48, cr_order_number#49, cr_return_quantity#50, cr_return_amount#51] +Condition : (((isnotnull(cr_return_amount#51) AND (cr_return_amount#51 > 10000.00)) AND isnotnull(cr_item_sk#48)) AND isnotnull(cr_order_number#49)) + +(43) 
Exchange +Input [4]: [cr_item_sk#48, cr_order_number#49, cr_return_quantity#50, cr_return_amount#51] +Arguments: hashpartitioning(cr_order_number#49, cr_item_sk#48, 5), true, [id=#52] + +(44) Sort [codegen id : 15] +Input [4]: [cr_item_sk#48, cr_order_number#49, cr_return_quantity#50, cr_return_amount#51] +Arguments: [cr_order_number#49 ASC NULLS FIRST, cr_item_sk#48 ASC NULLS FIRST], false, 0 + +(45) SortMergeJoin [codegen id : 16] +Left keys [2]: [cs_order_number#43, cs_item_sk#42] +Right keys [2]: [cr_order_number#49, cr_item_sk#48] +Join condition: None + +(46) Project [codegen id : 16] +Output [5]: [cs_item_sk#42, cs_quantity#44, cs_net_paid#45, cr_return_quantity#50, cr_return_amount#51] +Input [8]: [cs_item_sk#42, cs_order_number#43, cs_quantity#44, cs_net_paid#45, cr_item_sk#48, cr_order_number#49, cr_return_quantity#50, cr_return_amount#51] + +(47) HashAggregate [codegen id : 16] +Input [5]: [cs_item_sk#42, cs_quantity#44, cs_net_paid#45, cr_return_quantity#50, cr_return_amount#51] +Keys [1]: [cs_item_sk#42] +Functions [4]: [partial_sum(cast(coalesce(cr_return_quantity#50, 0) as bigint)), partial_sum(cast(coalesce(cs_quantity#44, 0) as bigint)), partial_sum(coalesce(cast(cr_return_amount#51 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(cs_net_paid#45 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#53, sum#54, sum#55, isEmpty#56, sum#57, isEmpty#58] +Results [7]: [cs_item_sk#42, sum#59, sum#60, sum#61, isEmpty#62, sum#63, isEmpty#64] + +(48) Exchange +Input [7]: [cs_item_sk#42, sum#59, sum#60, sum#61, isEmpty#62, sum#63, isEmpty#64] +Arguments: hashpartitioning(cs_item_sk#42, 5), true, [id=#65] + +(49) HashAggregate [codegen id : 17] +Input [7]: [cs_item_sk#42, sum#59, sum#60, sum#61, isEmpty#62, sum#63, isEmpty#64] +Keys [1]: [cs_item_sk#42] +Functions [4]: [sum(cast(coalesce(cr_return_quantity#50, 0) as bigint)), sum(cast(coalesce(cs_quantity#44, 0) as bigint)), sum(coalesce(cast(cr_return_amount#51 as decimal(12,2)), 0.00)), 
sum(coalesce(cast(cs_net_paid#45 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(cast(coalesce(cr_return_quantity#50, 0) as bigint))#66, sum(cast(coalesce(cs_quantity#44, 0) as bigint))#67, sum(coalesce(cast(cr_return_amount#51 as decimal(12,2)), 0.00))#68, sum(coalesce(cast(cs_net_paid#45 as decimal(12,2)), 0.00))#69] +Results [3]: [cs_item_sk#42 AS item#70, CheckOverflow((promote_precision(cast(sum(cast(coalesce(cr_return_quantity#50, 0) as bigint))#66 as decimal(15,4))) / promote_precision(cast(sum(cast(coalesce(cs_quantity#44, 0) as bigint))#67 as decimal(15,4)))), DecimalType(35,20), true) AS return_ratio#71, CheckOverflow((promote_precision(cast(sum(coalesce(cast(cr_return_amount#51 as decimal(12,2)), 0.00))#68 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(cs_net_paid#45 as decimal(12,2)), 0.00))#69 as decimal(15,4)))), DecimalType(35,20), true) AS currency_ratio#72] + +(50) Exchange +Input [3]: [item#70, return_ratio#71, currency_ratio#72] +Arguments: SinglePartition, true, [id=#73] + +(51) Sort [codegen id : 18] +Input [3]: [item#70, return_ratio#71, currency_ratio#72] +Arguments: [return_ratio#71 ASC NULLS FIRST], false, 0 + +(52) Window +Input [3]: [item#70, return_ratio#71, currency_ratio#72] +Arguments: [rank(return_ratio#71) windowspecdefinition(return_ratio#71 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#74], [return_ratio#71 ASC NULLS FIRST] + +(53) Sort [codegen id : 19] +Input [4]: [item#70, return_ratio#71, currency_ratio#72, return_rank#74] +Arguments: [currency_ratio#72 ASC NULLS FIRST], false, 0 + +(54) Window +Input [4]: [item#70, return_ratio#71, currency_ratio#72, return_rank#74] +Arguments: [rank(currency_ratio#72) windowspecdefinition(currency_ratio#72 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#75], [currency_ratio#72 ASC NULLS FIRST] + +(55) Filter [codegen id : 20] +Input [5]: [item#70, 
return_ratio#71, currency_ratio#72, return_rank#74, currency_rank#75] +Condition : ((return_rank#74 <= 10) OR (currency_rank#75 <= 10)) + +(56) Project [codegen id : 20] +Output [5]: [catalog AS channel#76, item#70, return_ratio#71, return_rank#74, currency_rank#75] +Input [5]: [item#70, return_ratio#71, currency_ratio#72, return_rank#74, currency_rank#75] + +(57) Scan parquet default.store_sales +Output [6]: [ss_sold_date_sk#77, ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81, ss_net_profit#82] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_net_profit), IsNotNull(ss_quantity), IsNotNull(ss_net_paid), GreaterThan(ss_net_profit,1.00), GreaterThan(ss_net_paid,0.00), GreaterThan(ss_quantity,0), IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(58) ColumnarToRow [codegen id : 22] +Input [6]: [ss_sold_date_sk#77, ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81, ss_net_profit#82] + +(59) Filter [codegen id : 22] +Input [6]: [ss_sold_date_sk#77, ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81, ss_net_profit#82] +Condition : ((((((((isnotnull(ss_net_profit#82) AND isnotnull(ss_quantity#80)) AND isnotnull(ss_net_paid#81)) AND (ss_net_profit#82 > 1.00)) AND (ss_net_paid#81 > 0.00)) AND (ss_quantity#80 > 0)) AND isnotnull(ss_ticket_number#79)) AND isnotnull(ss_item_sk#78)) AND isnotnull(ss_sold_date_sk#77)) + +(60) Project [codegen id : 22] +Output [5]: [ss_sold_date_sk#77, ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81] +Input [6]: [ss_sold_date_sk#77, ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81, ss_net_profit#82] + +(61) ReusedExchange [Reuses operator id: 9] +Output [1]: [d_date_sk#7] + +(62) BroadcastHashJoin [codegen id : 22] +Left keys [1]: 
[ss_sold_date_sk#77] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(63) Project [codegen id : 22] +Output [4]: [ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81] +Input [6]: [ss_sold_date_sk#77, ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81, d_date_sk#7] + +(64) Exchange +Input [4]: [ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81] +Arguments: hashpartitioning(cast(ss_ticket_number#79 as bigint), cast(ss_item_sk#78 as bigint), 5), true, [id=#83] + +(65) Sort [codegen id : 23] +Input [4]: [ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81] +Arguments: [cast(ss_ticket_number#79 as bigint) ASC NULLS FIRST, cast(ss_item_sk#78 as bigint) ASC NULLS FIRST], false, 0 + +(66) Scan parquet default.store_returns +Output [4]: [sr_item_sk#84, sr_ticket_number#85, sr_return_quantity#86, sr_return_amt#87] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_returns] +PushedFilters: [IsNotNull(sr_return_amt), GreaterThan(sr_return_amt,10000.00), IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(67) ColumnarToRow [codegen id : 24] +Input [4]: [sr_item_sk#84, sr_ticket_number#85, sr_return_quantity#86, sr_return_amt#87] + +(68) Filter [codegen id : 24] +Input [4]: [sr_item_sk#84, sr_ticket_number#85, sr_return_quantity#86, sr_return_amt#87] +Condition : (((isnotnull(sr_return_amt#87) AND (sr_return_amt#87 > 10000.00)) AND isnotnull(sr_ticket_number#85)) AND isnotnull(sr_item_sk#84)) + +(69) Exchange +Input [4]: [sr_item_sk#84, sr_ticket_number#85, sr_return_quantity#86, sr_return_amt#87] +Arguments: hashpartitioning(sr_ticket_number#85, sr_item_sk#84, 5), true, [id=#88] + +(70) Sort [codegen id : 25] +Input [4]: [sr_item_sk#84, sr_ticket_number#85, sr_return_quantity#86, sr_return_amt#87] +Arguments: [sr_ticket_number#85 ASC NULLS FIRST, sr_item_sk#84 ASC 
NULLS FIRST], false, 0 + +(71) SortMergeJoin [codegen id : 26] +Left keys [2]: [cast(ss_ticket_number#79 as bigint), cast(ss_item_sk#78 as bigint)] +Right keys [2]: [sr_ticket_number#85, sr_item_sk#84] +Join condition: None + +(72) Project [codegen id : 26] +Output [5]: [ss_item_sk#78, ss_quantity#80, ss_net_paid#81, sr_return_quantity#86, sr_return_amt#87] +Input [8]: [ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81, sr_item_sk#84, sr_ticket_number#85, sr_return_quantity#86, sr_return_amt#87] + +(73) HashAggregate [codegen id : 26] +Input [5]: [ss_item_sk#78, ss_quantity#80, ss_net_paid#81, sr_return_quantity#86, sr_return_amt#87] +Keys [1]: [ss_item_sk#78] +Functions [4]: [partial_sum(cast(coalesce(sr_return_quantity#86, 0) as bigint)), partial_sum(cast(coalesce(ss_quantity#80, 0) as bigint)), partial_sum(coalesce(cast(sr_return_amt#87 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ss_net_paid#81 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#89, sum#90, sum#91, isEmpty#92, sum#93, isEmpty#94] +Results [7]: [ss_item_sk#78, sum#95, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] + +(74) Exchange +Input [7]: [ss_item_sk#78, sum#95, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] +Arguments: hashpartitioning(ss_item_sk#78, 5), true, [id=#101] + +(75) HashAggregate [codegen id : 27] +Input [7]: [ss_item_sk#78, sum#95, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] +Keys [1]: [ss_item_sk#78] +Functions [4]: [sum(cast(coalesce(sr_return_quantity#86, 0) as bigint)), sum(cast(coalesce(ss_quantity#80, 0) as bigint)), sum(coalesce(cast(sr_return_amt#87 as decimal(12,2)), 0.00)), sum(coalesce(cast(ss_net_paid#81 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(cast(coalesce(sr_return_quantity#86, 0) as bigint))#102, sum(cast(coalesce(ss_quantity#80, 0) as bigint))#103, sum(coalesce(cast(sr_return_amt#87 as decimal(12,2)), 0.00))#104, sum(coalesce(cast(ss_net_paid#81 as decimal(12,2)), 0.00))#105] +Results [3]: 
[ss_item_sk#78 AS item#106, CheckOverflow((promote_precision(cast(sum(cast(coalesce(sr_return_quantity#86, 0) as bigint))#102 as decimal(15,4))) / promote_precision(cast(sum(cast(coalesce(ss_quantity#80, 0) as bigint))#103 as decimal(15,4)))), DecimalType(35,20), true) AS return_ratio#107, CheckOverflow((promote_precision(cast(sum(coalesce(cast(sr_return_amt#87 as decimal(12,2)), 0.00))#104 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ss_net_paid#81 as decimal(12,2)), 0.00))#105 as decimal(15,4)))), DecimalType(35,20), true) AS currency_ratio#108] + +(76) Exchange +Input [3]: [item#106, return_ratio#107, currency_ratio#108] +Arguments: SinglePartition, true, [id=#109] + +(77) Sort [codegen id : 28] +Input [3]: [item#106, return_ratio#107, currency_ratio#108] +Arguments: [return_ratio#107 ASC NULLS FIRST], false, 0 + +(78) Window +Input [3]: [item#106, return_ratio#107, currency_ratio#108] +Arguments: [rank(return_ratio#107) windowspecdefinition(return_ratio#107 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#110], [return_ratio#107 ASC NULLS FIRST] + +(79) Sort [codegen id : 29] +Input [4]: [item#106, return_ratio#107, currency_ratio#108, return_rank#110] +Arguments: [currency_ratio#108 ASC NULLS FIRST], false, 0 + +(80) Window +Input [4]: [item#106, return_ratio#107, currency_ratio#108, return_rank#110] +Arguments: [rank(currency_ratio#108) windowspecdefinition(currency_ratio#108 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#111], [currency_ratio#108 ASC NULLS FIRST] + +(81) Filter [codegen id : 30] +Input [5]: [item#106, return_ratio#107, currency_ratio#108, return_rank#110, currency_rank#111] +Condition : ((return_rank#110 <= 10) OR (currency_rank#111 <= 10)) + +(82) Project [codegen id : 30] +Output [5]: [store AS channel#112, item#106, return_ratio#107, return_rank#110, currency_rank#111] +Input [5]: [item#106, return_ratio#107, 
currency_ratio#108, return_rank#110, currency_rank#111] + +(83) Union + +(84) HashAggregate [codegen id : 31] +Input [5]: [channel#40, item#34, return_ratio#35, return_rank#38, currency_rank#39] +Keys [5]: [channel#40, item#34, return_ratio#35, return_rank#38, currency_rank#39] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#40, item#34, return_ratio#35, return_rank#38, currency_rank#39] + +(85) Exchange +Input [5]: [channel#40, item#34, return_ratio#35, return_rank#38, currency_rank#39] +Arguments: hashpartitioning(channel#40, item#34, return_ratio#35, return_rank#38, currency_rank#39, 5), true, [id=#113] + +(86) HashAggregate [codegen id : 32] +Input [5]: [channel#40, item#34, return_ratio#35, return_rank#38, currency_rank#39] +Keys [5]: [channel#40, item#34, return_ratio#35, return_rank#38, currency_rank#39] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#40, item#34, return_ratio#35, return_rank#38, currency_rank#39] + +(87) TakeOrderedAndProject +Input [5]: [channel#40, item#34, return_ratio#35, return_rank#38, currency_rank#39] +Arguments: 100, [channel#40 ASC NULLS FIRST, return_rank#38 ASC NULLS FIRST, currency_rank#39 ASC NULLS FIRST, item#34 ASC NULLS FIRST], [channel#40, item#34, return_ratio#35, return_rank#38, currency_rank#39] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49.sf100/simplified.txt new file mode 100644 index 0000000000000..7fc20fb4df8d2 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49.sf100/simplified.txt @@ -0,0 +1,153 @@ +TakeOrderedAndProject [channel,currency_rank,item,return_rank,return_ratio] + WholeStageCodegen (32) + HashAggregate [channel,currency_rank,item,return_rank,return_ratio] + InputAdapter + Exchange [channel,currency_rank,item,return_rank,return_ratio] #1 + WholeStageCodegen (31) + HashAggregate 
[channel,currency_rank,item,return_rank,return_ratio] + InputAdapter + Union + WholeStageCodegen (10) + Project [currency_rank,item,return_rank,return_ratio] + Filter [currency_rank,return_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (9) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (8) + Sort [return_ratio] + InputAdapter + Exchange #2 + WholeStageCodegen (7) + HashAggregate [isEmpty,isEmpty,sum,sum,sum,sum,ws_item_sk] [currency_ratio,isEmpty,isEmpty,item,return_ratio,sum,sum,sum,sum,sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum(cast(coalesce(ws_quantity, 0) as bigint)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00))] + InputAdapter + Exchange [ws_item_sk] #3 + WholeStageCodegen (6) + HashAggregate [wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity] [isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum,sum,sum] + Project [wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity] + SortMergeJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + InputAdapter + WholeStageCodegen (3) + Sort [ws_item_sk,ws_order_number] + InputAdapter + Exchange [ws_item_sk,ws_order_number] #4 + WholeStageCodegen (2) + Project [ws_item_sk,ws_net_paid,ws_order_number,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_item_sk,ws_net_paid,ws_order_number,ws_quantity,ws_sold_date_sk] + Filter [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + WholeStageCodegen (5) + Sort [wr_item_sk,wr_order_number] 
+ InputAdapter + Exchange [wr_item_sk,wr_order_number] #6 + WholeStageCodegen (4) + Filter [wr_item_sk,wr_order_number,wr_return_amt] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] + WholeStageCodegen (20) + Project [currency_rank,item,return_rank,return_ratio] + Filter [currency_rank,return_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (19) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (18) + Sort [return_ratio] + InputAdapter + Exchange #7 + WholeStageCodegen (17) + HashAggregate [cs_item_sk,isEmpty,isEmpty,sum,sum,sum,sum] [currency_ratio,isEmpty,isEmpty,item,return_ratio,sum,sum,sum,sum,sum(cast(coalesce(cr_return_quantity, 0) as bigint)),sum(cast(coalesce(cs_quantity, 0) as bigint)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00))] + InputAdapter + Exchange [cs_item_sk] #8 + WholeStageCodegen (16) + HashAggregate [cr_return_amount,cr_return_quantity,cs_item_sk,cs_net_paid,cs_quantity] [isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum,sum,sum] + Project [cr_return_amount,cr_return_quantity,cs_item_sk,cs_net_paid,cs_quantity] + SortMergeJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + InputAdapter + WholeStageCodegen (13) + Sort [cs_item_sk,cs_order_number] + InputAdapter + Exchange [cs_item_sk,cs_order_number] #9 + WholeStageCodegen (12) + Project [cs_item_sk,cs_net_paid,cs_order_number,cs_quantity] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_item_sk,cs_net_paid,cs_order_number,cs_quantity,cs_sold_date_sk] + Filter [cs_item_sk,cs_net_paid,cs_net_profit,cs_order_number,cs_quantity,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_item_sk,cs_net_paid,cs_net_profit,cs_order_number,cs_quantity,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 + InputAdapter + 
WholeStageCodegen (15) + Sort [cr_item_sk,cr_order_number] + InputAdapter + Exchange [cr_item_sk,cr_order_number] #10 + WholeStageCodegen (14) + Filter [cr_item_sk,cr_order_number,cr_return_amount] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] + WholeStageCodegen (30) + Project [currency_rank,item,return_rank,return_ratio] + Filter [currency_rank,return_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (29) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (28) + Sort [return_ratio] + InputAdapter + Exchange #11 + WholeStageCodegen (27) + HashAggregate [isEmpty,isEmpty,ss_item_sk,sum,sum,sum,sum] [currency_ratio,isEmpty,isEmpty,item,return_ratio,sum,sum,sum,sum,sum(cast(coalesce(sr_return_quantity, 0) as bigint)),sum(cast(coalesce(ss_quantity, 0) as bigint)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00))] + InputAdapter + Exchange [ss_item_sk] #12 + WholeStageCodegen (26) + HashAggregate [sr_return_amt,sr_return_quantity,ss_item_sk,ss_net_paid,ss_quantity] [isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum,sum,sum] + Project [sr_return_amt,sr_return_quantity,ss_item_sk,ss_net_paid,ss_quantity] + SortMergeJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + InputAdapter + WholeStageCodegen (23) + Sort [ss_item_sk,ss_ticket_number] + InputAdapter + Exchange [ss_item_sk,ss_ticket_number] #13 + WholeStageCodegen (22) + Project [ss_item_sk,ss_net_paid,ss_quantity,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_item_sk,ss_net_paid,ss_quantity,ss_sold_date_sk,ss_ticket_number] + Filter [ss_item_sk,ss_net_paid,ss_net_profit,ss_quantity,ss_sold_date_sk,ss_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_net_paid,ss_net_profit,ss_quantity,ss_sold_date_sk,ss_ticket_number] + 
InputAdapter + ReusedExchange [d_date_sk] #5 + InputAdapter + WholeStageCodegen (25) + Sort [sr_item_sk,sr_ticket_number] + InputAdapter + Exchange [sr_item_sk,sr_ticket_number] #14 + WholeStageCodegen (24) + Filter [sr_item_sk,sr_return_amt,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49/explain.txt new file mode 100644 index 0000000000000..0ce48667d73a1 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49/explain.txt @@ -0,0 +1,433 @@ +== Physical Plan == +TakeOrderedAndProject (78) ++- * HashAggregate (77) + +- Exchange (76) + +- * HashAggregate (75) + +- Union (74) + :- * Project (27) + : +- * Filter (26) + : +- Window (25) + : +- * Sort (24) + : +- Window (23) + : +- * Sort (22) + : +- Exchange (21) + : +- * HashAggregate (20) + : +- Exchange (19) + : +- * HashAggregate (18) + : +- * Project (17) + : +- * BroadcastHashJoin Inner BuildRight (16) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.web_sales (1) + : : +- BroadcastExchange (8) + : : +- * Filter (7) + : : +- * ColumnarToRow (6) + : : +- Scan parquet default.web_returns (5) + : +- BroadcastExchange (15) + : +- * Project (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet default.date_dim (11) + :- * Project (50) + : +- * Filter (49) + : +- Window (48) + : +- * Sort (47) + : +- Window (46) + : +- * Sort (45) + : +- Exchange (44) + : +- * HashAggregate (43) + : +- Exchange (42) + : +- * HashAggregate (41) + : +- * Project (40) + : +- * BroadcastHashJoin Inner BuildRight (39) + : :- * Project (37) + : : +- * BroadcastHashJoin Inner BuildRight 
(36) + : : :- * Project (31) + : : : +- * Filter (30) + : : : +- * ColumnarToRow (29) + : : : +- Scan parquet default.catalog_sales (28) + : : +- BroadcastExchange (35) + : : +- * Filter (34) + : : +- * ColumnarToRow (33) + : : +- Scan parquet default.catalog_returns (32) + : +- ReusedExchange (38) + +- * Project (73) + +- * Filter (72) + +- Window (71) + +- * Sort (70) + +- Window (69) + +- * Sort (68) + +- Exchange (67) + +- * HashAggregate (66) + +- Exchange (65) + +- * HashAggregate (64) + +- * Project (63) + +- * BroadcastHashJoin Inner BuildRight (62) + :- * Project (60) + : +- * BroadcastHashJoin Inner BuildRight (59) + : :- * Project (54) + : : +- * Filter (53) + : : +- * ColumnarToRow (52) + : : +- Scan parquet default.store_sales (51) + : +- BroadcastExchange (58) + : +- * Filter (57) + : +- * ColumnarToRow (56) + : +- Scan parquet default.store_returns (55) + +- ReusedExchange (61) + + +(1) Scan parquet default.web_sales +Output [6]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5, ws_net_profit#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_net_paid), IsNotNull(ws_quantity), IsNotNull(ws_net_profit), GreaterThan(ws_net_profit,1.00), GreaterThan(ws_net_paid,0.00), GreaterThan(ws_quantity,0), IsNotNull(ws_order_number), IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [6]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5, ws_net_profit#6] + +(3) Filter [codegen id : 3] +Input [6]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5, ws_net_profit#6] +Condition : ((((((((isnotnull(ws_net_paid#5) AND isnotnull(ws_quantity#4)) AND isnotnull(ws_net_profit#6)) AND (ws_net_profit#6 > 1.00)) AND (ws_net_paid#5 > 0.00)) AND (ws_quantity#4 > 0)) AND 
isnotnull(ws_order_number#3)) AND isnotnull(ws_item_sk#2)) AND isnotnull(ws_sold_date_sk#1)) + +(4) Project [codegen id : 3] +Output [5]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5] +Input [6]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5, ws_net_profit#6] + +(5) Scan parquet default.web_returns +Output [4]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/web_returns] +PushedFilters: [IsNotNull(wr_return_amt), GreaterThan(wr_return_amt,10000.00), IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [4]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] + +(7) Filter [codegen id : 1] +Input [4]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] +Condition : (((isnotnull(wr_return_amt#10) AND (wr_return_amt#10 > 10000.00)) AND isnotnull(wr_order_number#8)) AND isnotnull(wr_item_sk#7)) + +(8) BroadcastExchange +Input [4]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#11] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [2]: [cast(ws_order_number#3 as bigint), cast(ws_item_sk#2 as bigint)] +Right keys [2]: [wr_order_number#8, wr_item_sk#7] +Join condition: None + +(10) Project [codegen id : 3] +Output [6]: [ws_sold_date_sk#1, ws_item_sk#2, ws_quantity#4, ws_net_paid#5, wr_return_quantity#9, wr_return_amt#10] +Input [9]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5, wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] + +(11) Scan parquet default.date_dim +Output [3]: [d_date_sk#12, d_year#13, d_moy#14] +Batched: 
true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,12), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#12, d_year#13, d_moy#14] + +(13) Filter [codegen id : 2] +Input [3]: [d_date_sk#12, d_year#13, d_moy#14] +Condition : ((((isnotnull(d_year#13) AND isnotnull(d_moy#14)) AND (d_year#13 = 2001)) AND (d_moy#14 = 12)) AND isnotnull(d_date_sk#12)) + +(14) Project [codegen id : 2] +Output [1]: [d_date_sk#12] +Input [3]: [d_date_sk#12, d_year#13, d_moy#14] + +(15) BroadcastExchange +Input [1]: [d_date_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_sold_date_sk#1] +Right keys [1]: [d_date_sk#12] +Join condition: None + +(17) Project [codegen id : 3] +Output [5]: [ws_item_sk#2, ws_quantity#4, ws_net_paid#5, wr_return_quantity#9, wr_return_amt#10] +Input [7]: [ws_sold_date_sk#1, ws_item_sk#2, ws_quantity#4, ws_net_paid#5, wr_return_quantity#9, wr_return_amt#10, d_date_sk#12] + +(18) HashAggregate [codegen id : 3] +Input [5]: [ws_item_sk#2, ws_quantity#4, ws_net_paid#5, wr_return_quantity#9, wr_return_amt#10] +Keys [1]: [ws_item_sk#2] +Functions [4]: [partial_sum(cast(coalesce(wr_return_quantity#9, 0) as bigint)), partial_sum(cast(coalesce(ws_quantity#4, 0) as bigint)), partial_sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ws_net_paid#5 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#16, sum#17, sum#18, isEmpty#19, sum#20, isEmpty#21] +Results [7]: [ws_item_sk#2, sum#22, sum#23, sum#24, isEmpty#25, sum#26, isEmpty#27] + +(19) Exchange +Input [7]: [ws_item_sk#2, sum#22, sum#23, sum#24, isEmpty#25, sum#26, isEmpty#27] +Arguments: 
hashpartitioning(ws_item_sk#2, 5), true, [id=#28] + +(20) HashAggregate [codegen id : 4] +Input [7]: [ws_item_sk#2, sum#22, sum#23, sum#24, isEmpty#25, sum#26, isEmpty#27] +Keys [1]: [ws_item_sk#2] +Functions [4]: [sum(cast(coalesce(wr_return_quantity#9, 0) as bigint)), sum(cast(coalesce(ws_quantity#4, 0) as bigint)), sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#5 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(cast(coalesce(wr_return_quantity#9, 0) as bigint))#29, sum(cast(coalesce(ws_quantity#4, 0) as bigint))#30, sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00))#31, sum(coalesce(cast(ws_net_paid#5 as decimal(12,2)), 0.00))#32] +Results [3]: [ws_item_sk#2 AS item#33, CheckOverflow((promote_precision(cast(sum(cast(coalesce(wr_return_quantity#9, 0) as bigint))#29 as decimal(15,4))) / promote_precision(cast(sum(cast(coalesce(ws_quantity#4, 0) as bigint))#30 as decimal(15,4)))), DecimalType(35,20), true) AS return_ratio#34, CheckOverflow((promote_precision(cast(sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00))#31 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ws_net_paid#5 as decimal(12,2)), 0.00))#32 as decimal(15,4)))), DecimalType(35,20), true) AS currency_ratio#35] + +(21) Exchange +Input [3]: [item#33, return_ratio#34, currency_ratio#35] +Arguments: SinglePartition, true, [id=#36] + +(22) Sort [codegen id : 5] +Input [3]: [item#33, return_ratio#34, currency_ratio#35] +Arguments: [return_ratio#34 ASC NULLS FIRST], false, 0 + +(23) Window +Input [3]: [item#33, return_ratio#34, currency_ratio#35] +Arguments: [rank(return_ratio#34) windowspecdefinition(return_ratio#34 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#37], [return_ratio#34 ASC NULLS FIRST] + +(24) Sort [codegen id : 6] +Input [4]: [item#33, return_ratio#34, currency_ratio#35, return_rank#37] +Arguments: [currency_ratio#35 ASC NULLS FIRST], false, 0 + +(25) 
Window +Input [4]: [item#33, return_ratio#34, currency_ratio#35, return_rank#37] +Arguments: [rank(currency_ratio#35) windowspecdefinition(currency_ratio#35 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#38], [currency_ratio#35 ASC NULLS FIRST] + +(26) Filter [codegen id : 7] +Input [5]: [item#33, return_ratio#34, currency_ratio#35, return_rank#37, currency_rank#38] +Condition : ((return_rank#37 <= 10) OR (currency_rank#38 <= 10)) + +(27) Project [codegen id : 7] +Output [5]: [web AS channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] +Input [5]: [item#33, return_ratio#34, currency_ratio#35, return_rank#37, currency_rank#38] + +(28) Scan parquet default.catalog_sales +Output [6]: [cs_sold_date_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_net_paid#44, cs_net_profit#45] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_net_paid), IsNotNull(cs_quantity), IsNotNull(cs_net_profit), GreaterThan(cs_net_profit,1.00), GreaterThan(cs_net_paid,0.00), GreaterThan(cs_quantity,0), IsNotNull(cs_order_number), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 10] +Input [6]: [cs_sold_date_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_net_paid#44, cs_net_profit#45] + +(30) Filter [codegen id : 10] +Input [6]: [cs_sold_date_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_net_paid#44, cs_net_profit#45] +Condition : ((((((((isnotnull(cs_net_paid#44) AND isnotnull(cs_quantity#43)) AND isnotnull(cs_net_profit#45)) AND (cs_net_profit#45 > 1.00)) AND (cs_net_paid#44 > 0.00)) AND (cs_quantity#43 > 0)) AND isnotnull(cs_order_number#42)) AND isnotnull(cs_item_sk#41)) AND isnotnull(cs_sold_date_sk#40)) + +(31) Project [codegen id : 10] +Output [5]: [cs_sold_date_sk#40, 
cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_net_paid#44] +Input [6]: [cs_sold_date_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_net_paid#44, cs_net_profit#45] + +(32) Scan parquet default.catalog_returns +Output [4]: [cr_item_sk#46, cr_order_number#47, cr_return_quantity#48, cr_return_amount#49] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/catalog_returns] +PushedFilters: [IsNotNull(cr_return_amount), GreaterThan(cr_return_amount,10000.00), IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] +ReadSchema: struct + +(33) ColumnarToRow [codegen id : 8] +Input [4]: [cr_item_sk#46, cr_order_number#47, cr_return_quantity#48, cr_return_amount#49] + +(34) Filter [codegen id : 8] +Input [4]: [cr_item_sk#46, cr_order_number#47, cr_return_quantity#48, cr_return_amount#49] +Condition : (((isnotnull(cr_return_amount#49) AND (cr_return_amount#49 > 10000.00)) AND isnotnull(cr_item_sk#46)) AND isnotnull(cr_order_number#47)) + +(35) BroadcastExchange +Input [4]: [cr_item_sk#46, cr_order_number#47, cr_return_quantity#48, cr_return_amount#49] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, false] as bigint), 32) | (cast(input[0, int, false] as bigint) & 4294967295))),false), [id=#50] + +(36) BroadcastHashJoin [codegen id : 10] +Left keys [2]: [cs_order_number#42, cs_item_sk#41] +Right keys [2]: [cr_order_number#47, cr_item_sk#46] +Join condition: None + +(37) Project [codegen id : 10] +Output [6]: [cs_sold_date_sk#40, cs_item_sk#41, cs_quantity#43, cs_net_paid#44, cr_return_quantity#48, cr_return_amount#49] +Input [9]: [cs_sold_date_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_net_paid#44, cr_item_sk#46, cr_order_number#47, cr_return_quantity#48, cr_return_amount#49] + +(38) ReusedExchange [Reuses operator id: 15] +Output [1]: [d_date_sk#12] + +(39) BroadcastHashJoin [codegen id : 10] +Left keys [1]: 
[cs_sold_date_sk#40] +Right keys [1]: [d_date_sk#12] +Join condition: None + +(40) Project [codegen id : 10] +Output [5]: [cs_item_sk#41, cs_quantity#43, cs_net_paid#44, cr_return_quantity#48, cr_return_amount#49] +Input [7]: [cs_sold_date_sk#40, cs_item_sk#41, cs_quantity#43, cs_net_paid#44, cr_return_quantity#48, cr_return_amount#49, d_date_sk#12] + +(41) HashAggregate [codegen id : 10] +Input [5]: [cs_item_sk#41, cs_quantity#43, cs_net_paid#44, cr_return_quantity#48, cr_return_amount#49] +Keys [1]: [cs_item_sk#41] +Functions [4]: [partial_sum(cast(coalesce(cr_return_quantity#48, 0) as bigint)), partial_sum(cast(coalesce(cs_quantity#43, 0) as bigint)), partial_sum(coalesce(cast(cr_return_amount#49 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(cs_net_paid#44 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#51, sum#52, sum#53, isEmpty#54, sum#55, isEmpty#56] +Results [7]: [cs_item_sk#41, sum#57, sum#58, sum#59, isEmpty#60, sum#61, isEmpty#62] + +(42) Exchange +Input [7]: [cs_item_sk#41, sum#57, sum#58, sum#59, isEmpty#60, sum#61, isEmpty#62] +Arguments: hashpartitioning(cs_item_sk#41, 5), true, [id=#63] + +(43) HashAggregate [codegen id : 11] +Input [7]: [cs_item_sk#41, sum#57, sum#58, sum#59, isEmpty#60, sum#61, isEmpty#62] +Keys [1]: [cs_item_sk#41] +Functions [4]: [sum(cast(coalesce(cr_return_quantity#48, 0) as bigint)), sum(cast(coalesce(cs_quantity#43, 0) as bigint)), sum(coalesce(cast(cr_return_amount#49 as decimal(12,2)), 0.00)), sum(coalesce(cast(cs_net_paid#44 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(cast(coalesce(cr_return_quantity#48, 0) as bigint))#64, sum(cast(coalesce(cs_quantity#43, 0) as bigint))#65, sum(coalesce(cast(cr_return_amount#49 as decimal(12,2)), 0.00))#66, sum(coalesce(cast(cs_net_paid#44 as decimal(12,2)), 0.00))#67] +Results [3]: [cs_item_sk#41 AS item#68, CheckOverflow((promote_precision(cast(sum(cast(coalesce(cr_return_quantity#48, 0) as bigint))#64 as decimal(15,4))) / 
promote_precision(cast(sum(cast(coalesce(cs_quantity#43, 0) as bigint))#65 as decimal(15,4)))), DecimalType(35,20), true) AS return_ratio#69, CheckOverflow((promote_precision(cast(sum(coalesce(cast(cr_return_amount#49 as decimal(12,2)), 0.00))#66 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(cs_net_paid#44 as decimal(12,2)), 0.00))#67 as decimal(15,4)))), DecimalType(35,20), true) AS currency_ratio#70] + +(44) Exchange +Input [3]: [item#68, return_ratio#69, currency_ratio#70] +Arguments: SinglePartition, true, [id=#71] + +(45) Sort [codegen id : 12] +Input [3]: [item#68, return_ratio#69, currency_ratio#70] +Arguments: [return_ratio#69 ASC NULLS FIRST], false, 0 + +(46) Window +Input [3]: [item#68, return_ratio#69, currency_ratio#70] +Arguments: [rank(return_ratio#69) windowspecdefinition(return_ratio#69 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#72], [return_ratio#69 ASC NULLS FIRST] + +(47) Sort [codegen id : 13] +Input [4]: [item#68, return_ratio#69, currency_ratio#70, return_rank#72] +Arguments: [currency_ratio#70 ASC NULLS FIRST], false, 0 + +(48) Window +Input [4]: [item#68, return_ratio#69, currency_ratio#70, return_rank#72] +Arguments: [rank(currency_ratio#70) windowspecdefinition(currency_ratio#70 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#73], [currency_ratio#70 ASC NULLS FIRST] + +(49) Filter [codegen id : 14] +Input [5]: [item#68, return_ratio#69, currency_ratio#70, return_rank#72, currency_rank#73] +Condition : ((return_rank#72 <= 10) OR (currency_rank#73 <= 10)) + +(50) Project [codegen id : 14] +Output [5]: [catalog AS channel#74, item#68, return_ratio#69, return_rank#72, currency_rank#73] +Input [5]: [item#68, return_ratio#69, currency_ratio#70, return_rank#72, currency_rank#73] + +(51) Scan parquet default.store_sales +Output [6]: [ss_sold_date_sk#75, ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, 
ss_net_paid#79, ss_net_profit#80] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_quantity), IsNotNull(ss_net_paid), IsNotNull(ss_net_profit), GreaterThan(ss_net_profit,1.00), GreaterThan(ss_net_paid,0.00), GreaterThan(ss_quantity,0), IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(52) ColumnarToRow [codegen id : 17] +Input [6]: [ss_sold_date_sk#75, ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, ss_net_paid#79, ss_net_profit#80] + +(53) Filter [codegen id : 17] +Input [6]: [ss_sold_date_sk#75, ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, ss_net_paid#79, ss_net_profit#80] +Condition : ((((((((isnotnull(ss_quantity#78) AND isnotnull(ss_net_paid#79)) AND isnotnull(ss_net_profit#80)) AND (ss_net_profit#80 > 1.00)) AND (ss_net_paid#79 > 0.00)) AND (ss_quantity#78 > 0)) AND isnotnull(ss_item_sk#76)) AND isnotnull(ss_ticket_number#77)) AND isnotnull(ss_sold_date_sk#75)) + +(54) Project [codegen id : 17] +Output [5]: [ss_sold_date_sk#75, ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, ss_net_paid#79] +Input [6]: [ss_sold_date_sk#75, ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, ss_net_paid#79, ss_net_profit#80] + +(55) Scan parquet default.store_returns +Output [4]: [sr_item_sk#81, sr_ticket_number#82, sr_return_quantity#83, sr_return_amt#84] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_returns] +PushedFilters: [IsNotNull(sr_return_amt), GreaterThan(sr_return_amt,10000.00), IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] +ReadSchema: struct + +(56) ColumnarToRow [codegen id : 15] +Input [4]: [sr_item_sk#81, sr_ticket_number#82, sr_return_quantity#83, sr_return_amt#84] + +(57) Filter [codegen id : 15] +Input [4]: [sr_item_sk#81, 
sr_ticket_number#82, sr_return_quantity#83, sr_return_amt#84] +Condition : (((isnotnull(sr_return_amt#84) AND (sr_return_amt#84 > 10000.00)) AND isnotnull(sr_item_sk#81)) AND isnotnull(sr_ticket_number#82)) + +(58) BroadcastExchange +Input [4]: [sr_item_sk#81, sr_ticket_number#82, sr_return_quantity#83, sr_return_amt#84] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#85] + +(59) BroadcastHashJoin [codegen id : 17] +Left keys [2]: [cast(ss_ticket_number#77 as bigint), cast(ss_item_sk#76 as bigint)] +Right keys [2]: [sr_ticket_number#82, sr_item_sk#81] +Join condition: None + +(60) Project [codegen id : 17] +Output [6]: [ss_sold_date_sk#75, ss_item_sk#76, ss_quantity#78, ss_net_paid#79, sr_return_quantity#83, sr_return_amt#84] +Input [9]: [ss_sold_date_sk#75, ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, ss_net_paid#79, sr_item_sk#81, sr_ticket_number#82, sr_return_quantity#83, sr_return_amt#84] + +(61) ReusedExchange [Reuses operator id: 15] +Output [1]: [d_date_sk#12] + +(62) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ss_sold_date_sk#75] +Right keys [1]: [d_date_sk#12] +Join condition: None + +(63) Project [codegen id : 17] +Output [5]: [ss_item_sk#76, ss_quantity#78, ss_net_paid#79, sr_return_quantity#83, sr_return_amt#84] +Input [7]: [ss_sold_date_sk#75, ss_item_sk#76, ss_quantity#78, ss_net_paid#79, sr_return_quantity#83, sr_return_amt#84, d_date_sk#12] + +(64) HashAggregate [codegen id : 17] +Input [5]: [ss_item_sk#76, ss_quantity#78, ss_net_paid#79, sr_return_quantity#83, sr_return_amt#84] +Keys [1]: [ss_item_sk#76] +Functions [4]: [partial_sum(cast(coalesce(sr_return_quantity#83, 0) as bigint)), partial_sum(cast(coalesce(ss_quantity#78, 0) as bigint)), partial_sum(coalesce(cast(sr_return_amt#84 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ss_net_paid#79 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#86, sum#87, sum#88, isEmpty#89, sum#90, isEmpty#91] +Results [7]: 
[ss_item_sk#76, sum#92, sum#93, sum#94, isEmpty#95, sum#96, isEmpty#97] + +(65) Exchange +Input [7]: [ss_item_sk#76, sum#92, sum#93, sum#94, isEmpty#95, sum#96, isEmpty#97] +Arguments: hashpartitioning(ss_item_sk#76, 5), true, [id=#98] + +(66) HashAggregate [codegen id : 18] +Input [7]: [ss_item_sk#76, sum#92, sum#93, sum#94, isEmpty#95, sum#96, isEmpty#97] +Keys [1]: [ss_item_sk#76] +Functions [4]: [sum(cast(coalesce(sr_return_quantity#83, 0) as bigint)), sum(cast(coalesce(ss_quantity#78, 0) as bigint)), sum(coalesce(cast(sr_return_amt#84 as decimal(12,2)), 0.00)), sum(coalesce(cast(ss_net_paid#79 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(cast(coalesce(sr_return_quantity#83, 0) as bigint))#99, sum(cast(coalesce(ss_quantity#78, 0) as bigint))#100, sum(coalesce(cast(sr_return_amt#84 as decimal(12,2)), 0.00))#101, sum(coalesce(cast(ss_net_paid#79 as decimal(12,2)), 0.00))#102] +Results [3]: [ss_item_sk#76 AS item#103, CheckOverflow((promote_precision(cast(sum(cast(coalesce(sr_return_quantity#83, 0) as bigint))#99 as decimal(15,4))) / promote_precision(cast(sum(cast(coalesce(ss_quantity#78, 0) as bigint))#100 as decimal(15,4)))), DecimalType(35,20), true) AS return_ratio#104, CheckOverflow((promote_precision(cast(sum(coalesce(cast(sr_return_amt#84 as decimal(12,2)), 0.00))#101 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ss_net_paid#79 as decimal(12,2)), 0.00))#102 as decimal(15,4)))), DecimalType(35,20), true) AS currency_ratio#105] + +(67) Exchange +Input [3]: [item#103, return_ratio#104, currency_ratio#105] +Arguments: SinglePartition, true, [id=#106] + +(68) Sort [codegen id : 19] +Input [3]: [item#103, return_ratio#104, currency_ratio#105] +Arguments: [return_ratio#104 ASC NULLS FIRST], false, 0 + +(69) Window +Input [3]: [item#103, return_ratio#104, currency_ratio#105] +Arguments: [rank(return_ratio#104) windowspecdefinition(return_ratio#104 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) 
AS return_rank#107], [return_ratio#104 ASC NULLS FIRST] + +(70) Sort [codegen id : 20] +Input [4]: [item#103, return_ratio#104, currency_ratio#105, return_rank#107] +Arguments: [currency_ratio#105 ASC NULLS FIRST], false, 0 + +(71) Window +Input [4]: [item#103, return_ratio#104, currency_ratio#105, return_rank#107] +Arguments: [rank(currency_ratio#105) windowspecdefinition(currency_ratio#105 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#108], [currency_ratio#105 ASC NULLS FIRST] + +(72) Filter [codegen id : 21] +Input [5]: [item#103, return_ratio#104, currency_ratio#105, return_rank#107, currency_rank#108] +Condition : ((return_rank#107 <= 10) OR (currency_rank#108 <= 10)) + +(73) Project [codegen id : 21] +Output [5]: [store AS channel#109, item#103, return_ratio#104, return_rank#107, currency_rank#108] +Input [5]: [item#103, return_ratio#104, currency_ratio#105, return_rank#107, currency_rank#108] + +(74) Union + +(75) HashAggregate [codegen id : 22] +Input [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] +Keys [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] + +(76) Exchange +Input [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] +Arguments: hashpartitioning(channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38, 5), true, [id=#110] + +(77) HashAggregate [codegen id : 23] +Input [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] +Keys [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] + +(78) TakeOrderedAndProject +Input [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] 
+Arguments: 100, [channel#39 ASC NULLS FIRST, return_rank#37 ASC NULLS FIRST, currency_rank#38 ASC NULLS FIRST, item#33 ASC NULLS FIRST], [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49/simplified.txt new file mode 100644 index 0000000000000..acba83ae8e411 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49/simplified.txt @@ -0,0 +1,126 @@ +TakeOrderedAndProject [channel,currency_rank,item,return_rank,return_ratio] + WholeStageCodegen (23) + HashAggregate [channel,currency_rank,item,return_rank,return_ratio] + InputAdapter + Exchange [channel,currency_rank,item,return_rank,return_ratio] #1 + WholeStageCodegen (22) + HashAggregate [channel,currency_rank,item,return_rank,return_ratio] + InputAdapter + Union + WholeStageCodegen (7) + Project [currency_rank,item,return_rank,return_ratio] + Filter [currency_rank,return_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (6) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (5) + Sort [return_ratio] + InputAdapter + Exchange #2 + WholeStageCodegen (4) + HashAggregate [isEmpty,isEmpty,sum,sum,sum,sum,ws_item_sk] [currency_ratio,isEmpty,isEmpty,item,return_ratio,sum,sum,sum,sum,sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum(cast(coalesce(ws_quantity, 0) as bigint)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00))] + InputAdapter + Exchange [ws_item_sk] #3 + WholeStageCodegen (3) + HashAggregate [wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity] [isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum,sum,sum] + Project [wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project 
[wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + Project [ws_item_sk,ws_net_paid,ws_order_number,ws_quantity,ws_sold_date_sk] + Filter [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Filter [wr_item_sk,wr_order_number,wr_return_amt] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + WholeStageCodegen (14) + Project [currency_rank,item,return_rank,return_ratio] + Filter [currency_rank,return_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (13) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (12) + Sort [return_ratio] + InputAdapter + Exchange #6 + WholeStageCodegen (11) + HashAggregate [cs_item_sk,isEmpty,isEmpty,sum,sum,sum,sum] [currency_ratio,isEmpty,isEmpty,item,return_ratio,sum,sum,sum,sum,sum(cast(coalesce(cr_return_quantity, 0) as bigint)),sum(cast(coalesce(cs_quantity, 0) as bigint)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00))] + InputAdapter + Exchange [cs_item_sk] #7 + WholeStageCodegen (10) + HashAggregate [cr_return_amount,cr_return_quantity,cs_item_sk,cs_net_paid,cs_quantity] [isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum,sum,sum] + Project [cr_return_amount,cr_return_quantity,cs_item_sk,cs_net_paid,cs_quantity] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project 
[cr_return_amount,cr_return_quantity,cs_item_sk,cs_net_paid,cs_quantity,cs_sold_date_sk] + BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + Project [cs_item_sk,cs_net_paid,cs_order_number,cs_quantity,cs_sold_date_sk] + Filter [cs_item_sk,cs_net_paid,cs_net_profit,cs_order_number,cs_quantity,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_item_sk,cs_net_paid,cs_net_profit,cs_order_number,cs_quantity,cs_sold_date_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (8) + Filter [cr_item_sk,cr_order_number,cr_return_amount] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] + InputAdapter + ReusedExchange [d_date_sk] #5 + WholeStageCodegen (21) + Project [currency_rank,item,return_rank,return_ratio] + Filter [currency_rank,return_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (20) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (19) + Sort [return_ratio] + InputAdapter + Exchange #9 + WholeStageCodegen (18) + HashAggregate [isEmpty,isEmpty,ss_item_sk,sum,sum,sum,sum] [currency_ratio,isEmpty,isEmpty,item,return_ratio,sum,sum,sum,sum,sum(cast(coalesce(sr_return_quantity, 0) as bigint)),sum(cast(coalesce(ss_quantity, 0) as bigint)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00))] + InputAdapter + Exchange [ss_item_sk] #10 + WholeStageCodegen (17) + HashAggregate [sr_return_amt,sr_return_quantity,ss_item_sk,ss_net_paid,ss_quantity] [isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum,sum,sum] + Project [sr_return_amt,sr_return_quantity,ss_item_sk,ss_net_paid,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [sr_return_amt,sr_return_quantity,ss_item_sk,ss_net_paid,ss_quantity,ss_sold_date_sk] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + 
Project [ss_item_sk,ss_net_paid,ss_quantity,ss_sold_date_sk,ss_ticket_number] + Filter [ss_item_sk,ss_net_paid,ss_net_profit,ss_quantity,ss_sold_date_sk,ss_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_net_paid,ss_net_profit,ss_quantity,ss_sold_date_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (15) + Filter [sr_item_sk,sr_return_amt,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] + InputAdapter + ReusedExchange [d_date_sk] #5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a.sf100/explain.txt new file mode 100644 index 0000000000000..409051a7856a1 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a.sf100/explain.txt @@ -0,0 +1,441 @@ +== Physical Plan == +TakeOrderedAndProject (80) ++- * Filter (79) + +- * HashAggregate (78) + +- * HashAggregate (77) + +- * Project (76) + +- * SortMergeJoin Inner (75) + :- * Filter (69) + : +- Window (68) + : +- * Sort (67) + : +- Exchange (66) + : +- * Project (65) + : +- * Filter (64) + : +- SortMergeJoin FullOuter (63) + : :- * Sort (33) + : : +- Exchange (32) + : : +- * HashAggregate (31) + : : +- * HashAggregate (30) + : : +- * Project (29) + : : +- * SortMergeJoin Inner (28) + : : :- * Sort (20) + : : : +- Exchange (19) + : : : +- * Project (18) + : : : +- * Filter (17) + : : : +- Window (16) + : : : +- * Sort (15) + : : : +- Exchange (14) + : : : +- * HashAggregate (13) + : : : +- Exchange (12) + : : : +- * HashAggregate (11) + : : : +- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.web_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- 
* Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- * Sort (27) + : : +- Exchange (26) + : : +- * Project (25) + : : +- * Filter (24) + : : +- Window (23) + : : +- * Sort (22) + : : +- ReusedExchange (21) + : +- * Sort (62) + : +- Exchange (61) + : +- * HashAggregate (60) + : +- * HashAggregate (59) + : +- * Project (58) + : +- * SortMergeJoin Inner (57) + : :- * Sort (49) + : : +- Exchange (48) + : : +- * Project (47) + : : +- * Filter (46) + : : +- Window (45) + : : +- * Sort (44) + : : +- Exchange (43) + : : +- * HashAggregate (42) + : : +- Exchange (41) + : : +- * HashAggregate (40) + : : +- * Project (39) + : : +- * BroadcastHashJoin Inner BuildRight (38) + : : :- * Filter (36) + : : : +- * ColumnarToRow (35) + : : : +- Scan parquet default.store_sales (34) + : : +- ReusedExchange (37) + : +- * Sort (56) + : +- Exchange (55) + : +- * Project (54) + : +- * Filter (53) + : +- Window (52) + : +- * Sort (51) + : +- ReusedExchange (50) + +- * Project (74) + +- * Filter (73) + +- Window (72) + +- * Sort (71) + +- ReusedExchange (70) + + +(1) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_sales_price#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_sales_price#3] + +(3) Filter [codegen id : 2] +Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_sales_price#3] +Condition : (isnotnull(ws_item_sk#2) AND isnotnull(ws_sold_date_sk#1)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#4, d_date#5, d_month_seq#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] 
+PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] +Condition : (((isnotnull(d_month_seq#6) AND (d_month_seq#6 >= 1212)) AND (d_month_seq#6 <= 1223)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [2]: [d_date_sk#4, d_date#5] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] + +(8) BroadcastExchange +Input [2]: [d_date_sk#4, d_date#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ws_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(10) Project [codegen id : 2] +Output [3]: [ws_item_sk#2, ws_sales_price#3, d_date#5] +Input [5]: [ws_sold_date_sk#1, ws_item_sk#2, ws_sales_price#3, d_date_sk#4, d_date#5] + +(11) HashAggregate [codegen id : 2] +Input [3]: [ws_item_sk#2, ws_sales_price#3, d_date#5] +Keys [2]: [ws_item_sk#2, d_date#5] +Functions [1]: [partial_sum(UnscaledValue(ws_sales_price#3))] +Aggregate Attributes [1]: [sum#8] +Results [3]: [ws_item_sk#2, d_date#5, sum#9] + +(12) Exchange +Input [3]: [ws_item_sk#2, d_date#5, sum#9] +Arguments: hashpartitioning(ws_item_sk#2, d_date#5, 5), true, [id=#10] + +(13) HashAggregate [codegen id : 3] +Input [3]: [ws_item_sk#2, d_date#5, sum#9] +Keys [2]: [ws_item_sk#2, d_date#5] +Functions [1]: [sum(UnscaledValue(ws_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_sales_price#3))#11] +Results [4]: [ws_item_sk#2 AS item_sk#12, d_date#5, MakeDecimal(sum(UnscaledValue(ws_sales_price#3))#11,17,2) AS sumws#13, ws_item_sk#2] + +(14) Exchange +Input [4]: [item_sk#12, d_date#5, sumws#13, ws_item_sk#2] +Arguments: hashpartitioning(ws_item_sk#2, 5), true, [id=#14] + +(15) Sort [codegen id : 4] 
+Input [4]: [item_sk#12, d_date#5, sumws#13, ws_item_sk#2] +Arguments: [ws_item_sk#2 ASC NULLS FIRST, d_date#5 ASC NULLS FIRST], false, 0 + +(16) Window +Input [4]: [item_sk#12, d_date#5, sumws#13, ws_item_sk#2] +Arguments: [row_number() windowspecdefinition(ws_item_sk#2, d_date#5 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#15], [ws_item_sk#2], [d_date#5 ASC NULLS FIRST] + +(17) Filter [codegen id : 5] +Input [5]: [item_sk#12, d_date#5, sumws#13, ws_item_sk#2, rk#15] +Condition : isnotnull(rk#15) + +(18) Project [codegen id : 5] +Output [4]: [item_sk#12, d_date#5, sumws#13, rk#15] +Input [5]: [item_sk#12, d_date#5, sumws#13, ws_item_sk#2, rk#15] + +(19) Exchange +Input [4]: [item_sk#12, d_date#5, sumws#13, rk#15] +Arguments: hashpartitioning(item_sk#12, 5), true, [id=#16] + +(20) Sort [codegen id : 6] +Input [4]: [item_sk#12, d_date#5, sumws#13, rk#15] +Arguments: [item_sk#12 ASC NULLS FIRST], false, 0 + +(21) ReusedExchange [Reuses operator id: 14] +Output [4]: [item_sk#17, d_date#18, sumws#19, ws_item_sk#2] + +(22) Sort [codegen id : 10] +Input [4]: [item_sk#17, d_date#18, sumws#19, ws_item_sk#2] +Arguments: [ws_item_sk#2 ASC NULLS FIRST, d_date#18 ASC NULLS FIRST], false, 0 + +(23) Window +Input [4]: [item_sk#17, d_date#18, sumws#19, ws_item_sk#2] +Arguments: [row_number() windowspecdefinition(ws_item_sk#2, d_date#18 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#20], [ws_item_sk#2], [d_date#18 ASC NULLS FIRST] + +(24) Filter [codegen id : 11] +Input [5]: [item_sk#17, d_date#18, sumws#19, ws_item_sk#2, rk#20] +Condition : isnotnull(rk#20) + +(25) Project [codegen id : 11] +Output [3]: [item_sk#17, sumws#19, rk#20] +Input [5]: [item_sk#17, d_date#18, sumws#19, ws_item_sk#2, rk#20] + +(26) Exchange +Input [3]: [item_sk#17, sumws#19, rk#20] +Arguments: hashpartitioning(item_sk#17, 5), true, [id=#21] + +(27) Sort [codegen id : 12] +Input [3]: [item_sk#17, sumws#19, rk#20] 
+Arguments: [item_sk#17 ASC NULLS FIRST], false, 0 + +(28) SortMergeJoin [codegen id : 13] +Left keys [1]: [item_sk#12] +Right keys [1]: [item_sk#17] +Join condition: (rk#15 >= rk#20) + +(29) Project [codegen id : 13] +Output [4]: [item_sk#12, d_date#5, sumws#13, sumws#19] +Input [7]: [item_sk#12, d_date#5, sumws#13, rk#15, item_sk#17, sumws#19, rk#20] + +(30) HashAggregate [codegen id : 13] +Input [4]: [item_sk#12, d_date#5, sumws#13, sumws#19] +Keys [3]: [item_sk#12, d_date#5, sumws#13] +Functions [1]: [partial_sum(sumws#19)] +Aggregate Attributes [2]: [sum#22, isEmpty#23] +Results [5]: [item_sk#12, d_date#5, sumws#13, sum#24, isEmpty#25] + +(31) HashAggregate [codegen id : 13] +Input [5]: [item_sk#12, d_date#5, sumws#13, sum#24, isEmpty#25] +Keys [3]: [item_sk#12, d_date#5, sumws#13] +Functions [1]: [sum(sumws#19)] +Aggregate Attributes [1]: [sum(sumws#19)#26] +Results [3]: [item_sk#12, d_date#5, sum(sumws#19)#26 AS cume_sales#27] + +(32) Exchange +Input [3]: [item_sk#12, d_date#5, cume_sales#27] +Arguments: hashpartitioning(item_sk#12, d_date#5, 5), true, [id=#28] + +(33) Sort [codegen id : 14] +Input [3]: [item_sk#12, d_date#5, cume_sales#27] +Arguments: [item_sk#12 ASC NULLS FIRST, d_date#5 ASC NULLS FIRST], false, 0 + +(34) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#29, ss_item_sk#30, ss_sales_price#31] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 16] +Input [3]: [ss_sold_date_sk#29, ss_item_sk#30, ss_sales_price#31] + +(36) Filter [codegen id : 16] +Input [3]: [ss_sold_date_sk#29, ss_item_sk#30, ss_sales_price#31] +Condition : (isnotnull(ss_item_sk#30) AND isnotnull(ss_sold_date_sk#29)) + +(37) ReusedExchange [Reuses operator id: 8] +Output [2]: [d_date_sk#32, d_date#33] + +(38) 
BroadcastHashJoin [codegen id : 16] +Left keys [1]: [ss_sold_date_sk#29] +Right keys [1]: [d_date_sk#32] +Join condition: None + +(39) Project [codegen id : 16] +Output [3]: [ss_item_sk#30, ss_sales_price#31, d_date#33] +Input [5]: [ss_sold_date_sk#29, ss_item_sk#30, ss_sales_price#31, d_date_sk#32, d_date#33] + +(40) HashAggregate [codegen id : 16] +Input [3]: [ss_item_sk#30, ss_sales_price#31, d_date#33] +Keys [2]: [ss_item_sk#30, d_date#33] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#31))] +Aggregate Attributes [1]: [sum#34] +Results [3]: [ss_item_sk#30, d_date#33, sum#35] + +(41) Exchange +Input [3]: [ss_item_sk#30, d_date#33, sum#35] +Arguments: hashpartitioning(ss_item_sk#30, d_date#33, 5), true, [id=#36] + +(42) HashAggregate [codegen id : 17] +Input [3]: [ss_item_sk#30, d_date#33, sum#35] +Keys [2]: [ss_item_sk#30, d_date#33] +Functions [1]: [sum(UnscaledValue(ss_sales_price#31))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#31))#37] +Results [4]: [ss_item_sk#30 AS item_sk#38, d_date#33, MakeDecimal(sum(UnscaledValue(ss_sales_price#31))#37,17,2) AS sumss#39, ss_item_sk#30] + +(43) Exchange +Input [4]: [item_sk#38, d_date#33, sumss#39, ss_item_sk#30] +Arguments: hashpartitioning(ss_item_sk#30, 5), true, [id=#40] + +(44) Sort [codegen id : 18] +Input [4]: [item_sk#38, d_date#33, sumss#39, ss_item_sk#30] +Arguments: [ss_item_sk#30 ASC NULLS FIRST, d_date#33 ASC NULLS FIRST], false, 0 + +(45) Window +Input [4]: [item_sk#38, d_date#33, sumss#39, ss_item_sk#30] +Arguments: [row_number() windowspecdefinition(ss_item_sk#30, d_date#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#41], [ss_item_sk#30], [d_date#33 ASC NULLS FIRST] + +(46) Filter [codegen id : 19] +Input [5]: [item_sk#38, d_date#33, sumss#39, ss_item_sk#30, rk#41] +Condition : isnotnull(rk#41) + +(47) Project [codegen id : 19] +Output [4]: [item_sk#38, d_date#33, sumss#39, rk#41] +Input [5]: [item_sk#38, d_date#33, sumss#39, 
ss_item_sk#30, rk#41] + +(48) Exchange +Input [4]: [item_sk#38, d_date#33, sumss#39, rk#41] +Arguments: hashpartitioning(item_sk#38, 5), true, [id=#42] + +(49) Sort [codegen id : 20] +Input [4]: [item_sk#38, d_date#33, sumss#39, rk#41] +Arguments: [item_sk#38 ASC NULLS FIRST], false, 0 + +(50) ReusedExchange [Reuses operator id: 43] +Output [4]: [item_sk#43, d_date#44, sumss#45, ss_item_sk#30] + +(51) Sort [codegen id : 24] +Input [4]: [item_sk#43, d_date#44, sumss#45, ss_item_sk#30] +Arguments: [ss_item_sk#30 ASC NULLS FIRST, d_date#44 ASC NULLS FIRST], false, 0 + +(52) Window +Input [4]: [item_sk#43, d_date#44, sumss#45, ss_item_sk#30] +Arguments: [row_number() windowspecdefinition(ss_item_sk#30, d_date#44 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#46], [ss_item_sk#30], [d_date#44 ASC NULLS FIRST] + +(53) Filter [codegen id : 25] +Input [5]: [item_sk#43, d_date#44, sumss#45, ss_item_sk#30, rk#46] +Condition : isnotnull(rk#46) + +(54) Project [codegen id : 25] +Output [3]: [item_sk#43, sumss#45, rk#46] +Input [5]: [item_sk#43, d_date#44, sumss#45, ss_item_sk#30, rk#46] + +(55) Exchange +Input [3]: [item_sk#43, sumss#45, rk#46] +Arguments: hashpartitioning(item_sk#43, 5), true, [id=#47] + +(56) Sort [codegen id : 26] +Input [3]: [item_sk#43, sumss#45, rk#46] +Arguments: [item_sk#43 ASC NULLS FIRST], false, 0 + +(57) SortMergeJoin [codegen id : 27] +Left keys [1]: [item_sk#38] +Right keys [1]: [item_sk#43] +Join condition: (rk#41 >= rk#46) + +(58) Project [codegen id : 27] +Output [4]: [item_sk#38, d_date#33, sumss#39, sumss#45] +Input [7]: [item_sk#38, d_date#33, sumss#39, rk#41, item_sk#43, sumss#45, rk#46] + +(59) HashAggregate [codegen id : 27] +Input [4]: [item_sk#38, d_date#33, sumss#39, sumss#45] +Keys [3]: [item_sk#38, d_date#33, sumss#39] +Functions [1]: [partial_sum(sumss#45)] +Aggregate Attributes [2]: [sum#48, isEmpty#49] +Results [5]: [item_sk#38, d_date#33, sumss#39, sum#50, isEmpty#51] + +(60) 
HashAggregate [codegen id : 27] +Input [5]: [item_sk#38, d_date#33, sumss#39, sum#50, isEmpty#51] +Keys [3]: [item_sk#38, d_date#33, sumss#39] +Functions [1]: [sum(sumss#45)] +Aggregate Attributes [1]: [sum(sumss#45)#52] +Results [3]: [item_sk#38, d_date#33, sum(sumss#45)#52 AS cume_sales#53] + +(61) Exchange +Input [3]: [item_sk#38, d_date#33, cume_sales#53] +Arguments: hashpartitioning(item_sk#38, d_date#33, 5), true, [id=#54] + +(62) Sort [codegen id : 28] +Input [3]: [item_sk#38, d_date#33, cume_sales#53] +Arguments: [item_sk#38 ASC NULLS FIRST, d_date#33 ASC NULLS FIRST], false, 0 + +(63) SortMergeJoin +Left keys [2]: [item_sk#12, d_date#5] +Right keys [2]: [item_sk#38, d_date#33] +Join condition: None + +(64) Filter [codegen id : 29] +Input [6]: [item_sk#12, d_date#5, cume_sales#27, item_sk#38, d_date#33, cume_sales#53] +Condition : isnotnull(CASE WHEN isnotnull(item_sk#12) THEN item_sk#12 ELSE item_sk#38 END) + +(65) Project [codegen id : 29] +Output [4]: [CASE WHEN isnotnull(item_sk#12) THEN item_sk#12 ELSE item_sk#38 END AS item_sk#55, CASE WHEN isnotnull(d_date#5) THEN d_date#5 ELSE d_date#33 END AS d_date#56, cume_sales#27 AS web_sales#57, cume_sales#53 AS store_sales#58] +Input [6]: [item_sk#12, d_date#5, cume_sales#27, item_sk#38, d_date#33, cume_sales#53] + +(66) Exchange +Input [4]: [item_sk#55, d_date#56, web_sales#57, store_sales#58] +Arguments: hashpartitioning(item_sk#55, 5), true, [id=#59] + +(67) Sort [codegen id : 30] +Input [4]: [item_sk#55, d_date#56, web_sales#57, store_sales#58] +Arguments: [item_sk#55 ASC NULLS FIRST, d_date#56 ASC NULLS FIRST], false, 0 + +(68) Window +Input [4]: [item_sk#55, d_date#56, web_sales#57, store_sales#58] +Arguments: [row_number() windowspecdefinition(item_sk#55, d_date#56 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#60], [item_sk#55], [d_date#56 ASC NULLS FIRST] + +(69) Filter [codegen id : 31] +Input [5]: [item_sk#55, d_date#56, web_sales#57, store_sales#58, 
rk#60] +Condition : isnotnull(rk#60) + +(70) ReusedExchange [Reuses operator id: 66] +Output [4]: [item_sk#61, d_date#62, web_sales#63, store_sales#64] + +(71) Sort [codegen id : 61] +Input [4]: [item_sk#61, d_date#62, web_sales#63, store_sales#64] +Arguments: [item_sk#61 ASC NULLS FIRST, d_date#62 ASC NULLS FIRST], false, 0 + +(72) Window +Input [4]: [item_sk#61, d_date#62, web_sales#63, store_sales#64] +Arguments: [row_number() windowspecdefinition(item_sk#61, d_date#62 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#65], [item_sk#61], [d_date#62 ASC NULLS FIRST] + +(73) Filter [codegen id : 62] +Input [5]: [item_sk#61, d_date#62, web_sales#63, store_sales#64, rk#65] +Condition : isnotnull(rk#65) + +(74) Project [codegen id : 62] +Output [4]: [item_sk#61, web_sales#63, store_sales#64, rk#65] +Input [5]: [item_sk#61, d_date#62, web_sales#63, store_sales#64, rk#65] + +(75) SortMergeJoin [codegen id : 63] +Left keys [1]: [item_sk#55] +Right keys [1]: [item_sk#61] +Join condition: (rk#60 >= rk#65) + +(76) Project [codegen id : 63] +Output [6]: [item_sk#55, d_date#56, web_sales#57, store_sales#58, web_sales#63, store_sales#64] +Input [9]: [item_sk#55, d_date#56, web_sales#57, store_sales#58, rk#60, item_sk#61, web_sales#63, store_sales#64, rk#65] + +(77) HashAggregate [codegen id : 63] +Input [6]: [item_sk#55, d_date#56, web_sales#57, store_sales#58, web_sales#63, store_sales#64] +Keys [4]: [item_sk#55, d_date#56, web_sales#57, store_sales#58] +Functions [2]: [partial_max(web_sales#63), partial_max(store_sales#64)] +Aggregate Attributes [2]: [max#66, max#67] +Results [6]: [item_sk#55, d_date#56, web_sales#57, store_sales#58, max#68, max#69] + +(78) HashAggregate [codegen id : 63] +Input [6]: [item_sk#55, d_date#56, web_sales#57, store_sales#58, max#68, max#69] +Keys [4]: [item_sk#55, d_date#56, web_sales#57, store_sales#58] +Functions [2]: [max(web_sales#63), max(store_sales#64)] +Aggregate Attributes [2]: 
[max(web_sales#63)#70, max(store_sales#64)#71] +Results [6]: [item_sk#55, d_date#56, web_sales#57, store_sales#58, max(web_sales#63)#70 AS web_cumulative#72, max(store_sales#64)#71 AS store_cumulative#73] + +(79) Filter [codegen id : 63] +Input [6]: [item_sk#55, d_date#56, web_sales#57, store_sales#58, web_cumulative#72, store_cumulative#73] +Condition : ((isnotnull(web_cumulative#72) AND isnotnull(store_cumulative#73)) AND (web_cumulative#72 > store_cumulative#73)) + +(80) TakeOrderedAndProject +Input [6]: [item_sk#55, d_date#56, web_sales#57, store_sales#58, web_cumulative#72, store_cumulative#73] +Arguments: 100, [item_sk#55 ASC NULLS FIRST, d_date#56 ASC NULLS FIRST], [item_sk#55, d_date#56, web_sales#57, store_sales#58, web_cumulative#72, store_cumulative#73] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a.sf100/simplified.txt new file mode 100644 index 0000000000000..245ad9d53f2cf --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a.sf100/simplified.txt @@ -0,0 +1,139 @@ +TakeOrderedAndProject [d_date,item_sk,store_cumulative,store_sales,web_cumulative,web_sales] + WholeStageCodegen (63) + Filter [store_cumulative,web_cumulative] + HashAggregate [d_date,item_sk,max,max,store_sales,web_sales] [max,max,max(store_sales),max(web_sales),store_cumulative,web_cumulative] + HashAggregate [d_date,item_sk,store_sales,store_sales,web_sales,web_sales] [max,max,max,max] + Project [d_date,item_sk,store_sales,store_sales,web_sales,web_sales] + SortMergeJoin [item_sk,item_sk,rk,rk] + InputAdapter + WholeStageCodegen (31) + Filter [rk] + InputAdapter + Window [d_date,item_sk] + WholeStageCodegen (30) + Sort [d_date,item_sk] + InputAdapter + Exchange [item_sk] #1 + WholeStageCodegen (29) + Project [cume_sales,cume_sales,d_date,d_date,item_sk,item_sk] + Filter [item_sk,item_sk] + InputAdapter + 
SortMergeJoin [d_date,d_date,item_sk,item_sk] + WholeStageCodegen (14) + Sort [d_date,item_sk] + InputAdapter + Exchange [d_date,item_sk] #2 + WholeStageCodegen (13) + HashAggregate [d_date,isEmpty,item_sk,sum,sumws] [cume_sales,isEmpty,sum,sum(sumws)] + HashAggregate [d_date,item_sk,sumws,sumws] [isEmpty,isEmpty,sum,sum] + Project [d_date,item_sk,sumws,sumws] + SortMergeJoin [item_sk,item_sk,rk,rk] + InputAdapter + WholeStageCodegen (6) + Sort [item_sk] + InputAdapter + Exchange [item_sk] #3 + WholeStageCodegen (5) + Project [d_date,item_sk,rk,sumws] + Filter [rk] + InputAdapter + Window [d_date,ws_item_sk] + WholeStageCodegen (4) + Sort [d_date,ws_item_sk] + InputAdapter + Exchange [ws_item_sk] #4 + WholeStageCodegen (3) + HashAggregate [d_date,sum,ws_item_sk] [item_sk,sum,sum(UnscaledValue(ws_sales_price)),sumws] + InputAdapter + Exchange [d_date,ws_item_sk] #5 + WholeStageCodegen (2) + HashAggregate [d_date,ws_item_sk,ws_sales_price] [sum,sum] + Project [d_date,ws_item_sk,ws_sales_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_sales_price,ws_sold_date_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (1) + Project [d_date,d_date_sk] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk,d_month_seq] + InputAdapter + WholeStageCodegen (12) + Sort [item_sk] + InputAdapter + Exchange [item_sk] #7 + WholeStageCodegen (11) + Project [item_sk,rk,sumws] + Filter [rk] + InputAdapter + Window [d_date,ws_item_sk] + WholeStageCodegen (10) + Sort [d_date,ws_item_sk] + InputAdapter + ReusedExchange [d_date,item_sk,sumws,ws_item_sk] #4 + WholeStageCodegen (28) + Sort [d_date,item_sk] + InputAdapter + Exchange [d_date,item_sk] #8 + WholeStageCodegen (27) + HashAggregate [d_date,isEmpty,item_sk,sum,sumss] [cume_sales,isEmpty,sum,sum(sumss)] + HashAggregate [d_date,item_sk,sumss,sumss] 
[isEmpty,isEmpty,sum,sum] + Project [d_date,item_sk,sumss,sumss] + SortMergeJoin [item_sk,item_sk,rk,rk] + InputAdapter + WholeStageCodegen (20) + Sort [item_sk] + InputAdapter + Exchange [item_sk] #9 + WholeStageCodegen (19) + Project [d_date,item_sk,rk,sumss] + Filter [rk] + InputAdapter + Window [d_date,ss_item_sk] + WholeStageCodegen (18) + Sort [d_date,ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #10 + WholeStageCodegen (17) + HashAggregate [d_date,ss_item_sk,sum] [item_sk,sum,sum(UnscaledValue(ss_sales_price)),sumss] + InputAdapter + Exchange [d_date,ss_item_sk] #11 + WholeStageCodegen (16) + HashAggregate [d_date,ss_item_sk,ss_sales_price] [sum,sum] + Project [d_date,ss_item_sk,ss_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [d_date,d_date_sk] #6 + InputAdapter + WholeStageCodegen (26) + Sort [item_sk] + InputAdapter + Exchange [item_sk] #12 + WholeStageCodegen (25) + Project [item_sk,rk,sumss] + Filter [rk] + InputAdapter + Window [d_date,ss_item_sk] + WholeStageCodegen (24) + Sort [d_date,ss_item_sk] + InputAdapter + ReusedExchange [d_date,item_sk,ss_item_sk,sumss] #10 + InputAdapter + WholeStageCodegen (62) + Project [item_sk,rk,store_sales,web_sales] + Filter [rk] + InputAdapter + Window [d_date,item_sk] + WholeStageCodegen (61) + Sort [d_date,item_sk] + InputAdapter + ReusedExchange [d_date,item_sk,store_sales,web_sales] #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a/explain.txt new file mode 100644 index 0000000000000..88500a4c2a834 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a/explain.txt @@ -0,0 +1,426 @@ +== Physical Plan == +TakeOrderedAndProject (77) ++- * Filter (76) + +- 
* HashAggregate (75) + +- * HashAggregate (74) + +- * Project (73) + +- * BroadcastHashJoin Inner BuildRight (72) + :- * Filter (65) + : +- Window (64) + : +- * Sort (63) + : +- Exchange (62) + : +- * Project (61) + : +- * Filter (60) + : +- SortMergeJoin FullOuter (59) + : :- * Sort (31) + : : +- Exchange (30) + : : +- * HashAggregate (29) + : : +- Exchange (28) + : : +- * HashAggregate (27) + : : +- * Project (26) + : : +- * BroadcastHashJoin Inner BuildRight (25) + : : :- * Project (18) + : : : +- * Filter (17) + : : : +- Window (16) + : : : +- * Sort (15) + : : : +- Exchange (14) + : : : +- * HashAggregate (13) + : : : +- Exchange (12) + : : : +- * HashAggregate (11) + : : : +- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.web_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (24) + : : +- * Project (23) + : : +- * Filter (22) + : : +- Window (21) + : : +- * Sort (20) + : : +- ReusedExchange (19) + : +- * Sort (58) + : +- Exchange (57) + : +- * HashAggregate (56) + : +- Exchange (55) + : +- * HashAggregate (54) + : +- * Project (53) + : +- * BroadcastHashJoin Inner BuildRight (52) + : :- * Project (45) + : : +- * Filter (44) + : : +- Window (43) + : : +- * Sort (42) + : : +- Exchange (41) + : : +- * HashAggregate (40) + : : +- Exchange (39) + : : +- * HashAggregate (38) + : : +- * Project (37) + : : +- * BroadcastHashJoin Inner BuildRight (36) + : : :- * Filter (34) + : : : +- * ColumnarToRow (33) + : : : +- Scan parquet default.store_sales (32) + : : +- ReusedExchange (35) + : +- BroadcastExchange (51) + : +- * Project (50) + : +- * Filter (49) + : +- Window (48) + : +- * Sort (47) + : +- ReusedExchange (46) + +- BroadcastExchange (71) + +- * Project (70) + +- * Filter (69) + +- Window (68) + +- * Sort (67) 
+ +- ReusedExchange (66) + + +(1) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_sales_price#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_sales_price#3] + +(3) Filter [codegen id : 2] +Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_sales_price#3] +Condition : (isnotnull(ws_item_sk#2) AND isnotnull(ws_sold_date_sk#1)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#4, d_date#5, d_month_seq#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] +Condition : (((isnotnull(d_month_seq#6) AND (d_month_seq#6 >= 1212)) AND (d_month_seq#6 <= 1223)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [2]: [d_date_sk#4, d_date#5] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] + +(8) BroadcastExchange +Input [2]: [d_date_sk#4, d_date#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ws_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(10) Project [codegen id : 2] +Output [3]: [ws_item_sk#2, ws_sales_price#3, d_date#5] +Input [5]: [ws_sold_date_sk#1, ws_item_sk#2, ws_sales_price#3, d_date_sk#4, d_date#5] + +(11) HashAggregate [codegen id : 2] +Input 
[3]: [ws_item_sk#2, ws_sales_price#3, d_date#5] +Keys [2]: [ws_item_sk#2, d_date#5] +Functions [1]: [partial_sum(UnscaledValue(ws_sales_price#3))] +Aggregate Attributes [1]: [sum#8] +Results [3]: [ws_item_sk#2, d_date#5, sum#9] + +(12) Exchange +Input [3]: [ws_item_sk#2, d_date#5, sum#9] +Arguments: hashpartitioning(ws_item_sk#2, d_date#5, 5), true, [id=#10] + +(13) HashAggregate [codegen id : 3] +Input [3]: [ws_item_sk#2, d_date#5, sum#9] +Keys [2]: [ws_item_sk#2, d_date#5] +Functions [1]: [sum(UnscaledValue(ws_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_sales_price#3))#11] +Results [4]: [ws_item_sk#2 AS item_sk#12, d_date#5, MakeDecimal(sum(UnscaledValue(ws_sales_price#3))#11,17,2) AS sumws#13, ws_item_sk#2] + +(14) Exchange +Input [4]: [item_sk#12, d_date#5, sumws#13, ws_item_sk#2] +Arguments: hashpartitioning(ws_item_sk#2, 5), true, [id=#14] + +(15) Sort [codegen id : 4] +Input [4]: [item_sk#12, d_date#5, sumws#13, ws_item_sk#2] +Arguments: [ws_item_sk#2 ASC NULLS FIRST, d_date#5 ASC NULLS FIRST], false, 0 + +(16) Window +Input [4]: [item_sk#12, d_date#5, sumws#13, ws_item_sk#2] +Arguments: [row_number() windowspecdefinition(ws_item_sk#2, d_date#5 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#15], [ws_item_sk#2], [d_date#5 ASC NULLS FIRST] + +(17) Filter [codegen id : 10] +Input [5]: [item_sk#12, d_date#5, sumws#13, ws_item_sk#2, rk#15] +Condition : isnotnull(rk#15) + +(18) Project [codegen id : 10] +Output [4]: [item_sk#12, d_date#5, sumws#13, rk#15] +Input [5]: [item_sk#12, d_date#5, sumws#13, ws_item_sk#2, rk#15] + +(19) ReusedExchange [Reuses operator id: 14] +Output [4]: [item_sk#16, d_date#17, sumws#18, ws_item_sk#2] + +(20) Sort [codegen id : 8] +Input [4]: [item_sk#16, d_date#17, sumws#18, ws_item_sk#2] +Arguments: [ws_item_sk#2 ASC NULLS FIRST, d_date#17 ASC NULLS FIRST], false, 0 + +(21) Window +Input [4]: [item_sk#16, d_date#17, sumws#18, ws_item_sk#2] +Arguments: [row_number() 
windowspecdefinition(ws_item_sk#2, d_date#17 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#19], [ws_item_sk#2], [d_date#17 ASC NULLS FIRST] + +(22) Filter [codegen id : 9] +Input [5]: [item_sk#16, d_date#17, sumws#18, ws_item_sk#2, rk#19] +Condition : isnotnull(rk#19) + +(23) Project [codegen id : 9] +Output [3]: [item_sk#16, sumws#18, rk#19] +Input [5]: [item_sk#16, d_date#17, sumws#18, ws_item_sk#2, rk#19] + +(24) BroadcastExchange +Input [3]: [item_sk#16, sumws#18, rk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(25) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [item_sk#12] +Right keys [1]: [item_sk#16] +Join condition: (rk#15 >= rk#19) + +(26) Project [codegen id : 10] +Output [4]: [item_sk#12, d_date#5, sumws#13, sumws#18] +Input [7]: [item_sk#12, d_date#5, sumws#13, rk#15, item_sk#16, sumws#18, rk#19] + +(27) HashAggregate [codegen id : 10] +Input [4]: [item_sk#12, d_date#5, sumws#13, sumws#18] +Keys [3]: [item_sk#12, d_date#5, sumws#13] +Functions [1]: [partial_sum(sumws#18)] +Aggregate Attributes [2]: [sum#21, isEmpty#22] +Results [5]: [item_sk#12, d_date#5, sumws#13, sum#23, isEmpty#24] + +(28) Exchange +Input [5]: [item_sk#12, d_date#5, sumws#13, sum#23, isEmpty#24] +Arguments: hashpartitioning(item_sk#12, d_date#5, sumws#13, 5), true, [id=#25] + +(29) HashAggregate [codegen id : 11] +Input [5]: [item_sk#12, d_date#5, sumws#13, sum#23, isEmpty#24] +Keys [3]: [item_sk#12, d_date#5, sumws#13] +Functions [1]: [sum(sumws#18)] +Aggregate Attributes [1]: [sum(sumws#18)#26] +Results [3]: [item_sk#12, d_date#5, sum(sumws#18)#26 AS cume_sales#27] + +(30) Exchange +Input [3]: [item_sk#12, d_date#5, cume_sales#27] +Arguments: hashpartitioning(item_sk#12, d_date#5, 5), true, [id=#28] + +(31) Sort [codegen id : 12] +Input [3]: [item_sk#12, d_date#5, cume_sales#27] +Arguments: [item_sk#12 ASC NULLS FIRST, d_date#5 ASC NULLS FIRST], false, 0 + +(32) Scan 
parquet default.store_sales +Output [3]: [ss_sold_date_sk#29, ss_item_sk#30, ss_sales_price#31] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(33) ColumnarToRow [codegen id : 14] +Input [3]: [ss_sold_date_sk#29, ss_item_sk#30, ss_sales_price#31] + +(34) Filter [codegen id : 14] +Input [3]: [ss_sold_date_sk#29, ss_item_sk#30, ss_sales_price#31] +Condition : (isnotnull(ss_item_sk#30) AND isnotnull(ss_sold_date_sk#29)) + +(35) ReusedExchange [Reuses operator id: 8] +Output [2]: [d_date_sk#32, d_date#33] + +(36) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ss_sold_date_sk#29] +Right keys [1]: [d_date_sk#32] +Join condition: None + +(37) Project [codegen id : 14] +Output [3]: [ss_item_sk#30, ss_sales_price#31, d_date#33] +Input [5]: [ss_sold_date_sk#29, ss_item_sk#30, ss_sales_price#31, d_date_sk#32, d_date#33] + +(38) HashAggregate [codegen id : 14] +Input [3]: [ss_item_sk#30, ss_sales_price#31, d_date#33] +Keys [2]: [ss_item_sk#30, d_date#33] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#31))] +Aggregate Attributes [1]: [sum#34] +Results [3]: [ss_item_sk#30, d_date#33, sum#35] + +(39) Exchange +Input [3]: [ss_item_sk#30, d_date#33, sum#35] +Arguments: hashpartitioning(ss_item_sk#30, d_date#33, 5), true, [id=#36] + +(40) HashAggregate [codegen id : 15] +Input [3]: [ss_item_sk#30, d_date#33, sum#35] +Keys [2]: [ss_item_sk#30, d_date#33] +Functions [1]: [sum(UnscaledValue(ss_sales_price#31))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#31))#37] +Results [4]: [ss_item_sk#30 AS item_sk#38, d_date#33, MakeDecimal(sum(UnscaledValue(ss_sales_price#31))#37,17,2) AS sumss#39, ss_item_sk#30] + +(41) Exchange +Input [4]: [item_sk#38, d_date#33, sumss#39, ss_item_sk#30] +Arguments: hashpartitioning(ss_item_sk#30, 5), true, [id=#40] + 
+(42) Sort [codegen id : 16] +Input [4]: [item_sk#38, d_date#33, sumss#39, ss_item_sk#30] +Arguments: [ss_item_sk#30 ASC NULLS FIRST, d_date#33 ASC NULLS FIRST], false, 0 + +(43) Window +Input [4]: [item_sk#38, d_date#33, sumss#39, ss_item_sk#30] +Arguments: [row_number() windowspecdefinition(ss_item_sk#30, d_date#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#41], [ss_item_sk#30], [d_date#33 ASC NULLS FIRST] + +(44) Filter [codegen id : 22] +Input [5]: [item_sk#38, d_date#33, sumss#39, ss_item_sk#30, rk#41] +Condition : isnotnull(rk#41) + +(45) Project [codegen id : 22] +Output [4]: [item_sk#38, d_date#33, sumss#39, rk#41] +Input [5]: [item_sk#38, d_date#33, sumss#39, ss_item_sk#30, rk#41] + +(46) ReusedExchange [Reuses operator id: 41] +Output [4]: [item_sk#42, d_date#43, sumss#44, ss_item_sk#30] + +(47) Sort [codegen id : 20] +Input [4]: [item_sk#42, d_date#43, sumss#44, ss_item_sk#30] +Arguments: [ss_item_sk#30 ASC NULLS FIRST, d_date#43 ASC NULLS FIRST], false, 0 + +(48) Window +Input [4]: [item_sk#42, d_date#43, sumss#44, ss_item_sk#30] +Arguments: [row_number() windowspecdefinition(ss_item_sk#30, d_date#43 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#45], [ss_item_sk#30], [d_date#43 ASC NULLS FIRST] + +(49) Filter [codegen id : 21] +Input [5]: [item_sk#42, d_date#43, sumss#44, ss_item_sk#30, rk#45] +Condition : isnotnull(rk#45) + +(50) Project [codegen id : 21] +Output [3]: [item_sk#42, sumss#44, rk#45] +Input [5]: [item_sk#42, d_date#43, sumss#44, ss_item_sk#30, rk#45] + +(51) BroadcastExchange +Input [3]: [item_sk#42, sumss#44, rk#45] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#46] + +(52) BroadcastHashJoin [codegen id : 22] +Left keys [1]: [item_sk#38] +Right keys [1]: [item_sk#42] +Join condition: (rk#41 >= rk#45) + +(53) Project [codegen id : 22] +Output [4]: [item_sk#38, d_date#33, sumss#39, sumss#44] 
+Input [7]: [item_sk#38, d_date#33, sumss#39, rk#41, item_sk#42, sumss#44, rk#45] + +(54) HashAggregate [codegen id : 22] +Input [4]: [item_sk#38, d_date#33, sumss#39, sumss#44] +Keys [3]: [item_sk#38, d_date#33, sumss#39] +Functions [1]: [partial_sum(sumss#44)] +Aggregate Attributes [2]: [sum#47, isEmpty#48] +Results [5]: [item_sk#38, d_date#33, sumss#39, sum#49, isEmpty#50] + +(55) Exchange +Input [5]: [item_sk#38, d_date#33, sumss#39, sum#49, isEmpty#50] +Arguments: hashpartitioning(item_sk#38, d_date#33, sumss#39, 5), true, [id=#51] + +(56) HashAggregate [codegen id : 23] +Input [5]: [item_sk#38, d_date#33, sumss#39, sum#49, isEmpty#50] +Keys [3]: [item_sk#38, d_date#33, sumss#39] +Functions [1]: [sum(sumss#44)] +Aggregate Attributes [1]: [sum(sumss#44)#52] +Results [3]: [item_sk#38, d_date#33, sum(sumss#44)#52 AS cume_sales#53] + +(57) Exchange +Input [3]: [item_sk#38, d_date#33, cume_sales#53] +Arguments: hashpartitioning(item_sk#38, d_date#33, 5), true, [id=#54] + +(58) Sort [codegen id : 24] +Input [3]: [item_sk#38, d_date#33, cume_sales#53] +Arguments: [item_sk#38 ASC NULLS FIRST, d_date#33 ASC NULLS FIRST], false, 0 + +(59) SortMergeJoin +Left keys [2]: [item_sk#12, d_date#5] +Right keys [2]: [item_sk#38, d_date#33] +Join condition: None + +(60) Filter [codegen id : 25] +Input [6]: [item_sk#12, d_date#5, cume_sales#27, item_sk#38, d_date#33, cume_sales#53] +Condition : isnotnull(CASE WHEN isnotnull(item_sk#12) THEN item_sk#12 ELSE item_sk#38 END) + +(61) Project [codegen id : 25] +Output [4]: [CASE WHEN isnotnull(item_sk#12) THEN item_sk#12 ELSE item_sk#38 END AS item_sk#55, CASE WHEN isnotnull(d_date#5) THEN d_date#5 ELSE d_date#33 END AS d_date#56, cume_sales#27 AS web_sales#57, cume_sales#53 AS store_sales#58] +Input [6]: [item_sk#12, d_date#5, cume_sales#27, item_sk#38, d_date#33, cume_sales#53] + +(62) Exchange +Input [4]: [item_sk#55, d_date#56, web_sales#57, store_sales#58] +Arguments: hashpartitioning(item_sk#55, 5), true, [id=#59] + +(63) Sort 
[codegen id : 26] +Input [4]: [item_sk#55, d_date#56, web_sales#57, store_sales#58] +Arguments: [item_sk#55 ASC NULLS FIRST, d_date#56 ASC NULLS FIRST], false, 0 + +(64) Window +Input [4]: [item_sk#55, d_date#56, web_sales#57, store_sales#58] +Arguments: [row_number() windowspecdefinition(item_sk#55, d_date#56 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#60], [item_sk#55], [d_date#56 ASC NULLS FIRST] + +(65) Filter [codegen id : 54] +Input [5]: [item_sk#55, d_date#56, web_sales#57, store_sales#58, rk#60] +Condition : isnotnull(rk#60) + +(66) ReusedExchange [Reuses operator id: 62] +Output [4]: [item_sk#61, d_date#62, web_sales#63, store_sales#64] + +(67) Sort [codegen id : 52] +Input [4]: [item_sk#61, d_date#62, web_sales#63, store_sales#64] +Arguments: [item_sk#61 ASC NULLS FIRST, d_date#62 ASC NULLS FIRST], false, 0 + +(68) Window +Input [4]: [item_sk#61, d_date#62, web_sales#63, store_sales#64] +Arguments: [row_number() windowspecdefinition(item_sk#61, d_date#62 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#65], [item_sk#61], [d_date#62 ASC NULLS FIRST] + +(69) Filter [codegen id : 53] +Input [5]: [item_sk#61, d_date#62, web_sales#63, store_sales#64, rk#65] +Condition : isnotnull(rk#65) + +(70) Project [codegen id : 53] +Output [4]: [item_sk#61, web_sales#63, store_sales#64, rk#65] +Input [5]: [item_sk#61, d_date#62, web_sales#63, store_sales#64, rk#65] + +(71) BroadcastExchange +Input [4]: [item_sk#61, web_sales#63, store_sales#64, rk#65] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#66] + +(72) BroadcastHashJoin [codegen id : 54] +Left keys [1]: [item_sk#55] +Right keys [1]: [item_sk#61] +Join condition: (rk#60 >= rk#65) + +(73) Project [codegen id : 54] +Output [6]: [item_sk#55, d_date#56, web_sales#57, store_sales#58, web_sales#63, store_sales#64] +Input [9]: [item_sk#55, d_date#56, web_sales#57, store_sales#58, 
rk#60, item_sk#61, web_sales#63, store_sales#64, rk#65] + +(74) HashAggregate [codegen id : 54] +Input [6]: [item_sk#55, d_date#56, web_sales#57, store_sales#58, web_sales#63, store_sales#64] +Keys [4]: [item_sk#55, d_date#56, web_sales#57, store_sales#58] +Functions [2]: [partial_max(web_sales#63), partial_max(store_sales#64)] +Aggregate Attributes [2]: [max#67, max#68] +Results [6]: [item_sk#55, d_date#56, web_sales#57, store_sales#58, max#69, max#70] + +(75) HashAggregate [codegen id : 54] +Input [6]: [item_sk#55, d_date#56, web_sales#57, store_sales#58, max#69, max#70] +Keys [4]: [item_sk#55, d_date#56, web_sales#57, store_sales#58] +Functions [2]: [max(web_sales#63), max(store_sales#64)] +Aggregate Attributes [2]: [max(web_sales#63)#71, max(store_sales#64)#72] +Results [6]: [item_sk#55, d_date#56, web_sales#57, store_sales#58, max(web_sales#63)#71 AS web_cumulative#73, max(store_sales#64)#72 AS store_cumulative#74] + +(76) Filter [codegen id : 54] +Input [6]: [item_sk#55, d_date#56, web_sales#57, store_sales#58, web_cumulative#73, store_cumulative#74] +Condition : ((isnotnull(web_cumulative#73) AND isnotnull(store_cumulative#74)) AND (web_cumulative#73 > store_cumulative#74)) + +(77) TakeOrderedAndProject +Input [6]: [item_sk#55, d_date#56, web_sales#57, store_sales#58, web_cumulative#73, store_cumulative#74] +Arguments: 100, [item_sk#55 ASC NULLS FIRST, d_date#56 ASC NULLS FIRST], [item_sk#55, d_date#56, web_sales#57, store_sales#58, web_cumulative#73, store_cumulative#74] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a/simplified.txt new file mode 100644 index 0000000000000..f7f09fd64ee68 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a/simplified.txt @@ -0,0 +1,126 @@ +TakeOrderedAndProject [d_date,item_sk,store_cumulative,store_sales,web_cumulative,web_sales] + WholeStageCodegen 
(54) + Filter [store_cumulative,web_cumulative] + HashAggregate [d_date,item_sk,max,max,store_sales,web_sales] [max,max,max(store_sales),max(web_sales),store_cumulative,web_cumulative] + HashAggregate [d_date,item_sk,store_sales,store_sales,web_sales,web_sales] [max,max,max,max] + Project [d_date,item_sk,store_sales,store_sales,web_sales,web_sales] + BroadcastHashJoin [item_sk,item_sk,rk,rk] + Filter [rk] + InputAdapter + Window [d_date,item_sk] + WholeStageCodegen (26) + Sort [d_date,item_sk] + InputAdapter + Exchange [item_sk] #1 + WholeStageCodegen (25) + Project [cume_sales,cume_sales,d_date,d_date,item_sk,item_sk] + Filter [item_sk,item_sk] + InputAdapter + SortMergeJoin [d_date,d_date,item_sk,item_sk] + WholeStageCodegen (12) + Sort [d_date,item_sk] + InputAdapter + Exchange [d_date,item_sk] #2 + WholeStageCodegen (11) + HashAggregate [d_date,isEmpty,item_sk,sum,sumws] [cume_sales,isEmpty,sum,sum(sumws)] + InputAdapter + Exchange [d_date,item_sk,sumws] #3 + WholeStageCodegen (10) + HashAggregate [d_date,item_sk,sumws,sumws] [isEmpty,isEmpty,sum,sum] + Project [d_date,item_sk,sumws,sumws] + BroadcastHashJoin [item_sk,item_sk,rk,rk] + Project [d_date,item_sk,rk,sumws] + Filter [rk] + InputAdapter + Window [d_date,ws_item_sk] + WholeStageCodegen (4) + Sort [d_date,ws_item_sk] + InputAdapter + Exchange [ws_item_sk] #4 + WholeStageCodegen (3) + HashAggregate [d_date,sum,ws_item_sk] [item_sk,sum,sum(UnscaledValue(ws_sales_price)),sumws] + InputAdapter + Exchange [d_date,ws_item_sk] #5 + WholeStageCodegen (2) + HashAggregate [d_date,ws_item_sk,ws_sales_price] [sum,sum] + Project [d_date,ws_item_sk,ws_sales_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_sales_price,ws_sold_date_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (1) + Project [d_date,d_date_sk] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan 
parquet default.date_dim [d_date,d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (9) + Project [item_sk,rk,sumws] + Filter [rk] + InputAdapter + Window [d_date,ws_item_sk] + WholeStageCodegen (8) + Sort [d_date,ws_item_sk] + InputAdapter + ReusedExchange [d_date,item_sk,sumws,ws_item_sk] #4 + WholeStageCodegen (24) + Sort [d_date,item_sk] + InputAdapter + Exchange [d_date,item_sk] #8 + WholeStageCodegen (23) + HashAggregate [d_date,isEmpty,item_sk,sum,sumss] [cume_sales,isEmpty,sum,sum(sumss)] + InputAdapter + Exchange [d_date,item_sk,sumss] #9 + WholeStageCodegen (22) + HashAggregate [d_date,item_sk,sumss,sumss] [isEmpty,isEmpty,sum,sum] + Project [d_date,item_sk,sumss,sumss] + BroadcastHashJoin [item_sk,item_sk,rk,rk] + Project [d_date,item_sk,rk,sumss] + Filter [rk] + InputAdapter + Window [d_date,ss_item_sk] + WholeStageCodegen (16) + Sort [d_date,ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #10 + WholeStageCodegen (15) + HashAggregate [d_date,ss_item_sk,sum] [item_sk,sum,sum(UnscaledValue(ss_sales_price)),sumss] + InputAdapter + Exchange [d_date,ss_item_sk] #11 + WholeStageCodegen (14) + HashAggregate [d_date,ss_item_sk,ss_sales_price] [sum,sum] + Project [d_date,ss_item_sk,ss_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [d_date,d_date_sk] #6 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (21) + Project [item_sk,rk,sumss] + Filter [rk] + InputAdapter + Window [d_date,ss_item_sk] + WholeStageCodegen (20) + Sort [d_date,ss_item_sk] + InputAdapter + ReusedExchange [d_date,item_sk,ss_item_sk,sumss] #10 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (53) + Project [item_sk,rk,store_sales,web_sales] + Filter [rk] + InputAdapter + Window [d_date,item_sk] + WholeStageCodegen (52) + Sort [d_date,item_sk] + 
InputAdapter + ReusedExchange [d_date,item_sk,store_sales,web_sales] #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.sf100/explain.txt new file mode 100644 index 0000000000000..f506aebd2cb78 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.sf100/explain.txt @@ -0,0 +1,313 @@ +== Physical Plan == +TakeOrderedAndProject (58) ++- * Project (57) + +- * SortMergeJoin Inner (56) + :- * Project (48) + : +- * SortMergeJoin Inner (47) + : :- * Sort (37) + : : +- Exchange (36) + : : +- * Filter (35) + : : +- Window (34) + : : +- * Sort (33) + : : +- Exchange (32) + : : +- * Project (31) + : : +- Window (30) + : : +- * Sort (29) + : : +- Exchange (28) + : : +- * HashAggregate (27) + : : +- Exchange (26) + : : +- * HashAggregate (25) + : : +- * Project (24) + : : +- * SortMergeJoin Inner (23) + : : :- * Sort (17) + : : : +- Exchange (16) + : : : +- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.catalog_sales (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.date_dim (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.call_center (10) + : : +- * Sort (22) + : : +- Exchange (21) + : : +- * Filter (20) + : : +- * ColumnarToRow (19) + : : +- Scan parquet default.item (18) + : +- * Sort (46) + : +- Exchange (45) + : +- * Project (44) + : +- * Filter (43) + : +- Window (42) + : +- * Sort (41) + : +- Exchange (40) + : +- * HashAggregate (39) + : +- ReusedExchange (38) + +- * Sort (55) + +- Exchange (54) + +- * Project (53) + +- * Filter (52) + +- Window (51) + 
+- * Sort (50) + +- ReusedExchange (49) + + +(1) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#1, cs_call_center_sk#2, cs_item_sk#3, cs_sales_price#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_call_center_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [4]: [cs_sold_date_sk#1, cs_call_center_sk#2, cs_item_sk#3, cs_sales_price#4] + +(3) Filter [codegen id : 3] +Input [4]: [cs_sold_date_sk#1, cs_call_center_sk#2, cs_item_sk#3, cs_sales_price#4] +Condition : ((isnotnull(cs_item_sk#3) AND isnotnull(cs_sold_date_sk#1)) AND isnotnull(cs_call_center_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#5, d_year#6, d_moy#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Condition : ((((d_year#6 = 1999) OR ((d_year#6 = 1998) AND (d_moy#7 = 12))) OR ((d_year#6 = 2000) AND (d_moy#7 = 1))) AND isnotnull(d_date_sk#5)) + +(7) BroadcastExchange +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#8] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(9) Project [codegen id : 3] +Output [5]: [cs_call_center_sk#2, cs_item_sk#3, cs_sales_price#4, d_year#6, d_moy#7] +Input [7]: 
[cs_sold_date_sk#1, cs_call_center_sk#2, cs_item_sk#3, cs_sales_price#4, d_date_sk#5, d_year#6, d_moy#7] + +(10) Scan parquet default.call_center +Output [2]: [cc_call_center_sk#9, cc_name#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/call_center] +PushedFilters: [IsNotNull(cc_call_center_sk), IsNotNull(cc_name)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [cc_call_center_sk#9, cc_name#10] + +(12) Filter [codegen id : 2] +Input [2]: [cc_call_center_sk#9, cc_name#10] +Condition : (isnotnull(cc_call_center_sk#9) AND isnotnull(cc_name#10)) + +(13) BroadcastExchange +Input [2]: [cc_call_center_sk#9, cc_name#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_call_center_sk#2] +Right keys [1]: [cc_call_center_sk#9] +Join condition: None + +(15) Project [codegen id : 3] +Output [5]: [cs_item_sk#3, cs_sales_price#4, d_year#6, d_moy#7, cc_name#10] +Input [7]: [cs_call_center_sk#2, cs_item_sk#3, cs_sales_price#4, d_year#6, d_moy#7, cc_call_center_sk#9, cc_name#10] + +(16) Exchange +Input [5]: [cs_item_sk#3, cs_sales_price#4, d_year#6, d_moy#7, cc_name#10] +Arguments: hashpartitioning(cs_item_sk#3, 5), true, [id=#12] + +(17) Sort [codegen id : 4] +Input [5]: [cs_item_sk#3, cs_sales_price#4, d_year#6, d_moy#7, cc_name#10] +Arguments: [cs_item_sk#3 ASC NULLS FIRST], false, 0 + +(18) Scan parquet default.item +Output [3]: [i_item_sk#13, i_brand#14, i_category#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand), IsNotNull(i_category)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 5] +Input [3]: [i_item_sk#13, i_brand#14, 
i_category#15] + +(20) Filter [codegen id : 5] +Input [3]: [i_item_sk#13, i_brand#14, i_category#15] +Condition : ((isnotnull(i_item_sk#13) AND isnotnull(i_brand#14)) AND isnotnull(i_category#15)) + +(21) Exchange +Input [3]: [i_item_sk#13, i_brand#14, i_category#15] +Arguments: hashpartitioning(i_item_sk#13, 5), true, [id=#16] + +(22) Sort [codegen id : 6] +Input [3]: [i_item_sk#13, i_brand#14, i_category#15] +Arguments: [i_item_sk#13 ASC NULLS FIRST], false, 0 + +(23) SortMergeJoin [codegen id : 7] +Left keys [1]: [cs_item_sk#3] +Right keys [1]: [i_item_sk#13] +Join condition: None + +(24) Project [codegen id : 7] +Output [6]: [i_brand#14, i_category#15, cs_sales_price#4, d_year#6, d_moy#7, cc_name#10] +Input [8]: [cs_item_sk#3, cs_sales_price#4, d_year#6, d_moy#7, cc_name#10, i_item_sk#13, i_brand#14, i_category#15] + +(25) HashAggregate [codegen id : 7] +Input [6]: [i_brand#14, i_category#15, cs_sales_price#4, d_year#6, d_moy#7, cc_name#10] +Keys [5]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7] +Functions [1]: [partial_sum(UnscaledValue(cs_sales_price#4))] +Aggregate Attributes [1]: [sum#17] +Results [6]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, sum#18] + +(26) Exchange +Input [6]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, sum#18] +Arguments: hashpartitioning(i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, 5), true, [id=#19] + +(27) HashAggregate [codegen id : 8] +Input [6]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, sum#18] +Keys [5]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7] +Functions [1]: [sum(UnscaledValue(cs_sales_price#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#4))#20] +Results [7]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, MakeDecimal(sum(UnscaledValue(cs_sales_price#4))#20,17,2) AS sum_sales#21, MakeDecimal(sum(UnscaledValue(cs_sales_price#4))#20,17,2) AS _w0#22] + +(28) Exchange +Input [7]: [i_category#15, i_brand#14, 
cc_name#10, d_year#6, d_moy#7, sum_sales#21, _w0#22] +Arguments: hashpartitioning(i_category#15, i_brand#14, cc_name#10, d_year#6, 5), true, [id=#23] + +(29) Sort [codegen id : 9] +Input [7]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, sum_sales#21, _w0#22] +Arguments: [i_category#15 ASC NULLS FIRST, i_brand#14 ASC NULLS FIRST, cc_name#10 ASC NULLS FIRST, d_year#6 ASC NULLS FIRST], false, 0 + +(30) Window +Input [7]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, sum_sales#21, _w0#22] +Arguments: [avg(_w0#22) windowspecdefinition(i_category#15, i_brand#14, cc_name#10, d_year#6, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#24], [i_category#15, i_brand#14, cc_name#10, d_year#6] + +(31) Project [codegen id : 10] +Output [7]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, sum_sales#21, avg_monthly_sales#24] +Input [8]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, sum_sales#21, _w0#22, avg_monthly_sales#24] + +(32) Exchange +Input [7]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, sum_sales#21, avg_monthly_sales#24] +Arguments: hashpartitioning(i_category#15, i_brand#14, cc_name#10, 5), true, [id=#25] + +(33) Sort [codegen id : 11] +Input [7]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, sum_sales#21, avg_monthly_sales#24] +Arguments: [i_category#15 ASC NULLS FIRST, i_brand#14 ASC NULLS FIRST, cc_name#10 ASC NULLS FIRST, d_year#6 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST], false, 0 + +(34) Window +Input [7]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, sum_sales#21, avg_monthly_sales#24] +Arguments: [rank(d_year#6, d_moy#7) windowspecdefinition(i_category#15, i_brand#14, cc_name#10, d_year#6 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#26], [i_category#15, i_brand#14, cc_name#10], [d_year#6 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST] + +(35) Filter 
[codegen id : 12] +Input [8]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, sum_sales#21, avg_monthly_sales#24, rn#26] +Condition : (((((isnotnull(avg_monthly_sales#24) AND isnotnull(d_year#6)) AND (d_year#6 = 1999)) AND (avg_monthly_sales#24 > 0.000000)) AND (CASE WHEN (avg_monthly_sales#24 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6), true))) / promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(38,16), true) ELSE null END > 0.1000000000000000)) AND isnotnull(rn#26)) + +(36) Exchange +Input [8]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, sum_sales#21, avg_monthly_sales#24, rn#26] +Arguments: hashpartitioning(i_category#15, i_brand#14, cc_name#10, rn#26, 5), true, [id=#27] + +(37) Sort [codegen id : 13] +Input [8]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, sum_sales#21, avg_monthly_sales#24, rn#26] +Arguments: [i_category#15 ASC NULLS FIRST, i_brand#14 ASC NULLS FIRST, cc_name#10 ASC NULLS FIRST, rn#26 ASC NULLS FIRST], false, 0 + +(38) ReusedExchange [Reuses operator id: 26] +Output [6]: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, sum#33] + +(39) HashAggregate [codegen id : 21] +Input [6]: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, sum#33] +Keys [5]: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32] +Functions [1]: [sum(UnscaledValue(cs_sales_price#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#4))#34] +Results [6]: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, MakeDecimal(sum(UnscaledValue(cs_sales_price#4))#34,17,2) AS sum_sales#35] + +(40) Exchange +Input [6]: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, sum_sales#35] +Arguments: hashpartitioning(i_category#28, i_brand#29, cc_name#30, 5), true, [id=#36] + +(41) Sort [codegen id : 22] 
+Input [6]: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, sum_sales#35] +Arguments: [i_category#28 ASC NULLS FIRST, i_brand#29 ASC NULLS FIRST, cc_name#30 ASC NULLS FIRST, d_year#31 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST], false, 0 + +(42) Window +Input [6]: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, sum_sales#35] +Arguments: [rank(d_year#31, d_moy#32) windowspecdefinition(i_category#28, i_brand#29, cc_name#30, d_year#31 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#37], [i_category#28, i_brand#29, cc_name#30], [d_year#31 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST] + +(43) Filter [codegen id : 23] +Input [7]: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, sum_sales#35, rn#37] +Condition : isnotnull(rn#37) + +(44) Project [codegen id : 23] +Output [5]: [i_category#28, i_brand#29, cc_name#30, sum_sales#35, rn#37] +Input [7]: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, sum_sales#35, rn#37] + +(45) Exchange +Input [5]: [i_category#28, i_brand#29, cc_name#30, sum_sales#35, rn#37] +Arguments: hashpartitioning(i_category#28, i_brand#29, cc_name#30, (rn#37 + 1), 5), true, [id=#38] + +(46) Sort [codegen id : 24] +Input [5]: [i_category#28, i_brand#29, cc_name#30, sum_sales#35, rn#37] +Arguments: [i_category#28 ASC NULLS FIRST, i_brand#29 ASC NULLS FIRST, cc_name#30 ASC NULLS FIRST, (rn#37 + 1) ASC NULLS FIRST], false, 0 + +(47) SortMergeJoin [codegen id : 25] +Left keys [4]: [i_category#15, i_brand#14, cc_name#10, rn#26] +Right keys [4]: [i_category#28, i_brand#29, cc_name#30, (rn#37 + 1)] +Join condition: None + +(48) Project [codegen id : 25] +Output [9]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, sum_sales#21, avg_monthly_sales#24, rn#26, sum_sales#35] +Input [13]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, sum_sales#21, avg_monthly_sales#24, rn#26, i_category#28, i_brand#29, cc_name#30, sum_sales#35, 
rn#37] + +(49) ReusedExchange [Reuses operator id: 40] +Output [6]: [i_category#39, i_brand#40, cc_name#41, d_year#42, d_moy#43, sum_sales#44] + +(50) Sort [codegen id : 34] +Input [6]: [i_category#39, i_brand#40, cc_name#41, d_year#42, d_moy#43, sum_sales#44] +Arguments: [i_category#39 ASC NULLS FIRST, i_brand#40 ASC NULLS FIRST, cc_name#41 ASC NULLS FIRST, d_year#42 ASC NULLS FIRST, d_moy#43 ASC NULLS FIRST], false, 0 + +(51) Window +Input [6]: [i_category#39, i_brand#40, cc_name#41, d_year#42, d_moy#43, sum_sales#44] +Arguments: [rank(d_year#42, d_moy#43) windowspecdefinition(i_category#39, i_brand#40, cc_name#41, d_year#42 ASC NULLS FIRST, d_moy#43 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#45], [i_category#39, i_brand#40, cc_name#41], [d_year#42 ASC NULLS FIRST, d_moy#43 ASC NULLS FIRST] + +(52) Filter [codegen id : 35] +Input [7]: [i_category#39, i_brand#40, cc_name#41, d_year#42, d_moy#43, sum_sales#44, rn#45] +Condition : isnotnull(rn#45) + +(53) Project [codegen id : 35] +Output [5]: [i_category#39, i_brand#40, cc_name#41, sum_sales#44, rn#45] +Input [7]: [i_category#39, i_brand#40, cc_name#41, d_year#42, d_moy#43, sum_sales#44, rn#45] + +(54) Exchange +Input [5]: [i_category#39, i_brand#40, cc_name#41, sum_sales#44, rn#45] +Arguments: hashpartitioning(i_category#39, i_brand#40, cc_name#41, (rn#45 - 1), 5), true, [id=#46] + +(55) Sort [codegen id : 36] +Input [5]: [i_category#39, i_brand#40, cc_name#41, sum_sales#44, rn#45] +Arguments: [i_category#39 ASC NULLS FIRST, i_brand#40 ASC NULLS FIRST, cc_name#41 ASC NULLS FIRST, (rn#45 - 1) ASC NULLS FIRST], false, 0 + +(56) SortMergeJoin [codegen id : 37] +Left keys [4]: [i_category#15, i_brand#14, cc_name#10, rn#26] +Right keys [4]: [i_category#39, i_brand#40, cc_name#41, (rn#45 - 1)] +Join condition: None + +(57) Project [codegen id : 37] +Output [8]: [i_category#15, i_brand#14, d_year#6, d_moy#7, avg_monthly_sales#24, sum_sales#21, sum_sales#35 AS psum#47, 
sum_sales#44 AS nsum#48] +Input [14]: [i_category#15, i_brand#14, cc_name#10, d_year#6, d_moy#7, sum_sales#21, avg_monthly_sales#24, rn#26, sum_sales#35, i_category#39, i_brand#40, cc_name#41, sum_sales#44, rn#45] + +(58) TakeOrderedAndProject +Input [8]: [i_category#15, i_brand#14, d_year#6, d_moy#7, avg_monthly_sales#24, sum_sales#21, psum#47, nsum#48] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6), true) ASC NULLS FIRST, d_year#6 ASC NULLS FIRST], [i_category#15, i_brand#14, d_year#6, d_moy#7, avg_monthly_sales#24, sum_sales#21, psum#47, nsum#48] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.sf100/simplified.txt new file mode 100644 index 0000000000000..eb386bc583093 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.sf100/simplified.txt @@ -0,0 +1,107 @@ +TakeOrderedAndProject [avg_monthly_sales,d_moy,d_year,i_brand,i_category,nsum,psum,sum_sales] + WholeStageCodegen (37) + Project [avg_monthly_sales,d_moy,d_year,i_brand,i_category,sum_sales,sum_sales,sum_sales] + SortMergeJoin [cc_name,cc_name,i_brand,i_brand,i_category,i_category,rn,rn] + InputAdapter + WholeStageCodegen (25) + Project [avg_monthly_sales,cc_name,d_moy,d_year,i_brand,i_category,rn,sum_sales,sum_sales] + SortMergeJoin [cc_name,cc_name,i_brand,i_brand,i_category,i_category,rn,rn] + InputAdapter + WholeStageCodegen (13) + Sort [cc_name,i_brand,i_category,rn] + InputAdapter + Exchange [cc_name,i_brand,i_category,rn] #1 + WholeStageCodegen (12) + Filter [avg_monthly_sales,d_year,rn,sum_sales] + InputAdapter + Window [cc_name,d_moy,d_year,i_brand,i_category] + WholeStageCodegen (11) + Sort [cc_name,d_moy,d_year,i_brand,i_category] + InputAdapter + Exchange [cc_name,i_brand,i_category] #2 + 
WholeStageCodegen (10) + Project [avg_monthly_sales,cc_name,d_moy,d_year,i_brand,i_category,sum_sales] + InputAdapter + Window [_w0,cc_name,d_year,i_brand,i_category] + WholeStageCodegen (9) + Sort [cc_name,d_year,i_brand,i_category] + InputAdapter + Exchange [cc_name,d_year,i_brand,i_category] #3 + WholeStageCodegen (8) + HashAggregate [cc_name,d_moy,d_year,i_brand,i_category,sum] [_w0,sum,sum(UnscaledValue(cs_sales_price)),sum_sales] + InputAdapter + Exchange [cc_name,d_moy,d_year,i_brand,i_category] #4 + WholeStageCodegen (7) + HashAggregate [cc_name,cs_sales_price,d_moy,d_year,i_brand,i_category] [sum,sum] + Project [cc_name,cs_sales_price,d_moy,d_year,i_brand,i_category] + SortMergeJoin [cs_item_sk,i_item_sk] + InputAdapter + WholeStageCodegen (4) + Sort [cs_item_sk] + InputAdapter + Exchange [cs_item_sk] #5 + WholeStageCodegen (3) + Project [cc_name,cs_item_sk,cs_sales_price,d_moy,d_year] + BroadcastHashJoin [cc_call_center_sk,cs_call_center_sk] + Project [cs_call_center_sk,cs_item_sk,cs_sales_price,d_moy,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_call_center_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (1) + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (2) + Filter [cc_call_center_sk,cc_name] + ColumnarToRow + InputAdapter + Scan parquet default.call_center [cc_call_center_sk,cc_name] + InputAdapter + WholeStageCodegen (6) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #8 + WholeStageCodegen (5) + Filter [i_brand,i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_category,i_item_sk] + InputAdapter + WholeStageCodegen (24) + Sort [cc_name,i_brand,i_category,rn] + InputAdapter + 
Exchange [cc_name,i_brand,i_category,rn] #9 + WholeStageCodegen (23) + Project [cc_name,i_brand,i_category,rn,sum_sales] + Filter [rn] + InputAdapter + Window [cc_name,d_moy,d_year,i_brand,i_category] + WholeStageCodegen (22) + Sort [cc_name,d_moy,d_year,i_brand,i_category] + InputAdapter + Exchange [cc_name,i_brand,i_category] #10 + WholeStageCodegen (21) + HashAggregate [cc_name,d_moy,d_year,i_brand,i_category,sum] [sum,sum(UnscaledValue(cs_sales_price)),sum_sales] + InputAdapter + ReusedExchange [cc_name,d_moy,d_year,i_brand,i_category,sum] #4 + InputAdapter + WholeStageCodegen (36) + Sort [cc_name,i_brand,i_category,rn] + InputAdapter + Exchange [cc_name,i_brand,i_category,rn] #11 + WholeStageCodegen (35) + Project [cc_name,i_brand,i_category,rn,sum_sales] + Filter [rn] + InputAdapter + Window [cc_name,d_moy,d_year,i_brand,i_category] + WholeStageCodegen (34) + Sort [cc_name,d_moy,d_year,i_brand,i_category] + InputAdapter + ReusedExchange [cc_name,d_moy,d_year,i_brand,i_category,sum_sales] #10 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57/explain.txt new file mode 100644 index 0000000000000..1ec955a59b3ca --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57/explain.txt @@ -0,0 +1,278 @@ +== Physical Plan == +TakeOrderedAndProject (51) ++- * Project (50) + +- * BroadcastHashJoin Inner BuildRight (49) + :- * Project (42) + : +- * BroadcastHashJoin Inner BuildRight (41) + : :- * Filter (32) + : : +- Window (31) + : : +- * Sort (30) + : : +- Exchange (29) + : : +- * Project (28) + : : +- Window (27) + : : +- * Sort (26) + : : +- Exchange (25) + : : +- * HashAggregate (24) + : : +- Exchange (23) + : : +- * HashAggregate (22) + : : +- * Project (21) + : : +- * BroadcastHashJoin Inner BuildRight (20) + : : :- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : 
: : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.item (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.catalog_sales (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.date_dim (10) + : : +- BroadcastExchange (19) + : : +- * Filter (18) + : : +- * ColumnarToRow (17) + : : +- Scan parquet default.call_center (16) + : +- BroadcastExchange (40) + : +- * Project (39) + : +- * Filter (38) + : +- Window (37) + : +- * Sort (36) + : +- Exchange (35) + : +- * HashAggregate (34) + : +- ReusedExchange (33) + +- BroadcastExchange (48) + +- * Project (47) + +- * Filter (46) + +- Window (45) + +- * Sort (44) + +- ReusedExchange (43) + + +(1) Scan parquet default.item +Output [3]: [i_item_sk#1, i_brand#2, i_category#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category), IsNotNull(i_brand)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] + +(3) Filter [codegen id : 4] +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] +Condition : ((isnotnull(i_item_sk#1) AND isnotnull(i_category#3)) AND isnotnull(i_brand#2)) + +(4) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#4, cs_call_center_sk#5, cs_item_sk#6, cs_sales_price#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_call_center_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cs_sold_date_sk#4, 
cs_call_center_sk#5, cs_item_sk#6, cs_sales_price#7] + +(6) Filter [codegen id : 1] +Input [4]: [cs_sold_date_sk#4, cs_call_center_sk#5, cs_item_sk#6, cs_sales_price#7] +Condition : ((isnotnull(cs_item_sk#6) AND isnotnull(cs_sold_date_sk#4)) AND isnotnull(cs_call_center_sk#5)) + +(7) BroadcastExchange +Input [4]: [cs_sold_date_sk#4, cs_call_center_sk#5, cs_item_sk#6, cs_sales_price#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, false] as bigint)),false), [id=#8] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [cs_item_sk#6] +Join condition: None + +(9) Project [codegen id : 4] +Output [5]: [i_brand#2, i_category#3, cs_sold_date_sk#4, cs_call_center_sk#5, cs_sales_price#7] +Input [7]: [i_item_sk#1, i_brand#2, i_category#3, cs_sold_date_sk#4, cs_call_center_sk#5, cs_item_sk#6, cs_sales_price#7] + +(10) Scan parquet default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_moy#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] + +(12) Filter [codegen id : 2] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Condition : ((((d_year#10 = 1999) OR ((d_year#10 = 1998) AND (d_moy#11 = 12))) OR ((d_year#10 = 2000) AND (d_moy#11 = 1))) AND isnotnull(d_date_sk#9)) + +(13) BroadcastExchange +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#4] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(15) Project [codegen id : 4] +Output [6]: [i_brand#2, i_category#3, 
cs_call_center_sk#5, cs_sales_price#7, d_year#10, d_moy#11] +Input [8]: [i_brand#2, i_category#3, cs_sold_date_sk#4, cs_call_center_sk#5, cs_sales_price#7, d_date_sk#9, d_year#10, d_moy#11] + +(16) Scan parquet default.call_center +Output [2]: [cc_call_center_sk#13, cc_name#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/call_center] +PushedFilters: [IsNotNull(cc_call_center_sk), IsNotNull(cc_name)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [2]: [cc_call_center_sk#13, cc_name#14] + +(18) Filter [codegen id : 3] +Input [2]: [cc_call_center_sk#13, cc_name#14] +Condition : (isnotnull(cc_call_center_sk#13) AND isnotnull(cc_name#14)) + +(19) BroadcastExchange +Input [2]: [cc_call_center_sk#13, cc_name#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#15] + +(20) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_call_center_sk#5] +Right keys [1]: [cc_call_center_sk#13] +Join condition: None + +(21) Project [codegen id : 4] +Output [6]: [i_brand#2, i_category#3, cs_sales_price#7, d_year#10, d_moy#11, cc_name#14] +Input [8]: [i_brand#2, i_category#3, cs_call_center_sk#5, cs_sales_price#7, d_year#10, d_moy#11, cc_call_center_sk#13, cc_name#14] + +(22) HashAggregate [codegen id : 4] +Input [6]: [i_brand#2, i_category#3, cs_sales_price#7, d_year#10, d_moy#11, cc_name#14] +Keys [5]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11] +Functions [1]: [partial_sum(UnscaledValue(cs_sales_price#7))] +Aggregate Attributes [1]: [sum#16] +Results [6]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum#17] + +(23) Exchange +Input [6]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum#17] +Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, 5), true, [id=#18] + +(24) HashAggregate [codegen id : 5] +Input [6]: [i_category#3, 
i_brand#2, cc_name#14, d_year#10, d_moy#11, sum#17] +Keys [5]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11] +Functions [1]: [sum(UnscaledValue(cs_sales_price#7))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#7))#19] +Results [7]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, MakeDecimal(sum(UnscaledValue(cs_sales_price#7))#19,17,2) AS sum_sales#20, MakeDecimal(sum(UnscaledValue(cs_sales_price#7))#19,17,2) AS _w0#21] + +(25) Exchange +Input [7]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, _w0#21] +Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#14, d_year#10, 5), true, [id=#22] + +(26) Sort [codegen id : 6] +Input [7]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, _w0#21] +Arguments: [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, cc_name#14 ASC NULLS FIRST, d_year#10 ASC NULLS FIRST], false, 0 + +(27) Window +Input [7]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, _w0#21] +Arguments: [avg(_w0#21) windowspecdefinition(i_category#3, i_brand#2, cc_name#14, d_year#10, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#23], [i_category#3, i_brand#2, cc_name#14, d_year#10] + +(28) Project [codegen id : 7] +Output [7]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, avg_monthly_sales#23] +Input [8]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, _w0#21, avg_monthly_sales#23] + +(29) Exchange +Input [7]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, avg_monthly_sales#23] +Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#14, 5), true, [id=#24] + +(30) Sort [codegen id : 8] +Input [7]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, avg_monthly_sales#23] +Arguments: [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, cc_name#14 ASC NULLS FIRST, d_year#10 ASC 
NULLS FIRST, d_moy#11 ASC NULLS FIRST], false, 0 + +(31) Window +Input [7]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, avg_monthly_sales#23] +Arguments: [rank(d_year#10, d_moy#11) windowspecdefinition(i_category#3, i_brand#2, cc_name#14, d_year#10 ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#25], [i_category#3, i_brand#2, cc_name#14], [d_year#10 ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST] + +(32) Filter [codegen id : 23] +Input [8]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, avg_monthly_sales#23, rn#25] +Condition : (((((isnotnull(d_year#10) AND isnotnull(avg_monthly_sales#23)) AND (d_year#10 = 1999)) AND (avg_monthly_sales#23 > 0.000000)) AND (CASE WHEN (avg_monthly_sales#23 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#20 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#23 as decimal(22,6)))), DecimalType(22,6), true))) / promote_precision(cast(avg_monthly_sales#23 as decimal(22,6)))), DecimalType(38,16), true) ELSE null END > 0.1000000000000000)) AND isnotnull(rn#25)) + +(33) ReusedExchange [Reuses operator id: 23] +Output [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum#31] + +(34) HashAggregate [codegen id : 13] +Input [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum#31] +Keys [5]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30] +Functions [1]: [sum(UnscaledValue(cs_sales_price#7))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#7))#32] +Results [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, MakeDecimal(sum(UnscaledValue(cs_sales_price#7))#32,17,2) AS sum_sales#33] + +(35) Exchange +Input [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#33] +Arguments: hashpartitioning(i_category#26, i_brand#27, cc_name#28, 5), true, [id=#34] + +(36) Sort [codegen 
id : 14] +Input [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#33] +Arguments: [i_category#26 ASC NULLS FIRST, i_brand#27 ASC NULLS FIRST, cc_name#28 ASC NULLS FIRST, d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST], false, 0 + +(37) Window +Input [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#33] +Arguments: [rank(d_year#29, d_moy#30) windowspecdefinition(i_category#26, i_brand#27, cc_name#28, d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#35], [i_category#26, i_brand#27, cc_name#28], [d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST] + +(38) Filter [codegen id : 15] +Input [7]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#33, rn#35] +Condition : isnotnull(rn#35) + +(39) Project [codegen id : 15] +Output [5]: [i_category#26, i_brand#27, cc_name#28, sum_sales#33, rn#35] +Input [7]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#33, rn#35] + +(40) BroadcastExchange +Input [5]: [i_category#26, i_brand#27, cc_name#28, sum_sales#33, rn#35] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, true] + 1)),false), [id=#36] + +(41) BroadcastHashJoin [codegen id : 23] +Left keys [4]: [i_category#3, i_brand#2, cc_name#14, rn#25] +Right keys [4]: [i_category#26, i_brand#27, cc_name#28, (rn#35 + 1)] +Join condition: None + +(42) Project [codegen id : 23] +Output [9]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, avg_monthly_sales#23, rn#25, sum_sales#33] +Input [13]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, avg_monthly_sales#23, rn#25, i_category#26, i_brand#27, cc_name#28, sum_sales#33, rn#35] + +(43) ReusedExchange [Reuses operator id: 35] +Output [6]: [i_category#37, i_brand#38, cc_name#39, d_year#40, d_moy#41, sum_sales#42] + +(44) Sort 
[codegen id : 21] +Input [6]: [i_category#37, i_brand#38, cc_name#39, d_year#40, d_moy#41, sum_sales#42] +Arguments: [i_category#37 ASC NULLS FIRST, i_brand#38 ASC NULLS FIRST, cc_name#39 ASC NULLS FIRST, d_year#40 ASC NULLS FIRST, d_moy#41 ASC NULLS FIRST], false, 0 + +(45) Window +Input [6]: [i_category#37, i_brand#38, cc_name#39, d_year#40, d_moy#41, sum_sales#42] +Arguments: [rank(d_year#40, d_moy#41) windowspecdefinition(i_category#37, i_brand#38, cc_name#39, d_year#40 ASC NULLS FIRST, d_moy#41 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#43], [i_category#37, i_brand#38, cc_name#39], [d_year#40 ASC NULLS FIRST, d_moy#41 ASC NULLS FIRST] + +(46) Filter [codegen id : 22] +Input [7]: [i_category#37, i_brand#38, cc_name#39, d_year#40, d_moy#41, sum_sales#42, rn#43] +Condition : isnotnull(rn#43) + +(47) Project [codegen id : 22] +Output [5]: [i_category#37, i_brand#38, cc_name#39, sum_sales#42, rn#43] +Input [7]: [i_category#37, i_brand#38, cc_name#39, d_year#40, d_moy#41, sum_sales#42, rn#43] + +(48) BroadcastExchange +Input [5]: [i_category#37, i_brand#38, cc_name#39, sum_sales#42, rn#43] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, true] - 1)),false), [id=#44] + +(49) BroadcastHashJoin [codegen id : 23] +Left keys [4]: [i_category#3, i_brand#2, cc_name#14, rn#25] +Right keys [4]: [i_category#37, i_brand#38, cc_name#39, (rn#43 - 1)] +Join condition: None + +(50) Project [codegen id : 23] +Output [8]: [i_category#3, i_brand#2, d_year#10, d_moy#11, avg_monthly_sales#23, sum_sales#20, sum_sales#33 AS psum#45, sum_sales#42 AS nsum#46] +Input [14]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, avg_monthly_sales#23, rn#25, sum_sales#33, i_category#37, i_brand#38, cc_name#39, sum_sales#42, rn#43] + +(51) TakeOrderedAndProject +Input [8]: [i_category#3, i_brand#2, d_year#10, d_moy#11, avg_monthly_sales#23, 
sum_sales#20, psum#45, nsum#46] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#20 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#23 as decimal(22,6)))), DecimalType(22,6), true) ASC NULLS FIRST, d_year#10 ASC NULLS FIRST], [i_category#3, i_brand#2, d_year#10, d_moy#11, avg_monthly_sales#23, sum_sales#20, psum#45, nsum#46] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57/simplified.txt new file mode 100644 index 0000000000000..6a0135b852696 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57/simplified.txt @@ -0,0 +1,84 @@ +TakeOrderedAndProject [avg_monthly_sales,d_moy,d_year,i_brand,i_category,nsum,psum,sum_sales] + WholeStageCodegen (23) + Project [avg_monthly_sales,d_moy,d_year,i_brand,i_category,sum_sales,sum_sales,sum_sales] + BroadcastHashJoin [cc_name,cc_name,i_brand,i_brand,i_category,i_category,rn,rn] + Project [avg_monthly_sales,cc_name,d_moy,d_year,i_brand,i_category,rn,sum_sales,sum_sales] + BroadcastHashJoin [cc_name,cc_name,i_brand,i_brand,i_category,i_category,rn,rn] + Filter [avg_monthly_sales,d_year,rn,sum_sales] + InputAdapter + Window [cc_name,d_moy,d_year,i_brand,i_category] + WholeStageCodegen (8) + Sort [cc_name,d_moy,d_year,i_brand,i_category] + InputAdapter + Exchange [cc_name,i_brand,i_category] #1 + WholeStageCodegen (7) + Project [avg_monthly_sales,cc_name,d_moy,d_year,i_brand,i_category,sum_sales] + InputAdapter + Window [_w0,cc_name,d_year,i_brand,i_category] + WholeStageCodegen (6) + Sort [cc_name,d_year,i_brand,i_category] + InputAdapter + Exchange [cc_name,d_year,i_brand,i_category] #2 + WholeStageCodegen (5) + HashAggregate [cc_name,d_moy,d_year,i_brand,i_category,sum] [_w0,sum,sum(UnscaledValue(cs_sales_price)),sum_sales] + InputAdapter + Exchange [cc_name,d_moy,d_year,i_brand,i_category] #3 + WholeStageCodegen (4) + HashAggregate 
[cc_name,cs_sales_price,d_moy,d_year,i_brand,i_category] [sum,sum] + Project [cc_name,cs_sales_price,d_moy,d_year,i_brand,i_category] + BroadcastHashJoin [cc_call_center_sk,cs_call_center_sk] + Project [cs_call_center_sk,cs_sales_price,d_moy,d_year,i_brand,i_category] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_call_center_sk,cs_sales_price,cs_sold_date_sk,i_brand,i_category] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [i_brand,i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_category,i_item_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Filter [cs_call_center_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [d_date_sk,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Filter [cc_call_center_sk,cc_name] + ColumnarToRow + InputAdapter + Scan parquet default.call_center [cc_call_center_sk,cc_name] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (15) + Project [cc_name,i_brand,i_category,rn,sum_sales] + Filter [rn] + InputAdapter + Window [cc_name,d_moy,d_year,i_brand,i_category] + WholeStageCodegen (14) + Sort [cc_name,d_moy,d_year,i_brand,i_category] + InputAdapter + Exchange [cc_name,i_brand,i_category] #8 + WholeStageCodegen (13) + HashAggregate [cc_name,d_moy,d_year,i_brand,i_category,sum] [sum,sum(UnscaledValue(cs_sales_price)),sum_sales] + InputAdapter + ReusedExchange [cc_name,d_moy,d_year,i_brand,i_category,sum] #3 + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (22) + Project [cc_name,i_brand,i_category,rn,sum_sales] + Filter [rn] + InputAdapter + Window [cc_name,d_moy,d_year,i_brand,i_category] + WholeStageCodegen (21) + Sort 
[cc_name,d_moy,d_year,i_brand,i_category] + InputAdapter + ReusedExchange [cc_name,d_moy,d_year,i_brand,i_category,sum_sales] #8 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/explain.txt new file mode 100644 index 0000000000000..77a7e2a00c4f4 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/explain.txt @@ -0,0 +1,559 @@ +== Physical Plan == +TakeOrderedAndProject (98) ++- * HashAggregate (97) + +- Exchange (96) + +- * HashAggregate (95) + +- Union (94) + :- * HashAggregate (88) + : +- Exchange (87) + : +- * HashAggregate (86) + : +- Union (85) + : :- * HashAggregate (79) + : : +- Exchange (78) + : : +- * HashAggregate (77) + : : +- Union (76) + : : :- * HashAggregate (25) + : : : +- Exchange (24) + : : : +- * HashAggregate (23) + : : : +- * Project (22) + : : : +- * BroadcastHashJoin Inner BuildRight (21) + : : : :- * Project (16) + : : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : : :- Union (9) + : : : : : :- * Project (4) + : : : : : : +- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet default.store_sales (1) + : : : : : +- * Project (8) + : : : : : +- * Filter (7) + : : : : : +- * ColumnarToRow (6) + : : : : : +- Scan parquet default.store_returns (5) + : : : : +- BroadcastExchange (14) + : : : : +- * Project (13) + : : : : +- * Filter (12) + : : : : +- * ColumnarToRow (11) + : : : : +- Scan parquet default.date_dim (10) + : : : +- BroadcastExchange (20) + : : : +- * Filter (19) + : : : +- * ColumnarToRow (18) + : : : +- Scan parquet default.store (17) + : : :- * HashAggregate (46) + : : : +- Exchange (45) + : : : +- * HashAggregate (44) + : : : +- * Project (43) + : : : +- * BroadcastHashJoin Inner BuildRight (42) + : : : :- * Project (37) + : : : : +- * BroadcastHashJoin Inner BuildRight (36) + : : : : :- Union (34) + : : : : : :- 
* Project (29) + : : : : : : +- * Filter (28) + : : : : : : +- * ColumnarToRow (27) + : : : : : : +- Scan parquet default.catalog_sales (26) + : : : : : +- * Project (33) + : : : : : +- * Filter (32) + : : : : : +- * ColumnarToRow (31) + : : : : : +- Scan parquet default.catalog_returns (30) + : : : : +- ReusedExchange (35) + : : : +- BroadcastExchange (41) + : : : +- * Filter (40) + : : : +- * ColumnarToRow (39) + : : : +- Scan parquet default.catalog_page (38) + : : +- * HashAggregate (75) + : : +- Exchange (74) + : : +- * HashAggregate (73) + : : +- * Project (72) + : : +- * BroadcastHashJoin Inner BuildRight (71) + : : :- * Project (66) + : : : +- * BroadcastHashJoin Inner BuildRight (65) + : : : :- Union (63) + : : : : :- * Project (50) + : : : : : +- * Filter (49) + : : : : : +- * ColumnarToRow (48) + : : : : : +- Scan parquet default.web_sales (47) + : : : : +- * Project (62) + : : : : +- * SortMergeJoin Inner (61) + : : : : :- * Sort (55) + : : : : : +- Exchange (54) + : : : : : +- * Filter (53) + : : : : : +- * ColumnarToRow (52) + : : : : : +- Scan parquet default.web_returns (51) + : : : : +- * Sort (60) + : : : : +- Exchange (59) + : : : : +- * Filter (58) + : : : : +- * ColumnarToRow (57) + : : : : +- Scan parquet default.web_sales (56) + : : : +- ReusedExchange (64) + : : +- BroadcastExchange (70) + : : +- * Filter (69) + : : +- * ColumnarToRow (68) + : : +- Scan parquet default.web_site (67) + : +- * HashAggregate (84) + : +- Exchange (83) + : +- * HashAggregate (82) + : +- * HashAggregate (81) + : +- ReusedExchange (80) + +- * HashAggregate (93) + +- Exchange (92) + +- * HashAggregate (91) + +- * HashAggregate (90) + +- ReusedExchange (89) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_sales] 
+ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] + +(3) Filter [codegen id : 1] +Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] +Condition : (isnotnull(cast(ss_sold_date_sk#1 as bigint)) AND isnotnull(cast(ss_store_sk#2 as bigint))) + +(4) Project [codegen id : 1] +Output [6]: [cast(ss_store_sk#2 as bigint) AS store_sk#5, cast(ss_sold_date_sk#1 as bigint) AS date_sk#6, ss_ext_sales_price#3 AS sales_price#7, ss_net_profit#4 AS profit#8, 0.00 AS return_amt#9, 0.00 AS net_loss#10] +Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] + +(5) Scan parquet default.store_returns +Output [4]: [sr_returned_date_sk#11, sr_store_sk#12, sr_return_amt#13, sr_net_loss#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_returns] +PushedFilters: [IsNotNull(sr_returned_date_sk), IsNotNull(sr_store_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 2] +Input [4]: [sr_returned_date_sk#11, sr_store_sk#12, sr_return_amt#13, sr_net_loss#14] + +(7) Filter [codegen id : 2] +Input [4]: [sr_returned_date_sk#11, sr_store_sk#12, sr_return_amt#13, sr_net_loss#14] +Condition : (isnotnull(sr_returned_date_sk#11) AND isnotnull(sr_store_sk#12)) + +(8) Project [codegen id : 2] +Output [6]: [sr_store_sk#12 AS store_sk#15, sr_returned_date_sk#11 AS date_sk#16, 0.00 AS sales_price#17, 0.00 AS profit#18, sr_return_amt#13 AS return_amt#19, sr_net_loss#14 AS net_loss#20] +Input [4]: [sr_returned_date_sk#11, sr_store_sk#12, sr_return_amt#13, sr_net_loss#14] + +(9) Union + +(10) Scan parquet default.date_dim +Output [2]: [d_date_sk#21, d_date#22] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1998-08-04), LessThanOrEqual(d_date,1998-08-18), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#21, d_date#22] + +(12) Filter [codegen id : 3] +Input [2]: [d_date_sk#21, d_date#22] +Condition : (((isnotnull(d_date#22) AND (d_date#22 >= 10442)) AND (d_date#22 <= 10456)) AND isnotnull(d_date_sk#21)) + +(13) Project [codegen id : 3] +Output [1]: [d_date_sk#21] +Input [2]: [d_date_sk#21, d_date#22] + +(14) BroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] + +(15) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [date_sk#6] +Right keys [1]: [cast(d_date_sk#21 as bigint)] +Join condition: None + +(16) Project [codegen id : 5] +Output [5]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10] +Input [7]: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, d_date_sk#21] + +(17) Scan parquet default.store +Output [2]: [s_store_sk#24, s_store_id#25] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 4] +Input [2]: [s_store_sk#24, s_store_id#25] + +(19) Filter [codegen id : 4] +Input [2]: [s_store_sk#24, s_store_id#25] +Condition : isnotnull(s_store_sk#24) + +(20) BroadcastExchange +Input [2]: [s_store_sk#24, s_store_id#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#26] + +(21) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [store_sk#5] +Right keys [1]: [cast(s_store_sk#24 as bigint)] +Join condition: None + +(22) Project [codegen 
id : 5] +Output [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#25] +Input [7]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_sk#24, s_store_id#25] + +(23) HashAggregate [codegen id : 5] +Input [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#25] +Keys [1]: [s_store_id#25] +Functions [4]: [partial_sum(UnscaledValue(sales_price#7)), partial_sum(UnscaledValue(return_amt#9)), partial_sum(UnscaledValue(profit#8)), partial_sum(UnscaledValue(net_loss#10))] +Aggregate Attributes [4]: [sum#27, sum#28, sum#29, sum#30] +Results [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] + +(24) Exchange +Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] +Arguments: hashpartitioning(s_store_id#25, 5), true, [id=#35] + +(25) HashAggregate [codegen id : 6] +Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] +Keys [1]: [s_store_id#25] +Functions [4]: [sum(UnscaledValue(sales_price#7)), sum(UnscaledValue(return_amt#9)), sum(UnscaledValue(profit#8)), sum(UnscaledValue(net_loss#10))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#7))#36, sum(UnscaledValue(return_amt#9))#37, sum(UnscaledValue(profit#8))#38, sum(UnscaledValue(net_loss#10))#39] +Results [5]: [store channel AS channel#40, concat(store, s_store_id#25) AS id#41, MakeDecimal(sum(UnscaledValue(sales_price#7))#36,17,2) AS sales#42, MakeDecimal(sum(UnscaledValue(return_amt#9))#37,17,2) AS returns#43, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#8))#38,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#10))#39,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#44] + +(26) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#47, cs_net_profit#48] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_catalog_page_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 7] +Input [4]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#47, cs_net_profit#48] + +(28) Filter [codegen id : 7] +Input [4]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#47, cs_net_profit#48] +Condition : (isnotnull(cs_sold_date_sk#45) AND isnotnull(cs_catalog_page_sk#46)) + +(29) Project [codegen id : 7] +Output [6]: [cs_catalog_page_sk#46 AS page_sk#49, cs_sold_date_sk#45 AS date_sk#50, cs_ext_sales_price#47 AS sales_price#51, cs_net_profit#48 AS profit#52, 0.00 AS return_amt#53, 0.00 AS net_loss#54] +Input [4]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#47, cs_net_profit#48] + +(30) Scan parquet default.catalog_returns +Output [4]: [cr_returned_date_sk#55, cr_catalog_page_sk#56, cr_return_amount#57, cr_net_loss#58] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/catalog_returns] +PushedFilters: [IsNotNull(cr_returned_date_sk), IsNotNull(cr_catalog_page_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 8] +Input [4]: [cr_returned_date_sk#55, cr_catalog_page_sk#56, cr_return_amount#57, cr_net_loss#58] + +(32) Filter [codegen id : 8] +Input [4]: [cr_returned_date_sk#55, cr_catalog_page_sk#56, cr_return_amount#57, cr_net_loss#58] +Condition : (isnotnull(cr_returned_date_sk#55) AND isnotnull(cr_catalog_page_sk#56)) + +(33) Project [codegen id : 8] +Output [6]: [cr_catalog_page_sk#56 AS page_sk#59, cr_returned_date_sk#55 AS date_sk#60, 0.00 AS sales_price#61, 0.00 AS profit#62, cr_return_amount#57 AS return_amt#63, cr_net_loss#58 AS net_loss#64] +Input [4]: [cr_returned_date_sk#55, cr_catalog_page_sk#56, 
cr_return_amount#57, cr_net_loss#58] + +(34) Union + +(35) ReusedExchange [Reuses operator id: 14] +Output [1]: [d_date_sk#21] + +(36) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [date_sk#50] +Right keys [1]: [d_date_sk#21] +Join condition: None + +(37) Project [codegen id : 11] +Output [5]: [page_sk#49, sales_price#51, profit#52, return_amt#53, net_loss#54] +Input [7]: [page_sk#49, date_sk#50, sales_price#51, profit#52, return_amt#53, net_loss#54, d_date_sk#21] + +(38) Scan parquet default.catalog_page +Output [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/catalog_page] +PushedFilters: [IsNotNull(cp_catalog_page_sk)] +ReadSchema: struct + +(39) ColumnarToRow [codegen id : 10] +Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] + +(40) Filter [codegen id : 10] +Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] +Condition : isnotnull(cp_catalog_page_sk#65) + +(41) BroadcastExchange +Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#67] + +(42) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [page_sk#49] +Right keys [1]: [cp_catalog_page_sk#65] +Join condition: None + +(43) Project [codegen id : 11] +Output [5]: [sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_id#66] +Input [7]: [page_sk#49, sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_sk#65, cp_catalog_page_id#66] + +(44) HashAggregate [codegen id : 11] +Input [5]: [sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_id#66] +Keys [1]: [cp_catalog_page_id#66] +Functions [4]: [partial_sum(UnscaledValue(sales_price#51)), partial_sum(UnscaledValue(return_amt#53)), partial_sum(UnscaledValue(profit#52)), partial_sum(UnscaledValue(net_loss#54))] +Aggregate 
Attributes [4]: [sum#68, sum#69, sum#70, sum#71] +Results [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] + +(45) Exchange +Input [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] +Arguments: hashpartitioning(cp_catalog_page_id#66, 5), true, [id=#76] + +(46) HashAggregate [codegen id : 12] +Input [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] +Keys [1]: [cp_catalog_page_id#66] +Functions [4]: [sum(UnscaledValue(sales_price#51)), sum(UnscaledValue(return_amt#53)), sum(UnscaledValue(profit#52)), sum(UnscaledValue(net_loss#54))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#51))#77, sum(UnscaledValue(return_amt#53))#78, sum(UnscaledValue(profit#52))#79, sum(UnscaledValue(net_loss#54))#80] +Results [5]: [catalog channel AS channel#81, concat(catalog_page, cp_catalog_page_id#66) AS id#82, MakeDecimal(sum(UnscaledValue(sales_price#51))#77,17,2) AS sales#83, MakeDecimal(sum(UnscaledValue(return_amt#53))#78,17,2) AS returns#84, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#52))#79,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#54))#80,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#85] + +(47) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#86, ws_web_site_sk#87, ws_ext_sales_price#88, ws_net_profit#89] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_web_site_sk)] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 13] +Input [4]: [ws_sold_date_sk#86, ws_web_site_sk#87, ws_ext_sales_price#88, ws_net_profit#89] + +(49) Filter [codegen id : 13] +Input [4]: [ws_sold_date_sk#86, ws_web_site_sk#87, ws_ext_sales_price#88, ws_net_profit#89] +Condition : (isnotnull(cast(ws_sold_date_sk#86 as bigint)) AND isnotnull(ws_web_site_sk#87)) + +(50) Project [codegen id : 13] +Output 
[6]: [ws_web_site_sk#87 AS wsr_web_site_sk#90, cast(ws_sold_date_sk#86 as bigint) AS date_sk#91, ws_ext_sales_price#88 AS sales_price#92, ws_net_profit#89 AS profit#93, 0.00 AS return_amt#94, 0.00 AS net_loss#95] +Input [4]: [ws_sold_date_sk#86, ws_web_site_sk#87, ws_ext_sales_price#88, ws_net_profit#89] + +(51) Scan parquet default.web_returns +Output [5]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/web_returns] +PushedFilters: [IsNotNull(wr_returned_date_sk)] +ReadSchema: struct + +(52) ColumnarToRow [codegen id : 14] +Input [5]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100] + +(53) Filter [codegen id : 14] +Input [5]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100] +Condition : isnotnull(wr_returned_date_sk#96) + +(54) Exchange +Input [5]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100] +Arguments: hashpartitioning(wr_item_sk#97, wr_order_number#98, 5), true, [id=#101] + +(55) Sort [codegen id : 15] +Input [5]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100] +Arguments: [wr_item_sk#97 ASC NULLS FIRST, wr_order_number#98 ASC NULLS FIRST], false, 0 + +(56) Scan parquet default.web_sales +Output [3]: [ws_item_sk#102, ws_web_site_sk#87, ws_order_number#103] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_web_site_sk)] +ReadSchema: struct + +(57) ColumnarToRow [codegen id : 16] +Input [3]: [ws_item_sk#102, ws_web_site_sk#87, ws_order_number#103] + +(58) Filter 
[codegen id : 16] +Input [3]: [ws_item_sk#102, ws_web_site_sk#87, ws_order_number#103] +Condition : ((isnotnull(ws_item_sk#102) AND isnotnull(ws_order_number#103)) AND isnotnull(ws_web_site_sk#87)) + +(59) Exchange +Input [3]: [ws_item_sk#102, ws_web_site_sk#87, ws_order_number#103] +Arguments: hashpartitioning(cast(ws_item_sk#102 as bigint), cast(ws_order_number#103 as bigint), 5), true, [id=#104] + +(60) Sort [codegen id : 17] +Input [3]: [ws_item_sk#102, ws_web_site_sk#87, ws_order_number#103] +Arguments: [cast(ws_item_sk#102 as bigint) ASC NULLS FIRST, cast(ws_order_number#103 as bigint) ASC NULLS FIRST], false, 0 + +(61) SortMergeJoin [codegen id : 18] +Left keys [2]: [wr_item_sk#97, wr_order_number#98] +Right keys [2]: [cast(ws_item_sk#102 as bigint), cast(ws_order_number#103 as bigint)] +Join condition: None + +(62) Project [codegen id : 18] +Output [6]: [ws_web_site_sk#87 AS wsr_web_site_sk#105, wr_returned_date_sk#96 AS date_sk#106, 0.00 AS sales_price#107, 0.00 AS profit#108, wr_return_amt#99 AS return_amt#109, wr_net_loss#100 AS net_loss#110] +Input [8]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100, ws_item_sk#102, ws_web_site_sk#87, ws_order_number#103] + +(63) Union + +(64) ReusedExchange [Reuses operator id: 14] +Output [1]: [d_date_sk#21] + +(65) BroadcastHashJoin [codegen id : 21] +Left keys [1]: [date_sk#91] +Right keys [1]: [cast(d_date_sk#21 as bigint)] +Join condition: None + +(66) Project [codegen id : 21] +Output [5]: [wsr_web_site_sk#90, sales_price#92, profit#93, return_amt#94, net_loss#95] +Input [7]: [wsr_web_site_sk#90, date_sk#91, sales_price#92, profit#93, return_amt#94, net_loss#95, d_date_sk#21] + +(67) Scan parquet default.web_site +Output [2]: [web_site_sk#111, web_site_id#112] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/web_site] +PushedFilters: 
[IsNotNull(web_site_sk)] +ReadSchema: struct + +(68) ColumnarToRow [codegen id : 20] +Input [2]: [web_site_sk#111, web_site_id#112] + +(69) Filter [codegen id : 20] +Input [2]: [web_site_sk#111, web_site_id#112] +Condition : isnotnull(web_site_sk#111) + +(70) BroadcastExchange +Input [2]: [web_site_sk#111, web_site_id#112] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#113] + +(71) BroadcastHashJoin [codegen id : 21] +Left keys [1]: [wsr_web_site_sk#90] +Right keys [1]: [web_site_sk#111] +Join condition: None + +(72) Project [codegen id : 21] +Output [5]: [sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#112] +Input [7]: [wsr_web_site_sk#90, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_sk#111, web_site_id#112] + +(73) HashAggregate [codegen id : 21] +Input [5]: [sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#112] +Keys [1]: [web_site_id#112] +Functions [4]: [partial_sum(UnscaledValue(sales_price#92)), partial_sum(UnscaledValue(return_amt#94)), partial_sum(UnscaledValue(profit#93)), partial_sum(UnscaledValue(net_loss#95))] +Aggregate Attributes [4]: [sum#114, sum#115, sum#116, sum#117] +Results [5]: [web_site_id#112, sum#118, sum#119, sum#120, sum#121] + +(74) Exchange +Input [5]: [web_site_id#112, sum#118, sum#119, sum#120, sum#121] +Arguments: hashpartitioning(web_site_id#112, 5), true, [id=#122] + +(75) HashAggregate [codegen id : 22] +Input [5]: [web_site_id#112, sum#118, sum#119, sum#120, sum#121] +Keys [1]: [web_site_id#112] +Functions [4]: [sum(UnscaledValue(sales_price#92)), sum(UnscaledValue(return_amt#94)), sum(UnscaledValue(profit#93)), sum(UnscaledValue(net_loss#95))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#92))#123, sum(UnscaledValue(return_amt#94))#124, sum(UnscaledValue(profit#93))#125, sum(UnscaledValue(net_loss#95))#126] +Results [5]: [web channel AS channel#127, concat(web_site, web_site_id#112) AS id#128, 
MakeDecimal(sum(UnscaledValue(sales_price#92))#123,17,2) AS sales#129, MakeDecimal(sum(UnscaledValue(return_amt#94))#124,17,2) AS returns#130, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#93))#125,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#95))#126,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#131] + +(76) Union + +(77) HashAggregate [codegen id : 23] +Input [5]: [channel#40, id#41, sales#42, returns#43, profit#44] +Keys [2]: [channel#40, id#41] +Functions [3]: [partial_sum(sales#42), partial_sum(returns#43), partial_sum(profit#44)] +Aggregate Attributes [6]: [sum#132, isEmpty#133, sum#134, isEmpty#135, sum#136, isEmpty#137] +Results [8]: [channel#40, id#41, sum#138, isEmpty#139, sum#140, isEmpty#141, sum#142, isEmpty#143] + +(78) Exchange +Input [8]: [channel#40, id#41, sum#138, isEmpty#139, sum#140, isEmpty#141, sum#142, isEmpty#143] +Arguments: hashpartitioning(channel#40, id#41, 5), true, [id=#144] + +(79) HashAggregate [codegen id : 24] +Input [8]: [channel#40, id#41, sum#138, isEmpty#139, sum#140, isEmpty#141, sum#142, isEmpty#143] +Keys [2]: [channel#40, id#41] +Functions [3]: [sum(sales#42), sum(returns#43), sum(profit#44)] +Aggregate Attributes [3]: [sum(sales#42)#145, sum(returns#43)#146, sum(profit#44)#147] +Results [5]: [channel#40, id#41, cast(sum(sales#42)#145 as decimal(37,2)) AS sales#148, cast(sum(returns#43)#146 as decimal(37,2)) AS returns#149, cast(sum(profit#44)#147 as decimal(38,2)) AS profit#150] + +(80) ReusedExchange [Reuses operator id: 78] +Output [8]: [channel#40, id#41, sum#151, isEmpty#152, sum#153, isEmpty#154, sum#155, isEmpty#156] + +(81) HashAggregate [codegen id : 48] +Input [8]: [channel#40, id#41, sum#151, isEmpty#152, sum#153, isEmpty#154, sum#155, isEmpty#156] +Keys [2]: [channel#40, id#41] +Functions [3]: [sum(sales#42), sum(returns#43), sum(profit#157)] +Aggregate Attributes [3]: [sum(sales#42)#158, sum(returns#43)#159, 
sum(profit#157)#160] +Results [4]: [channel#40, sum(sales#42)#158 AS sales#161, sum(returns#43)#159 AS returns#162, sum(profit#157)#160 AS profit#163] + +(82) HashAggregate [codegen id : 48] +Input [4]: [channel#40, sales#161, returns#162, profit#163] +Keys [1]: [channel#40] +Functions [3]: [partial_sum(sales#161), partial_sum(returns#162), partial_sum(profit#163)] +Aggregate Attributes [6]: [sum#164, isEmpty#165, sum#166, isEmpty#167, sum#168, isEmpty#169] +Results [7]: [channel#40, sum#170, isEmpty#171, sum#172, isEmpty#173, sum#174, isEmpty#175] + +(83) Exchange +Input [7]: [channel#40, sum#170, isEmpty#171, sum#172, isEmpty#173, sum#174, isEmpty#175] +Arguments: hashpartitioning(channel#40, 5), true, [id=#176] + +(84) HashAggregate [codegen id : 49] +Input [7]: [channel#40, sum#170, isEmpty#171, sum#172, isEmpty#173, sum#174, isEmpty#175] +Keys [1]: [channel#40] +Functions [3]: [sum(sales#161), sum(returns#162), sum(profit#163)] +Aggregate Attributes [3]: [sum(sales#161)#177, sum(returns#162)#178, sum(profit#163)#179] +Results [5]: [channel#40, null AS id#180, sum(sales#161)#177 AS sum(sales)#181, sum(returns#162)#178 AS sum(returns)#182, sum(profit#163)#179 AS sum(profit)#183] + +(85) Union + +(86) HashAggregate [codegen id : 50] +Input [5]: [channel#40, id#41, sales#148, returns#149, profit#150] +Keys [5]: [channel#40, id#41, sales#148, returns#149, profit#150] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#40, id#41, sales#148, returns#149, profit#150] + +(87) Exchange +Input [5]: [channel#40, id#41, sales#148, returns#149, profit#150] +Arguments: hashpartitioning(channel#40, id#41, sales#148, returns#149, profit#150, 5), true, [id=#184] + +(88) HashAggregate [codegen id : 51] +Input [5]: [channel#40, id#41, sales#148, returns#149, profit#150] +Keys [5]: [channel#40, id#41, sales#148, returns#149, profit#150] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#40, id#41, sales#148, returns#149, profit#150] + +(89) ReusedExchange 
[Reuses operator id: 78] +Output [8]: [channel#40, id#41, sum#185, isEmpty#186, sum#187, isEmpty#188, sum#189, isEmpty#190] + +(90) HashAggregate [codegen id : 75] +Input [8]: [channel#40, id#41, sum#185, isEmpty#186, sum#187, isEmpty#188, sum#189, isEmpty#190] +Keys [2]: [channel#40, id#41] +Functions [3]: [sum(sales#42), sum(returns#43), sum(profit#191)] +Aggregate Attributes [3]: [sum(sales#42)#192, sum(returns#43)#193, sum(profit#191)#194] +Results [3]: [sum(sales#42)#192 AS sales#161, sum(returns#43)#193 AS returns#162, sum(profit#191)#194 AS profit#163] + +(91) HashAggregate [codegen id : 75] +Input [3]: [sales#161, returns#162, profit#163] +Keys: [] +Functions [3]: [partial_sum(sales#161), partial_sum(returns#162), partial_sum(profit#163)] +Aggregate Attributes [6]: [sum#195, isEmpty#196, sum#197, isEmpty#198, sum#199, isEmpty#200] +Results [6]: [sum#201, isEmpty#202, sum#203, isEmpty#204, sum#205, isEmpty#206] + +(92) Exchange +Input [6]: [sum#201, isEmpty#202, sum#203, isEmpty#204, sum#205, isEmpty#206] +Arguments: SinglePartition, true, [id=#207] + +(93) HashAggregate [codegen id : 76] +Input [6]: [sum#201, isEmpty#202, sum#203, isEmpty#204, sum#205, isEmpty#206] +Keys: [] +Functions [3]: [sum(sales#161), sum(returns#162), sum(profit#163)] +Aggregate Attributes [3]: [sum(sales#161)#208, sum(returns#162)#209, sum(profit#163)#210] +Results [5]: [null AS channel#211, null AS id#212, sum(sales#161)#208 AS sum(sales)#213, sum(returns#162)#209 AS sum(returns)#214, sum(profit#163)#210 AS sum(profit)#215] + +(94) Union + +(95) HashAggregate [codegen id : 77] +Input [5]: [channel#40, id#41, sales#148, returns#149, profit#150] +Keys [5]: [channel#40, id#41, sales#148, returns#149, profit#150] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#40, id#41, sales#148, returns#149, profit#150] + +(96) Exchange +Input [5]: [channel#40, id#41, sales#148, returns#149, profit#150] +Arguments: hashpartitioning(channel#40, id#41, sales#148, returns#149, 
profit#150, 5), true, [id=#216] + +(97) HashAggregate [codegen id : 78] +Input [5]: [channel#40, id#41, sales#148, returns#149, profit#150] +Keys [5]: [channel#40, id#41, sales#148, returns#149, profit#150] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#40, id#41, sales#148, returns#149, profit#150] + +(98) TakeOrderedAndProject +Input [5]: [channel#40, id#41, sales#148, returns#149, profit#150] +Arguments: 100, [channel#40 ASC NULLS FIRST, id#41 ASC NULLS FIRST], [channel#40, id#41, sales#148, returns#149, profit#150] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/simplified.txt new file mode 100644 index 0000000000000..209f65e8d333e --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/simplified.txt @@ -0,0 +1,165 @@ +TakeOrderedAndProject [channel,id,profit,returns,sales] + WholeStageCodegen (78) + HashAggregate [channel,id,profit,returns,sales] + InputAdapter + Exchange [channel,id,profit,returns,sales] #1 + WholeStageCodegen (77) + HashAggregate [channel,id,profit,returns,sales] + InputAdapter + Union + WholeStageCodegen (51) + HashAggregate [channel,id,profit,returns,sales] + InputAdapter + Exchange [channel,id,profit,returns,sales] #2 + WholeStageCodegen (50) + HashAggregate [channel,id,profit,returns,sales] + InputAdapter + Union + WholeStageCodegen (24) + HashAggregate [channel,id,isEmpty,isEmpty,isEmpty,sum,sum,sum] [isEmpty,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] + InputAdapter + Exchange [channel,id] #3 + WholeStageCodegen (23) + HashAggregate [channel,id,profit,returns,sales] [isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum] + InputAdapter + Union + WholeStageCodegen (6) + HashAggregate [s_store_id,sum,sum,sum,sum] 
[channel,id,profit,returns,sales,sum,sum,sum,sum,sum(UnscaledValue(net_loss)),sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price))] + InputAdapter + Exchange [s_store_id] #4 + WholeStageCodegen (5) + HashAggregate [net_loss,profit,return_amt,s_store_id,sales_price] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [net_loss,profit,return_amt,s_store_id,sales_price] + BroadcastHashJoin [s_store_sk,store_sk] + Project [net_loss,profit,return_amt,sales_price,store_sk] + BroadcastHashJoin [d_date_sk,date_sk] + InputAdapter + Union + WholeStageCodegen (1) + Project [ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_store_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_store_sk] + WholeStageCodegen (2) + Project [sr_net_loss,sr_return_amt,sr_returned_date_sk,sr_store_sk] + Filter [sr_returned_date_sk,sr_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_net_loss,sr_return_amt,sr_returned_date_sk,sr_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_id,s_store_sk] + WholeStageCodegen (12) + HashAggregate [cp_catalog_page_id,sum,sum,sum,sum] [channel,id,profit,returns,sales,sum,sum,sum,sum,sum(UnscaledValue(net_loss)),sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price))] + InputAdapter + Exchange [cp_catalog_page_id] #7 + WholeStageCodegen (11) + HashAggregate [cp_catalog_page_id,net_loss,profit,return_amt,sales_price] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [cp_catalog_page_id,net_loss,profit,return_amt,sales_price] + BroadcastHashJoin 
[cp_catalog_page_sk,page_sk] + Project [net_loss,page_sk,profit,return_amt,sales_price] + BroadcastHashJoin [d_date_sk,date_sk] + InputAdapter + Union + WholeStageCodegen (7) + Project [cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + Filter [cs_catalog_page_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + WholeStageCodegen (8) + Project [cr_catalog_page_sk,cr_net_loss,cr_return_amount,cr_returned_date_sk] + Filter [cr_catalog_page_sk,cr_returned_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_catalog_page_sk,cr_net_loss,cr_return_amount,cr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (10) + Filter [cp_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_page [cp_catalog_page_id,cp_catalog_page_sk] + WholeStageCodegen (22) + HashAggregate [sum,sum,sum,sum,web_site_id] [channel,id,profit,returns,sales,sum,sum,sum,sum,sum(UnscaledValue(net_loss)),sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price))] + InputAdapter + Exchange [web_site_id] #9 + WholeStageCodegen (21) + HashAggregate [net_loss,profit,return_amt,sales_price,web_site_id] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [net_loss,profit,return_amt,sales_price,web_site_id] + BroadcastHashJoin [web_site_sk,wsr_web_site_sk] + Project [net_loss,profit,return_amt,sales_price,wsr_web_site_sk] + BroadcastHashJoin [d_date_sk,date_sk] + InputAdapter + Union + WholeStageCodegen (13) + Project [ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,ws_web_site_sk] + Filter [ws_sold_date_sk,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,ws_web_site_sk] + WholeStageCodegen (18) + Project 
[wr_net_loss,wr_return_amt,wr_returned_date_sk,ws_web_site_sk] + SortMergeJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + InputAdapter + WholeStageCodegen (15) + Sort [wr_item_sk,wr_order_number] + InputAdapter + Exchange [wr_item_sk,wr_order_number] #10 + WholeStageCodegen (14) + Filter [wr_returned_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_item_sk,wr_net_loss,wr_order_number,wr_return_amt,wr_returned_date_sk] + InputAdapter + WholeStageCodegen (17) + Sort [ws_item_sk,ws_order_number] + InputAdapter + Exchange [ws_item_sk,ws_order_number] #11 + WholeStageCodegen (16) + Filter [ws_item_sk,ws_order_number,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_order_number,ws_web_site_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (20) + Filter [web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_site [web_site_id,web_site_sk] + WholeStageCodegen (49) + HashAggregate [channel,isEmpty,isEmpty,isEmpty,sum,sum,sum] [id,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum(profit),sum(profit),sum(returns),sum(returns),sum(sales),sum(sales)] + InputAdapter + Exchange [channel] #13 + WholeStageCodegen (48) + HashAggregate [channel,profit,returns,sales] [isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum] + HashAggregate [channel,id,isEmpty,isEmpty,isEmpty,sum,sum,sum] [isEmpty,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] + InputAdapter + ReusedExchange [channel,id,isEmpty,isEmpty,isEmpty,sum,sum,sum] #3 + WholeStageCodegen (76) + HashAggregate [isEmpty,isEmpty,isEmpty,sum,sum,sum] [channel,id,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum(profit),sum(profit),sum(returns),sum(returns),sum(sales),sum(sales)] + InputAdapter + Exchange #14 + WholeStageCodegen (75) + HashAggregate [profit,returns,sales] 
[isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum] + HashAggregate [channel,id,isEmpty,isEmpty,isEmpty,sum,sum,sum] [isEmpty,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] + InputAdapter + ReusedExchange [channel,id,isEmpty,isEmpty,isEmpty,sum,sum,sum] #3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a/explain.txt new file mode 100644 index 0000000000000..62bbb6547080a --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a/explain.txt @@ -0,0 +1,544 @@ +== Physical Plan == +TakeOrderedAndProject (95) ++- * HashAggregate (94) + +- Exchange (93) + +- * HashAggregate (92) + +- Union (91) + :- * HashAggregate (85) + : +- Exchange (84) + : +- * HashAggregate (83) + : +- Union (82) + : :- * HashAggregate (76) + : : +- Exchange (75) + : : +- * HashAggregate (74) + : : +- Union (73) + : : :- * HashAggregate (25) + : : : +- Exchange (24) + : : : +- * HashAggregate (23) + : : : +- * Project (22) + : : : +- * BroadcastHashJoin Inner BuildRight (21) + : : : :- * Project (16) + : : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : : :- Union (9) + : : : : : :- * Project (4) + : : : : : : +- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet default.store_sales (1) + : : : : : +- * Project (8) + : : : : : +- * Filter (7) + : : : : : +- * ColumnarToRow (6) + : : : : : +- Scan parquet default.store_returns (5) + : : : : +- BroadcastExchange (14) + : : : : +- * Project (13) + : : : : +- * Filter (12) + : : : : +- * ColumnarToRow (11) + : : : : +- Scan parquet default.date_dim (10) + : : : +- BroadcastExchange (20) + : : : +- * Filter (19) + : : : +- * ColumnarToRow (18) + : : : +- Scan parquet default.store (17) + : : :- * HashAggregate (46) + : : : +- Exchange (45) + : : : +- * HashAggregate (44) + : : : +- * Project (43) + 
: : : +- * BroadcastHashJoin Inner BuildRight (42) + : : : :- * Project (37) + : : : : +- * BroadcastHashJoin Inner BuildRight (36) + : : : : :- Union (34) + : : : : : :- * Project (29) + : : : : : : +- * Filter (28) + : : : : : : +- * ColumnarToRow (27) + : : : : : : +- Scan parquet default.catalog_sales (26) + : : : : : +- * Project (33) + : : : : : +- * Filter (32) + : : : : : +- * ColumnarToRow (31) + : : : : : +- Scan parquet default.catalog_returns (30) + : : : : +- ReusedExchange (35) + : : : +- BroadcastExchange (41) + : : : +- * Filter (40) + : : : +- * ColumnarToRow (39) + : : : +- Scan parquet default.catalog_page (38) + : : +- * HashAggregate (72) + : : +- Exchange (71) + : : +- * HashAggregate (70) + : : +- * Project (69) + : : +- * BroadcastHashJoin Inner BuildRight (68) + : : :- * Project (63) + : : : +- * BroadcastHashJoin Inner BuildRight (62) + : : : :- Union (60) + : : : : :- * Project (50) + : : : : : +- * Filter (49) + : : : : : +- * ColumnarToRow (48) + : : : : : +- Scan parquet default.web_sales (47) + : : : : +- * Project (59) + : : : : +- * BroadcastHashJoin Inner BuildRight (58) + : : : : :- * Filter (53) + : : : : : +- * ColumnarToRow (52) + : : : : : +- Scan parquet default.web_returns (51) + : : : : +- BroadcastExchange (57) + : : : : +- * Filter (56) + : : : : +- * ColumnarToRow (55) + : : : : +- Scan parquet default.web_sales (54) + : : : +- ReusedExchange (61) + : : +- BroadcastExchange (67) + : : +- * Filter (66) + : : +- * ColumnarToRow (65) + : : +- Scan parquet default.web_site (64) + : +- * HashAggregate (81) + : +- Exchange (80) + : +- * HashAggregate (79) + : +- * HashAggregate (78) + : +- ReusedExchange (77) + +- * HashAggregate (90) + +- Exchange (89) + +- * HashAggregate (88) + +- * HashAggregate (87) + +- ReusedExchange (86) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_sales] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] + +(3) Filter [codegen id : 1] +Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] +Condition : (isnotnull(cast(ss_sold_date_sk#1 as bigint)) AND isnotnull(cast(ss_store_sk#2 as bigint))) + +(4) Project [codegen id : 1] +Output [6]: [cast(ss_store_sk#2 as bigint) AS store_sk#5, cast(ss_sold_date_sk#1 as bigint) AS date_sk#6, ss_ext_sales_price#3 AS sales_price#7, ss_net_profit#4 AS profit#8, 0.00 AS return_amt#9, 0.00 AS net_loss#10] +Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] + +(5) Scan parquet default.store_returns +Output [4]: [sr_returned_date_sk#11, sr_store_sk#12, sr_return_amt#13, sr_net_loss#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_returns] +PushedFilters: [IsNotNull(sr_returned_date_sk), IsNotNull(sr_store_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 2] +Input [4]: [sr_returned_date_sk#11, sr_store_sk#12, sr_return_amt#13, sr_net_loss#14] + +(7) Filter [codegen id : 2] +Input [4]: [sr_returned_date_sk#11, sr_store_sk#12, sr_return_amt#13, sr_net_loss#14] +Condition : (isnotnull(sr_returned_date_sk#11) AND isnotnull(sr_store_sk#12)) + +(8) Project [codegen id : 2] +Output [6]: [sr_store_sk#12 AS store_sk#15, sr_returned_date_sk#11 AS date_sk#16, 0.00 AS sales_price#17, 0.00 AS profit#18, sr_return_amt#13 AS return_amt#19, sr_net_loss#14 AS net_loss#20] +Input [4]: [sr_returned_date_sk#11, sr_store_sk#12, sr_return_amt#13, sr_net_loss#14] + +(9) Union + +(10) Scan parquet default.date_dim +Output [2]: [d_date_sk#21, d_date#22] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1998-08-04), LessThanOrEqual(d_date,1998-08-18), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#21, d_date#22] + +(12) Filter [codegen id : 3] +Input [2]: [d_date_sk#21, d_date#22] +Condition : (((isnotnull(d_date#22) AND (d_date#22 >= 10442)) AND (d_date#22 <= 10456)) AND isnotnull(d_date_sk#21)) + +(13) Project [codegen id : 3] +Output [1]: [d_date_sk#21] +Input [2]: [d_date_sk#21, d_date#22] + +(14) BroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] + +(15) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [date_sk#6] +Right keys [1]: [cast(d_date_sk#21 as bigint)] +Join condition: None + +(16) Project [codegen id : 5] +Output [5]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10] +Input [7]: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, d_date_sk#21] + +(17) Scan parquet default.store +Output [2]: [s_store_sk#24, s_store_id#25] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 4] +Input [2]: [s_store_sk#24, s_store_id#25] + +(19) Filter [codegen id : 4] +Input [2]: [s_store_sk#24, s_store_id#25] +Condition : isnotnull(s_store_sk#24) + +(20) BroadcastExchange +Input [2]: [s_store_sk#24, s_store_id#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#26] + +(21) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [store_sk#5] +Right keys [1]: [cast(s_store_sk#24 as bigint)] +Join condition: None + +(22) Project [codegen id : 5] +Output 
[5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#25] +Input [7]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_sk#24, s_store_id#25] + +(23) HashAggregate [codegen id : 5] +Input [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#25] +Keys [1]: [s_store_id#25] +Functions [4]: [partial_sum(UnscaledValue(sales_price#7)), partial_sum(UnscaledValue(return_amt#9)), partial_sum(UnscaledValue(profit#8)), partial_sum(UnscaledValue(net_loss#10))] +Aggregate Attributes [4]: [sum#27, sum#28, sum#29, sum#30] +Results [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] + +(24) Exchange +Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] +Arguments: hashpartitioning(s_store_id#25, 5), true, [id=#35] + +(25) HashAggregate [codegen id : 6] +Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] +Keys [1]: [s_store_id#25] +Functions [4]: [sum(UnscaledValue(sales_price#7)), sum(UnscaledValue(return_amt#9)), sum(UnscaledValue(profit#8)), sum(UnscaledValue(net_loss#10))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#7))#36, sum(UnscaledValue(return_amt#9))#37, sum(UnscaledValue(profit#8))#38, sum(UnscaledValue(net_loss#10))#39] +Results [5]: [store channel AS channel#40, concat(store, s_store_id#25) AS id#41, MakeDecimal(sum(UnscaledValue(sales_price#7))#36,17,2) AS sales#42, MakeDecimal(sum(UnscaledValue(return_amt#9))#37,17,2) AS returns#43, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#8))#38,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#10))#39,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#44] + +(26) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#47, cs_net_profit#48] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/catalog_sales] 
+PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_catalog_page_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 7] +Input [4]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#47, cs_net_profit#48] + +(28) Filter [codegen id : 7] +Input [4]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#47, cs_net_profit#48] +Condition : (isnotnull(cs_sold_date_sk#45) AND isnotnull(cs_catalog_page_sk#46)) + +(29) Project [codegen id : 7] +Output [6]: [cs_catalog_page_sk#46 AS page_sk#49, cs_sold_date_sk#45 AS date_sk#50, cs_ext_sales_price#47 AS sales_price#51, cs_net_profit#48 AS profit#52, 0.00 AS return_amt#53, 0.00 AS net_loss#54] +Input [4]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#47, cs_net_profit#48] + +(30) Scan parquet default.catalog_returns +Output [4]: [cr_returned_date_sk#55, cr_catalog_page_sk#56, cr_return_amount#57, cr_net_loss#58] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/catalog_returns] +PushedFilters: [IsNotNull(cr_returned_date_sk), IsNotNull(cr_catalog_page_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 8] +Input [4]: [cr_returned_date_sk#55, cr_catalog_page_sk#56, cr_return_amount#57, cr_net_loss#58] + +(32) Filter [codegen id : 8] +Input [4]: [cr_returned_date_sk#55, cr_catalog_page_sk#56, cr_return_amount#57, cr_net_loss#58] +Condition : (isnotnull(cr_returned_date_sk#55) AND isnotnull(cr_catalog_page_sk#56)) + +(33) Project [codegen id : 8] +Output [6]: [cr_catalog_page_sk#56 AS page_sk#59, cr_returned_date_sk#55 AS date_sk#60, 0.00 AS sales_price#61, 0.00 AS profit#62, cr_return_amount#57 AS return_amt#63, cr_net_loss#58 AS net_loss#64] +Input [4]: [cr_returned_date_sk#55, cr_catalog_page_sk#56, cr_return_amount#57, cr_net_loss#58] + +(34) Union + +(35) ReusedExchange [Reuses operator id: 14] +Output [1]: [d_date_sk#21] + +(36) BroadcastHashJoin [codegen 
id : 11] +Left keys [1]: [date_sk#50] +Right keys [1]: [d_date_sk#21] +Join condition: None + +(37) Project [codegen id : 11] +Output [5]: [page_sk#49, sales_price#51, profit#52, return_amt#53, net_loss#54] +Input [7]: [page_sk#49, date_sk#50, sales_price#51, profit#52, return_amt#53, net_loss#54, d_date_sk#21] + +(38) Scan parquet default.catalog_page +Output [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/catalog_page] +PushedFilters: [IsNotNull(cp_catalog_page_sk)] +ReadSchema: struct + +(39) ColumnarToRow [codegen id : 10] +Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] + +(40) Filter [codegen id : 10] +Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] +Condition : isnotnull(cp_catalog_page_sk#65) + +(41) BroadcastExchange +Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#67] + +(42) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [page_sk#49] +Right keys [1]: [cp_catalog_page_sk#65] +Join condition: None + +(43) Project [codegen id : 11] +Output [5]: [sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_id#66] +Input [7]: [page_sk#49, sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_sk#65, cp_catalog_page_id#66] + +(44) HashAggregate [codegen id : 11] +Input [5]: [sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_id#66] +Keys [1]: [cp_catalog_page_id#66] +Functions [4]: [partial_sum(UnscaledValue(sales_price#51)), partial_sum(UnscaledValue(return_amt#53)), partial_sum(UnscaledValue(profit#52)), partial_sum(UnscaledValue(net_loss#54))] +Aggregate Attributes [4]: [sum#68, sum#69, sum#70, sum#71] +Results [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] + +(45) Exchange +Input [5]: [cp_catalog_page_id#66, 
sum#72, sum#73, sum#74, sum#75] +Arguments: hashpartitioning(cp_catalog_page_id#66, 5), true, [id=#76] + +(46) HashAggregate [codegen id : 12] +Input [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] +Keys [1]: [cp_catalog_page_id#66] +Functions [4]: [sum(UnscaledValue(sales_price#51)), sum(UnscaledValue(return_amt#53)), sum(UnscaledValue(profit#52)), sum(UnscaledValue(net_loss#54))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#51))#77, sum(UnscaledValue(return_amt#53))#78, sum(UnscaledValue(profit#52))#79, sum(UnscaledValue(net_loss#54))#80] +Results [5]: [catalog channel AS channel#81, concat(catalog_page, cp_catalog_page_id#66) AS id#82, MakeDecimal(sum(UnscaledValue(sales_price#51))#77,17,2) AS sales#83, MakeDecimal(sum(UnscaledValue(return_amt#53))#78,17,2) AS returns#84, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#52))#79,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#54))#80,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#85] + +(47) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#86, ws_web_site_sk#87, ws_ext_sales_price#88, ws_net_profit#89] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_web_site_sk)] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 13] +Input [4]: [ws_sold_date_sk#86, ws_web_site_sk#87, ws_ext_sales_price#88, ws_net_profit#89] + +(49) Filter [codegen id : 13] +Input [4]: [ws_sold_date_sk#86, ws_web_site_sk#87, ws_ext_sales_price#88, ws_net_profit#89] +Condition : (isnotnull(cast(ws_sold_date_sk#86 as bigint)) AND isnotnull(ws_web_site_sk#87)) + +(50) Project [codegen id : 13] +Output [6]: [ws_web_site_sk#87 AS wsr_web_site_sk#90, cast(ws_sold_date_sk#86 as bigint) AS date_sk#91, ws_ext_sales_price#88 AS sales_price#92, ws_net_profit#89 AS profit#93, 0.00 AS 
return_amt#94, 0.00 AS net_loss#95] +Input [4]: [ws_sold_date_sk#86, ws_web_site_sk#87, ws_ext_sales_price#88, ws_net_profit#89] + +(51) Scan parquet default.web_returns +Output [5]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/web_returns] +PushedFilters: [IsNotNull(wr_returned_date_sk)] +ReadSchema: struct + +(52) ColumnarToRow [codegen id : 15] +Input [5]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100] + +(53) Filter [codegen id : 15] +Input [5]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100] +Condition : isnotnull(wr_returned_date_sk#96) + +(54) Scan parquet default.web_sales +Output [3]: [ws_item_sk#101, ws_web_site_sk#87, ws_order_number#102] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_web_site_sk)] +ReadSchema: struct + +(55) ColumnarToRow [codegen id : 14] +Input [3]: [ws_item_sk#101, ws_web_site_sk#87, ws_order_number#102] + +(56) Filter [codegen id : 14] +Input [3]: [ws_item_sk#101, ws_web_site_sk#87, ws_order_number#102] +Condition : ((isnotnull(ws_item_sk#101) AND isnotnull(ws_order_number#102)) AND isnotnull(ws_web_site_sk#87)) + +(57) BroadcastExchange +Input [3]: [ws_item_sk#101, ws_web_site_sk#87, ws_order_number#102] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint), cast(input[2, int, false] as bigint)),false), [id=#103] + +(58) BroadcastHashJoin [codegen id : 15] +Left keys [2]: [wr_item_sk#97, wr_order_number#98] +Right keys [2]: [cast(ws_item_sk#101 as bigint), cast(ws_order_number#102 as bigint)] +Join condition: 
None + +(59) Project [codegen id : 15] +Output [6]: [ws_web_site_sk#87 AS wsr_web_site_sk#104, wr_returned_date_sk#96 AS date_sk#105, 0.00 AS sales_price#106, 0.00 AS profit#107, wr_return_amt#99 AS return_amt#108, wr_net_loss#100 AS net_loss#109] +Input [8]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100, ws_item_sk#101, ws_web_site_sk#87, ws_order_number#102] + +(60) Union + +(61) ReusedExchange [Reuses operator id: 14] +Output [1]: [d_date_sk#21] + +(62) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [date_sk#91] +Right keys [1]: [cast(d_date_sk#21 as bigint)] +Join condition: None + +(63) Project [codegen id : 18] +Output [5]: [wsr_web_site_sk#90, sales_price#92, profit#93, return_amt#94, net_loss#95] +Input [7]: [wsr_web_site_sk#90, date_sk#91, sales_price#92, profit#93, return_amt#94, net_loss#95, d_date_sk#21] + +(64) Scan parquet default.web_site +Output [2]: [web_site_sk#110, web_site_id#111] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/web_site] +PushedFilters: [IsNotNull(web_site_sk)] +ReadSchema: struct + +(65) ColumnarToRow [codegen id : 17] +Input [2]: [web_site_sk#110, web_site_id#111] + +(66) Filter [codegen id : 17] +Input [2]: [web_site_sk#110, web_site_id#111] +Condition : isnotnull(web_site_sk#110) + +(67) BroadcastExchange +Input [2]: [web_site_sk#110, web_site_id#111] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#112] + +(68) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [wsr_web_site_sk#90] +Right keys [1]: [web_site_sk#110] +Join condition: None + +(69) Project [codegen id : 18] +Output [5]: [sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#111] +Input [7]: [wsr_web_site_sk#90, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_sk#110, web_site_id#111] + +(70) HashAggregate [codegen id : 18] 
+Input [5]: [sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#111] +Keys [1]: [web_site_id#111] +Functions [4]: [partial_sum(UnscaledValue(sales_price#92)), partial_sum(UnscaledValue(return_amt#94)), partial_sum(UnscaledValue(profit#93)), partial_sum(UnscaledValue(net_loss#95))] +Aggregate Attributes [4]: [sum#113, sum#114, sum#115, sum#116] +Results [5]: [web_site_id#111, sum#117, sum#118, sum#119, sum#120] + +(71) Exchange +Input [5]: [web_site_id#111, sum#117, sum#118, sum#119, sum#120] +Arguments: hashpartitioning(web_site_id#111, 5), true, [id=#121] + +(72) HashAggregate [codegen id : 19] +Input [5]: [web_site_id#111, sum#117, sum#118, sum#119, sum#120] +Keys [1]: [web_site_id#111] +Functions [4]: [sum(UnscaledValue(sales_price#92)), sum(UnscaledValue(return_amt#94)), sum(UnscaledValue(profit#93)), sum(UnscaledValue(net_loss#95))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#92))#122, sum(UnscaledValue(return_amt#94))#123, sum(UnscaledValue(profit#93))#124, sum(UnscaledValue(net_loss#95))#125] +Results [5]: [web channel AS channel#126, concat(web_site, web_site_id#111) AS id#127, MakeDecimal(sum(UnscaledValue(sales_price#92))#122,17,2) AS sales#128, MakeDecimal(sum(UnscaledValue(return_amt#94))#123,17,2) AS returns#129, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#93))#124,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#95))#125,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#130] + +(73) Union + +(74) HashAggregate [codegen id : 20] +Input [5]: [channel#40, id#41, sales#42, returns#43, profit#44] +Keys [2]: [channel#40, id#41] +Functions [3]: [partial_sum(sales#42), partial_sum(returns#43), partial_sum(profit#44)] +Aggregate Attributes [6]: [sum#131, isEmpty#132, sum#133, isEmpty#134, sum#135, isEmpty#136] +Results [8]: [channel#40, id#41, sum#137, isEmpty#138, sum#139, isEmpty#140, sum#141, isEmpty#142] + +(75) Exchange +Input [8]: [channel#40, 
id#41, sum#137, isEmpty#138, sum#139, isEmpty#140, sum#141, isEmpty#142] +Arguments: hashpartitioning(channel#40, id#41, 5), true, [id=#143] + +(76) HashAggregate [codegen id : 21] +Input [8]: [channel#40, id#41, sum#137, isEmpty#138, sum#139, isEmpty#140, sum#141, isEmpty#142] +Keys [2]: [channel#40, id#41] +Functions [3]: [sum(sales#42), sum(returns#43), sum(profit#44)] +Aggregate Attributes [3]: [sum(sales#42)#144, sum(returns#43)#145, sum(profit#44)#146] +Results [5]: [channel#40, id#41, cast(sum(sales#42)#144 as decimal(37,2)) AS sales#147, cast(sum(returns#43)#145 as decimal(37,2)) AS returns#148, cast(sum(profit#44)#146 as decimal(38,2)) AS profit#149] + +(77) ReusedExchange [Reuses operator id: 75] +Output [8]: [channel#40, id#41, sum#150, isEmpty#151, sum#152, isEmpty#153, sum#154, isEmpty#155] + +(78) HashAggregate [codegen id : 42] +Input [8]: [channel#40, id#41, sum#150, isEmpty#151, sum#152, isEmpty#153, sum#154, isEmpty#155] +Keys [2]: [channel#40, id#41] +Functions [3]: [sum(sales#42), sum(returns#43), sum(profit#156)] +Aggregate Attributes [3]: [sum(sales#42)#157, sum(returns#43)#158, sum(profit#156)#159] +Results [4]: [channel#40, sum(sales#42)#157 AS sales#160, sum(returns#43)#158 AS returns#161, sum(profit#156)#159 AS profit#162] + +(79) HashAggregate [codegen id : 42] +Input [4]: [channel#40, sales#160, returns#161, profit#162] +Keys [1]: [channel#40] +Functions [3]: [partial_sum(sales#160), partial_sum(returns#161), partial_sum(profit#162)] +Aggregate Attributes [6]: [sum#163, isEmpty#164, sum#165, isEmpty#166, sum#167, isEmpty#168] +Results [7]: [channel#40, sum#169, isEmpty#170, sum#171, isEmpty#172, sum#173, isEmpty#174] + +(80) Exchange +Input [7]: [channel#40, sum#169, isEmpty#170, sum#171, isEmpty#172, sum#173, isEmpty#174] +Arguments: hashpartitioning(channel#40, 5), true, [id=#175] + +(81) HashAggregate [codegen id : 43] +Input [7]: [channel#40, sum#169, isEmpty#170, sum#171, isEmpty#172, sum#173, isEmpty#174] +Keys [1]: [channel#40] 
+Functions [3]: [sum(sales#160), sum(returns#161), sum(profit#162)] +Aggregate Attributes [3]: [sum(sales#160)#176, sum(returns#161)#177, sum(profit#162)#178] +Results [5]: [channel#40, null AS id#179, sum(sales#160)#176 AS sum(sales)#180, sum(returns#161)#177 AS sum(returns)#181, sum(profit#162)#178 AS sum(profit)#182] + +(82) Union + +(83) HashAggregate [codegen id : 44] +Input [5]: [channel#40, id#41, sales#147, returns#148, profit#149] +Keys [5]: [channel#40, id#41, sales#147, returns#148, profit#149] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#40, id#41, sales#147, returns#148, profit#149] + +(84) Exchange +Input [5]: [channel#40, id#41, sales#147, returns#148, profit#149] +Arguments: hashpartitioning(channel#40, id#41, sales#147, returns#148, profit#149, 5), true, [id=#183] + +(85) HashAggregate [codegen id : 45] +Input [5]: [channel#40, id#41, sales#147, returns#148, profit#149] +Keys [5]: [channel#40, id#41, sales#147, returns#148, profit#149] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#40, id#41, sales#147, returns#148, profit#149] + +(86) ReusedExchange [Reuses operator id: 75] +Output [8]: [channel#40, id#41, sum#184, isEmpty#185, sum#186, isEmpty#187, sum#188, isEmpty#189] + +(87) HashAggregate [codegen id : 66] +Input [8]: [channel#40, id#41, sum#184, isEmpty#185, sum#186, isEmpty#187, sum#188, isEmpty#189] +Keys [2]: [channel#40, id#41] +Functions [3]: [sum(sales#42), sum(returns#43), sum(profit#190)] +Aggregate Attributes [3]: [sum(sales#42)#191, sum(returns#43)#192, sum(profit#190)#193] +Results [3]: [sum(sales#42)#191 AS sales#160, sum(returns#43)#192 AS returns#161, sum(profit#190)#193 AS profit#162] + +(88) HashAggregate [codegen id : 66] +Input [3]: [sales#160, returns#161, profit#162] +Keys: [] +Functions [3]: [partial_sum(sales#160), partial_sum(returns#161), partial_sum(profit#162)] +Aggregate Attributes [6]: [sum#194, isEmpty#195, sum#196, isEmpty#197, sum#198, isEmpty#199] +Results [6]: [sum#200, 
isEmpty#201, sum#202, isEmpty#203, sum#204, isEmpty#205] + +(89) Exchange +Input [6]: [sum#200, isEmpty#201, sum#202, isEmpty#203, sum#204, isEmpty#205] +Arguments: SinglePartition, true, [id=#206] + +(90) HashAggregate [codegen id : 67] +Input [6]: [sum#200, isEmpty#201, sum#202, isEmpty#203, sum#204, isEmpty#205] +Keys: [] +Functions [3]: [sum(sales#160), sum(returns#161), sum(profit#162)] +Aggregate Attributes [3]: [sum(sales#160)#207, sum(returns#161)#208, sum(profit#162)#209] +Results [5]: [null AS channel#210, null AS id#211, sum(sales#160)#207 AS sum(sales)#212, sum(returns#161)#208 AS sum(returns)#213, sum(profit#162)#209 AS sum(profit)#214] + +(91) Union + +(92) HashAggregate [codegen id : 68] +Input [5]: [channel#40, id#41, sales#147, returns#148, profit#149] +Keys [5]: [channel#40, id#41, sales#147, returns#148, profit#149] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#40, id#41, sales#147, returns#148, profit#149] + +(93) Exchange +Input [5]: [channel#40, id#41, sales#147, returns#148, profit#149] +Arguments: hashpartitioning(channel#40, id#41, sales#147, returns#148, profit#149, 5), true, [id=#215] + +(94) HashAggregate [codegen id : 69] +Input [5]: [channel#40, id#41, sales#147, returns#148, profit#149] +Keys [5]: [channel#40, id#41, sales#147, returns#148, profit#149] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#40, id#41, sales#147, returns#148, profit#149] + +(95) TakeOrderedAndProject +Input [5]: [channel#40, id#41, sales#147, returns#148, profit#149] +Arguments: 100, [channel#40 ASC NULLS FIRST, id#41 ASC NULLS FIRST], [channel#40, id#41, sales#147, returns#148, profit#149] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a/simplified.txt new file mode 100644 index 0000000000000..77d7d6f938665 --- /dev/null +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a/simplified.txt @@ -0,0 +1,156 @@ +TakeOrderedAndProject [channel,id,profit,returns,sales] + WholeStageCodegen (69) + HashAggregate [channel,id,profit,returns,sales] + InputAdapter + Exchange [channel,id,profit,returns,sales] #1 + WholeStageCodegen (68) + HashAggregate [channel,id,profit,returns,sales] + InputAdapter + Union + WholeStageCodegen (45) + HashAggregate [channel,id,profit,returns,sales] + InputAdapter + Exchange [channel,id,profit,returns,sales] #2 + WholeStageCodegen (44) + HashAggregate [channel,id,profit,returns,sales] + InputAdapter + Union + WholeStageCodegen (21) + HashAggregate [channel,id,isEmpty,isEmpty,isEmpty,sum,sum,sum] [isEmpty,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] + InputAdapter + Exchange [channel,id] #3 + WholeStageCodegen (20) + HashAggregate [channel,id,profit,returns,sales] [isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum] + InputAdapter + Union + WholeStageCodegen (6) + HashAggregate [s_store_id,sum,sum,sum,sum] [channel,id,profit,returns,sales,sum,sum,sum,sum,sum(UnscaledValue(net_loss)),sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price))] + InputAdapter + Exchange [s_store_id] #4 + WholeStageCodegen (5) + HashAggregate [net_loss,profit,return_amt,s_store_id,sales_price] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [net_loss,profit,return_amt,s_store_id,sales_price] + BroadcastHashJoin [s_store_sk,store_sk] + Project [net_loss,profit,return_amt,sales_price,store_sk] + BroadcastHashJoin [d_date_sk,date_sk] + InputAdapter + Union + WholeStageCodegen (1) + Project [ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_store_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_store_sk] + WholeStageCodegen (2) + Project 
[sr_net_loss,sr_return_amt,sr_returned_date_sk,sr_store_sk] + Filter [sr_returned_date_sk,sr_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_net_loss,sr_return_amt,sr_returned_date_sk,sr_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_id,s_store_sk] + WholeStageCodegen (12) + HashAggregate [cp_catalog_page_id,sum,sum,sum,sum] [channel,id,profit,returns,sales,sum,sum,sum,sum,sum(UnscaledValue(net_loss)),sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price))] + InputAdapter + Exchange [cp_catalog_page_id] #7 + WholeStageCodegen (11) + HashAggregate [cp_catalog_page_id,net_loss,profit,return_amt,sales_price] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [cp_catalog_page_id,net_loss,profit,return_amt,sales_price] + BroadcastHashJoin [cp_catalog_page_sk,page_sk] + Project [net_loss,page_sk,profit,return_amt,sales_price] + BroadcastHashJoin [d_date_sk,date_sk] + InputAdapter + Union + WholeStageCodegen (7) + Project [cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + Filter [cs_catalog_page_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + WholeStageCodegen (8) + Project [cr_catalog_page_sk,cr_net_loss,cr_return_amount,cr_returned_date_sk] + Filter [cr_catalog_page_sk,cr_returned_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_catalog_page_sk,cr_net_loss,cr_return_amount,cr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (10) + Filter [cp_catalog_page_sk] + 
ColumnarToRow + InputAdapter + Scan parquet default.catalog_page [cp_catalog_page_id,cp_catalog_page_sk] + WholeStageCodegen (19) + HashAggregate [sum,sum,sum,sum,web_site_id] [channel,id,profit,returns,sales,sum,sum,sum,sum,sum(UnscaledValue(net_loss)),sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price))] + InputAdapter + Exchange [web_site_id] #9 + WholeStageCodegen (18) + HashAggregate [net_loss,profit,return_amt,sales_price,web_site_id] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [net_loss,profit,return_amt,sales_price,web_site_id] + BroadcastHashJoin [web_site_sk,wsr_web_site_sk] + Project [net_loss,profit,return_amt,sales_price,wsr_web_site_sk] + BroadcastHashJoin [d_date_sk,date_sk] + InputAdapter + Union + WholeStageCodegen (13) + Project [ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,ws_web_site_sk] + Filter [ws_sold_date_sk,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,ws_web_site_sk] + WholeStageCodegen (15) + Project [wr_net_loss,wr_return_amt,wr_returned_date_sk,ws_web_site_sk] + BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + Filter [wr_returned_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_item_sk,wr_net_loss,wr_order_number,wr_return_amt,wr_returned_date_sk] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (14) + Filter [ws_item_sk,ws_order_number,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_order_number,ws_web_site_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (17) + Filter [web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_site [web_site_id,web_site_sk] + WholeStageCodegen (43) + HashAggregate [channel,isEmpty,isEmpty,isEmpty,sum,sum,sum] 
[id,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum(profit),sum(profit),sum(returns),sum(returns),sum(sales),sum(sales)] + InputAdapter + Exchange [channel] #12 + WholeStageCodegen (42) + HashAggregate [channel,profit,returns,sales] [isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum] + HashAggregate [channel,id,isEmpty,isEmpty,isEmpty,sum,sum,sum] [isEmpty,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] + InputAdapter + ReusedExchange [channel,id,isEmpty,isEmpty,isEmpty,sum,sum,sum] #3 + WholeStageCodegen (67) + HashAggregate [isEmpty,isEmpty,isEmpty,sum,sum,sum] [channel,id,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum(profit),sum(profit),sum(returns),sum(returns),sum(sales),sum(sales)] + InputAdapter + Exchange #13 + WholeStageCodegen (66) + HashAggregate [profit,returns,sales] [isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum] + HashAggregate [channel,id,isEmpty,isEmpty,isEmpty,sum,sum,sum] [isEmpty,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] + InputAdapter + ReusedExchange [channel,id,isEmpty,isEmpty,isEmpty,sum,sum,sum] #3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.sf100/explain.txt new file mode 100644 index 0000000000000..fe826bf02784e --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.sf100/explain.txt @@ -0,0 +1,331 @@ +== Physical Plan == +TakeOrderedAndProject (50) ++- * Project (49) + +- * Filter (48) + +- * HashAggregate (47) + +- Exchange (46) + +- * HashAggregate (45) + +- * Project (44) + +- * SortMergeJoin Inner (43) + :- * Sort (28) + : +- Exchange (27) + : +- * Project (26) + : +- * BroadcastHashJoin Inner BuildRight (25) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan 
parquet default.store_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.date_dim (4) + : +- BroadcastExchange (24) + : +- * Project (23) + : +- * Filter (22) + : +- * BroadcastHashJoin LeftOuter BuildRight (21) + : :- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.item (11) + : +- BroadcastExchange (20) + : +- * HashAggregate (19) + : +- Exchange (18) + : +- * HashAggregate (17) + : +- * Filter (16) + : +- * ColumnarToRow (15) + : +- Scan parquet default.item (14) + +- * Sort (42) + +- Exchange (41) + +- * Project (40) + +- * SortMergeJoin Inner (39) + :- * Sort (33) + : +- Exchange (32) + : +- * Filter (31) + : +- * ColumnarToRow (30) + : +- Scan parquet default.customer_address (29) + +- * Sort (38) + +- Exchange (37) + +- * Filter (36) + +- * ColumnarToRow (35) + +- Scan parquet default.customer (34) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3] + +(3) Filter [codegen id : 5] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3] +Condition : ((isnotnull(ss_customer_sk#3) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_item_sk#2)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#4, d_month_seq#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow 
[codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : ((isnotnull(d_month_seq#5) AND (d_month_seq#5 = Subquery scalar-subquery#6, [id=#7])) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(10) Project [codegen id : 5] +Output [2]: [ss_item_sk#2, ss_customer_sk#3] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, d_date_sk#4] + +(11) Scan parquet default.item +Output [3]: [i_item_sk#9, i_current_price#10, i_category#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_current_price), IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 4] +Input [3]: [i_item_sk#9, i_current_price#10, i_category#11] + +(13) Filter [codegen id : 4] +Input [3]: [i_item_sk#9, i_current_price#10, i_category#11] +Condition : (isnotnull(i_current_price#10) AND isnotnull(i_item_sk#9)) + +(14) Scan parquet default.item +Output [2]: [i_current_price#10, i_category#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_category)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 2] +Input [2]: [i_current_price#10, i_category#11] + +(16) Filter [codegen id : 2] +Input [2]: [i_current_price#10, i_category#11] +Condition : isnotnull(i_category#11) + +(17) HashAggregate [codegen id : 2] +Input [2]: 
[i_current_price#10, i_category#11] +Keys [1]: [i_category#11] +Functions [1]: [partial_avg(UnscaledValue(i_current_price#10))] +Aggregate Attributes [2]: [sum#12, count#13] +Results [3]: [i_category#11, sum#14, count#15] + +(18) Exchange +Input [3]: [i_category#11, sum#14, count#15] +Arguments: hashpartitioning(i_category#11, 5), true, [id=#16] + +(19) HashAggregate [codegen id : 3] +Input [3]: [i_category#11, sum#14, count#15] +Keys [1]: [i_category#11] +Functions [1]: [avg(UnscaledValue(i_current_price#10))] +Aggregate Attributes [1]: [avg(UnscaledValue(i_current_price#10))#17] +Results [2]: [cast((avg(UnscaledValue(i_current_price#10))#17 / 100.0) as decimal(11,6)) AS avg(i_current_price)#18, i_category#11 AS i_category#11#19] + +(20) BroadcastExchange +Input [2]: [avg(i_current_price)#18, i_category#11#19] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [id=#20] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_category#11] +Right keys [1]: [i_category#11#19] +Join condition: None + +(22) Filter [codegen id : 4] +Input [5]: [i_item_sk#9, i_current_price#10, i_category#11, avg(i_current_price)#18, i_category#11#19] +Condition : (cast(i_current_price#10 as decimal(14,7)) > CheckOverflow((1.200000 * promote_precision(avg(i_current_price)#18)), DecimalType(14,7), true)) + +(23) Project [codegen id : 4] +Output [1]: [i_item_sk#9] +Input [5]: [i_item_sk#9, i_current_price#10, i_category#11, avg(i_current_price)#18, i_category#11#19] + +(24) BroadcastExchange +Input [1]: [i_item_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] + +(25) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#9] +Join condition: None + +(26) Project [codegen id : 5] +Output [1]: [ss_customer_sk#3] +Input [3]: [ss_item_sk#2, ss_customer_sk#3, i_item_sk#9] + +(27) Exchange +Input [1]: [ss_customer_sk#3] +Arguments: hashpartitioning(ss_customer_sk#3, 5), 
true, [id=#22] + +(28) Sort [codegen id : 6] +Input [1]: [ss_customer_sk#3] +Arguments: [ss_customer_sk#3 ASC NULLS FIRST], false, 0 + +(29) Scan parquet default.customer_address +Output [2]: [ca_address_sk#23, ca_state#24] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 7] +Input [2]: [ca_address_sk#23, ca_state#24] + +(31) Filter [codegen id : 7] +Input [2]: [ca_address_sk#23, ca_state#24] +Condition : isnotnull(ca_address_sk#23) + +(32) Exchange +Input [2]: [ca_address_sk#23, ca_state#24] +Arguments: hashpartitioning(ca_address_sk#23, 5), true, [id=#25] + +(33) Sort [codegen id : 8] +Input [2]: [ca_address_sk#23, ca_state#24] +Arguments: [ca_address_sk#23 ASC NULLS FIRST], false, 0 + +(34) Scan parquet default.customer +Output [2]: [c_customer_sk#26, c_current_addr_sk#27] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_customer_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 9] +Input [2]: [c_customer_sk#26, c_current_addr_sk#27] + +(36) Filter [codegen id : 9] +Input [2]: [c_customer_sk#26, c_current_addr_sk#27] +Condition : (isnotnull(c_current_addr_sk#27) AND isnotnull(c_customer_sk#26)) + +(37) Exchange +Input [2]: [c_customer_sk#26, c_current_addr_sk#27] +Arguments: hashpartitioning(c_current_addr_sk#27, 5), true, [id=#28] + +(38) Sort [codegen id : 10] +Input [2]: [c_customer_sk#26, c_current_addr_sk#27] +Arguments: [c_current_addr_sk#27 ASC NULLS FIRST], false, 0 + +(39) SortMergeJoin [codegen id : 11] +Left keys [1]: [ca_address_sk#23] +Right keys [1]: [c_current_addr_sk#27] +Join condition: None + +(40) Project [codegen id : 
11] +Output [2]: [ca_state#24, c_customer_sk#26] +Input [4]: [ca_address_sk#23, ca_state#24, c_customer_sk#26, c_current_addr_sk#27] + +(41) Exchange +Input [2]: [ca_state#24, c_customer_sk#26] +Arguments: hashpartitioning(c_customer_sk#26, 5), true, [id=#29] + +(42) Sort [codegen id : 12] +Input [2]: [ca_state#24, c_customer_sk#26] +Arguments: [c_customer_sk#26 ASC NULLS FIRST], false, 0 + +(43) SortMergeJoin [codegen id : 13] +Left keys [1]: [ss_customer_sk#3] +Right keys [1]: [c_customer_sk#26] +Join condition: None + +(44) Project [codegen id : 13] +Output [1]: [ca_state#24] +Input [3]: [ss_customer_sk#3, ca_state#24, c_customer_sk#26] + +(45) HashAggregate [codegen id : 13] +Input [1]: [ca_state#24] +Keys [1]: [ca_state#24] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#30] +Results [2]: [ca_state#24, count#31] + +(46) Exchange +Input [2]: [ca_state#24, count#31] +Arguments: hashpartitioning(ca_state#24, 5), true, [id=#32] + +(47) HashAggregate [codegen id : 14] +Input [2]: [ca_state#24, count#31] +Keys [1]: [ca_state#24] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#33] +Results [4]: [ca_state#24 AS state#34, count(1)#33 AS cnt#35, count(1)#33 AS count(1)#36, ca_state#24] + +(48) Filter [codegen id : 14] +Input [4]: [state#34, cnt#35, count(1)#36, ca_state#24] +Condition : (count(1)#36 >= 10) + +(49) Project [codegen id : 14] +Output [3]: [state#34, cnt#35, ca_state#24] +Input [4]: [state#34, cnt#35, count(1)#36, ca_state#24] + +(50) TakeOrderedAndProject +Input [3]: [state#34, cnt#35, ca_state#24] +Arguments: 100, [cnt#35 ASC NULLS FIRST, ca_state#24 ASC NULLS FIRST], [state#34, cnt#35] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 6 Hosting Expression = Subquery scalar-subquery#6, [id=#7] +* HashAggregate (57) ++- Exchange (56) + +- * HashAggregate (55) + +- * Project (54) + +- * Filter (53) + +- * ColumnarToRow (52) + +- Scan parquet default.date_dim (51) + + +(51) Scan parquet default.date_dim 
+Output [3]: [d_month_seq#5, d_year#37, d_moy#38] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,1)] +ReadSchema: struct + +(52) ColumnarToRow [codegen id : 1] +Input [3]: [d_month_seq#5, d_year#37, d_moy#38] + +(53) Filter [codegen id : 1] +Input [3]: [d_month_seq#5, d_year#37, d_moy#38] +Condition : (((isnotnull(d_year#37) AND isnotnull(d_moy#38)) AND (d_year#37 = 2000)) AND (d_moy#38 = 1)) + +(54) Project [codegen id : 1] +Output [1]: [d_month_seq#5] +Input [3]: [d_month_seq#5, d_year#37, d_moy#38] + +(55) HashAggregate [codegen id : 1] +Input [1]: [d_month_seq#5] +Keys [1]: [d_month_seq#5] +Functions: [] +Aggregate Attributes: [] +Results [1]: [d_month_seq#5] + +(56) Exchange +Input [1]: [d_month_seq#5] +Arguments: hashpartitioning(d_month_seq#5, 5), true, [id=#39] + +(57) HashAggregate [codegen id : 2] +Input [1]: [d_month_seq#5] +Keys [1]: [d_month_seq#5] +Functions: [] +Aggregate Attributes: [] +Results [1]: [d_month_seq#5] + + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.sf100/simplified.txt new file mode 100644 index 0000000000000..924f669212155 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.sf100/simplified.txt @@ -0,0 +1,95 @@ +TakeOrderedAndProject [ca_state,cnt,state] + WholeStageCodegen (14) + Project [ca_state,cnt,state] + Filter [count(1)] + HashAggregate [ca_state,count] [cnt,count,count(1),count(1),state] + InputAdapter + Exchange [ca_state] #1 + WholeStageCodegen (13) + HashAggregate [ca_state] [count,count] + Project [ca_state] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (6) + Sort [ss_customer_sk] + InputAdapter + 
Exchange [ss_customer_sk] #2 + WholeStageCodegen (5) + Project [ss_customer_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_customer_sk,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_month_seq] + Subquery #1 + WholeStageCodegen (2) + HashAggregate [d_month_seq] + InputAdapter + Exchange [d_month_seq] #4 + WholeStageCodegen (1) + HashAggregate [d_month_seq] + Project [d_month_seq] + Filter [d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_month_seq,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [i_item_sk] + Filter [avg(i_current_price),i_current_price] + BroadcastHashJoin [i_category,i_category] + Filter [i_current_price,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_current_price,i_item_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + HashAggregate [count,i_category,sum] [avg(UnscaledValue(i_current_price)),avg(i_current_price),count,i_category,sum] + InputAdapter + Exchange [i_category] #7 + WholeStageCodegen (2) + HashAggregate [i_category,i_current_price] [count,count,sum,sum] + Filter [i_category] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_current_price] + InputAdapter + WholeStageCodegen (12) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #8 + WholeStageCodegen (11) + Project [c_customer_sk,ca_state] + SortMergeJoin [c_current_addr_sk,ca_address_sk] + InputAdapter + WholeStageCodegen (8) + Sort [ca_address_sk] + InputAdapter + Exchange [ca_address_sk] #9 + WholeStageCodegen (7) + Filter [ca_address_sk] + 
ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state] + InputAdapter + WholeStageCodegen (10) + Sort [c_current_addr_sk] + InputAdapter + Exchange [c_current_addr_sk] #10 + WholeStageCodegen (9) + Filter [c_current_addr_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_customer_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6/explain.txt new file mode 100644 index 0000000000000..4a892fbdb59b6 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6/explain.txt @@ -0,0 +1,301 @@ +== Physical Plan == +TakeOrderedAndProject (44) ++- * Project (43) + +- * Filter (42) + +- * HashAggregate (41) + +- Exchange (40) + +- * HashAggregate (39) + +- * Project (38) + +- * BroadcastHashJoin Inner BuildRight (37) + :- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.customer_address (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.customer (4) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet default.store_sales (10) + : +- BroadcastExchange (20) + : +- * Project (19) + : +- * Filter (18) + : +- * ColumnarToRow (17) + : +- Scan parquet default.date_dim (16) + +- BroadcastExchange (36) + +- * Project (35) + +- * Filter (34) + +- * BroadcastHashJoin LeftOuter BuildRight (33) + :- * Filter (25) + : +- * ColumnarToRow (24) + : +- Scan parquet default.item (23) + +- BroadcastExchange (32) + +- * HashAggregate (31) + +- Exchange (30) + +- * HashAggregate (29) 
+ +- * Filter (28) + +- * ColumnarToRow (27) + +- Scan parquet default.item (26) + + +(1) Scan parquet default.customer_address +Output [2]: [ca_address_sk#1, ca_state#2] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 7] +Input [2]: [ca_address_sk#1, ca_state#2] + +(3) Filter [codegen id : 7] +Input [2]: [ca_address_sk#1, ca_state#2] +Condition : isnotnull(ca_address_sk#1) + +(4) Scan parquet default.customer +Output [2]: [c_customer_sk#3, c_current_addr_sk#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_customer_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [c_customer_sk#3, c_current_addr_sk#4] + +(6) Filter [codegen id : 1] +Input [2]: [c_customer_sk#3, c_current_addr_sk#4] +Condition : (isnotnull(c_current_addr_sk#4) AND isnotnull(c_customer_sk#3)) + +(7) BroadcastExchange +Input [2]: [c_customer_sk#3, c_current_addr_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#5] + +(8) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ca_address_sk#1] +Right keys [1]: [c_current_addr_sk#4] +Join condition: None + +(9) Project [codegen id : 7] +Output [2]: [ca_state#2, c_customer_sk#3] +Input [4]: [ca_address_sk#1, ca_state#2, c_customer_sk#3, c_current_addr_sk#4] + +(10) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#6, ss_item_sk#7, ss_customer_sk#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk), 
IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [ss_sold_date_sk#6, ss_item_sk#7, ss_customer_sk#8] + +(12) Filter [codegen id : 2] +Input [3]: [ss_sold_date_sk#6, ss_item_sk#7, ss_customer_sk#8] +Condition : ((isnotnull(ss_customer_sk#8) AND isnotnull(ss_sold_date_sk#6)) AND isnotnull(ss_item_sk#7)) + +(13) BroadcastExchange +Input [3]: [ss_sold_date_sk#6, ss_item_sk#7, ss_customer_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, false] as bigint)),false), [id=#9] + +(14) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ss_customer_sk#8] +Join condition: None + +(15) Project [codegen id : 7] +Output [3]: [ca_state#2, ss_sold_date_sk#6, ss_item_sk#7] +Input [5]: [ca_state#2, c_customer_sk#3, ss_sold_date_sk#6, ss_item_sk#7, ss_customer_sk#8] + +(16) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_month_seq#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#10, d_month_seq#11] + +(18) Filter [codegen id : 3] +Input [2]: [d_date_sk#10, d_month_seq#11] +Condition : ((isnotnull(d_month_seq#11) AND (d_month_seq#11 = Subquery scalar-subquery#12, [id=#13])) AND isnotnull(d_date_sk#10)) + +(19) Project [codegen id : 3] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_month_seq#11] + +(20) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] + +(21) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_sold_date_sk#6] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(22) Project [codegen id : 7] +Output [2]: [ca_state#2, ss_item_sk#7] +Input [4]: [ca_state#2, 
ss_sold_date_sk#6, ss_item_sk#7, d_date_sk#10] + +(23) Scan parquet default.item +Output [3]: [i_item_sk#15, i_current_price#16, i_category#17] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_current_price), IsNotNull(i_item_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 6] +Input [3]: [i_item_sk#15, i_current_price#16, i_category#17] + +(25) Filter [codegen id : 6] +Input [3]: [i_item_sk#15, i_current_price#16, i_category#17] +Condition : (isnotnull(i_current_price#16) AND isnotnull(i_item_sk#15)) + +(26) Scan parquet default.item +Output [2]: [i_current_price#16, i_category#17] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_category)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 4] +Input [2]: [i_current_price#16, i_category#17] + +(28) Filter [codegen id : 4] +Input [2]: [i_current_price#16, i_category#17] +Condition : isnotnull(i_category#17) + +(29) HashAggregate [codegen id : 4] +Input [2]: [i_current_price#16, i_category#17] +Keys [1]: [i_category#17] +Functions [1]: [partial_avg(UnscaledValue(i_current_price#16))] +Aggregate Attributes [2]: [sum#18, count#19] +Results [3]: [i_category#17, sum#20, count#21] + +(30) Exchange +Input [3]: [i_category#17, sum#20, count#21] +Arguments: hashpartitioning(i_category#17, 5), true, [id=#22] + +(31) HashAggregate [codegen id : 5] +Input [3]: [i_category#17, sum#20, count#21] +Keys [1]: [i_category#17] +Functions [1]: [avg(UnscaledValue(i_current_price#16))] +Aggregate Attributes [1]: [avg(UnscaledValue(i_current_price#16))#23] +Results [2]: [cast((avg(UnscaledValue(i_current_price#16))#23 / 100.0) as decimal(11,6)) AS avg(i_current_price)#24, i_category#17 AS i_category#17#25] + +(32) BroadcastExchange +Input 
[2]: [avg(i_current_price)#24, i_category#17#25] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [id=#26] + +(33) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [i_category#17] +Right keys [1]: [i_category#17#25] +Join condition: None + +(34) Filter [codegen id : 6] +Input [5]: [i_item_sk#15, i_current_price#16, i_category#17, avg(i_current_price)#24, i_category#17#25] +Condition : (cast(i_current_price#16 as decimal(14,7)) > CheckOverflow((1.200000 * promote_precision(avg(i_current_price)#24)), DecimalType(14,7), true)) + +(35) Project [codegen id : 6] +Output [1]: [i_item_sk#15] +Input [5]: [i_item_sk#15, i_current_price#16, i_category#17, avg(i_current_price)#24, i_category#17#25] + +(36) BroadcastExchange +Input [1]: [i_item_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#27] + +(37) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_item_sk#7] +Right keys [1]: [i_item_sk#15] +Join condition: None + +(38) Project [codegen id : 7] +Output [1]: [ca_state#2] +Input [3]: [ca_state#2, ss_item_sk#7, i_item_sk#15] + +(39) HashAggregate [codegen id : 7] +Input [1]: [ca_state#2] +Keys [1]: [ca_state#2] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#28] +Results [2]: [ca_state#2, count#29] + +(40) Exchange +Input [2]: [ca_state#2, count#29] +Arguments: hashpartitioning(ca_state#2, 5), true, [id=#30] + +(41) HashAggregate [codegen id : 8] +Input [2]: [ca_state#2, count#29] +Keys [1]: [ca_state#2] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#31] +Results [4]: [ca_state#2 AS state#32, count(1)#31 AS cnt#33, count(1)#31 AS count(1)#34, ca_state#2] + +(42) Filter [codegen id : 8] +Input [4]: [state#32, cnt#33, count(1)#34, ca_state#2] +Condition : (count(1)#34 >= 10) + +(43) Project [codegen id : 8] +Output [3]: [state#32, cnt#33, ca_state#2] +Input [4]: [state#32, cnt#33, count(1)#34, ca_state#2] + +(44) TakeOrderedAndProject +Input [3]: 
[state#32, cnt#33, ca_state#2] +Arguments: 100, [cnt#33 ASC NULLS FIRST, ca_state#2 ASC NULLS FIRST], [state#32, cnt#33] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 18 Hosting Expression = Subquery scalar-subquery#12, [id=#13] +* HashAggregate (51) ++- Exchange (50) + +- * HashAggregate (49) + +- * Project (48) + +- * Filter (47) + +- * ColumnarToRow (46) + +- Scan parquet default.date_dim (45) + + +(45) Scan parquet default.date_dim +Output [3]: [d_month_seq#11, d_year#35, d_moy#36] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,1)] +ReadSchema: struct + +(46) ColumnarToRow [codegen id : 1] +Input [3]: [d_month_seq#11, d_year#35, d_moy#36] + +(47) Filter [codegen id : 1] +Input [3]: [d_month_seq#11, d_year#35, d_moy#36] +Condition : (((isnotnull(d_year#35) AND isnotnull(d_moy#36)) AND (d_year#35 = 2000)) AND (d_moy#36 = 1)) + +(48) Project [codegen id : 1] +Output [1]: [d_month_seq#11] +Input [3]: [d_month_seq#11, d_year#35, d_moy#36] + +(49) HashAggregate [codegen id : 1] +Input [1]: [d_month_seq#11] +Keys [1]: [d_month_seq#11] +Functions: [] +Aggregate Attributes: [] +Results [1]: [d_month_seq#11] + +(50) Exchange +Input [1]: [d_month_seq#11] +Arguments: hashpartitioning(d_month_seq#11, 5), true, [id=#37] + +(51) HashAggregate [codegen id : 2] +Input [1]: [d_month_seq#11] +Keys [1]: [d_month_seq#11] +Functions: [] +Aggregate Attributes: [] +Results [1]: [d_month_seq#11] + + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6/simplified.txt new file mode 100644 index 0000000000000..affe3f93d6e73 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6/simplified.txt @@ -0,0 +1,77 @@ 
+TakeOrderedAndProject [ca_state,cnt,state] + WholeStageCodegen (8) + Project [ca_state,cnt,state] + Filter [count(1)] + HashAggregate [ca_state,count] [cnt,count,count(1),count(1),state] + InputAdapter + Exchange [ca_state] #1 + WholeStageCodegen (7) + HashAggregate [ca_state] [count,count] + Project [ca_state] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ca_state,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ca_state,ss_item_sk,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [c_customer_sk,ca_state] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [c_current_addr_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_customer_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date_sk,d_month_seq] + Subquery #1 + WholeStageCodegen (2) + HashAggregate [d_month_seq] + InputAdapter + Exchange [d_month_seq] #5 + WholeStageCodegen (1) + HashAggregate [d_month_seq] + Project [d_month_seq] + Filter [d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_month_seq,d_moy,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (6) + Project [i_item_sk] + Filter [avg(i_current_price),i_current_price] + BroadcastHashJoin [i_category,i_category] + Filter [i_current_price,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_current_price,i_item_sk] + 
InputAdapter + BroadcastExchange #7 + WholeStageCodegen (5) + HashAggregate [count,i_category,sum] [avg(UnscaledValue(i_current_price)),avg(i_current_price),count,i_category,sum] + InputAdapter + Exchange [i_category] #8 + WholeStageCodegen (4) + HashAggregate [i_category,i_current_price] [count,count,sum,sum] + Filter [i_category] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_current_price] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.sf100/explain.txt new file mode 100644 index 0000000000000..900f61c34e2bc --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.sf100/explain.txt @@ -0,0 +1,1110 @@ +== Physical Plan == +* Sort (209) ++- Exchange (208) + +- * Project (207) + +- * SortMergeJoin Inner (206) + :- * Sort (128) + : +- Exchange (127) + : +- * HashAggregate (126) + : +- Exchange (125) + : +- * HashAggregate (124) + : +- * Project (123) + : +- * BroadcastHashJoin Inner BuildRight (122) + : :- * Project (116) + : : +- * BroadcastHashJoin Inner BuildRight (115) + : : :- * Project (113) + : : : +- * BroadcastHashJoin Inner BuildRight (112) + : : : :- * Project (107) + : : : : +- * SortMergeJoin Inner (106) + : : : : :- * Sort (103) + : : : : : +- Exchange (102) + : : : : : +- * Project (101) + : : : : : +- * SortMergeJoin Inner (100) + : : : : : :- * Sort (94) + : : : : : : +- Exchange (93) + : : : : : : +- * Project (92) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (91) + : : : : : : :- * Project (89) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (88) + : : : : : : : :- * Project (83) + : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (82) + : : : : : : : : :- * Project (77) + : : : : : : : : : +- * SortMergeJoin Inner (76) + : : : : : : : : : :- * Sort (73) + : : : : : : : : : : +- Exchange (72) + : : : : : : : : : : +- * Project (71) + 
: : : : : : : : : : +- * SortMergeJoin Inner (70) + : : : : : : : : : : :- * Sort (64) + : : : : : : : : : : : +- Exchange (63) + : : : : : : : : : : : +- * Project (62) + : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (61) + : : : : : : : : : : : :- * Project (59) + : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (58) + : : : : : : : : : : : : :- * Project (53) + : : : : : : : : : : : : : +- * SortMergeJoin Inner (52) + : : : : : : : : : : : : : :- * Sort (46) + : : : : : : : : : : : : : : +- Exchange (45) + : : : : : : : : : : : : : : +- * Project (44) + : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (43) + : : : : : : : : : : : : : : :- * Project (38) + : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : : : : : : : : : : : : : :- * Project (32) + : : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (31) + : : : : : : : : : : : : : : : : :- * Project (12) + : : : : : : : : : : : : : : : : : +- * SortMergeJoin Inner (11) + : : : : : : : : : : : : : : : : : :- * Sort (5) + : : : : : : : : : : : : : : : : : : +- Exchange (4) + : : : : : : : : : : : : : : : : : : +- * Filter (3) + : : : : : : : : : : : : : : : : : : +- * ColumnarToRow (2) + : : : : : : : : : : : : : : : : : : +- Scan parquet default.store_sales (1) + : : : : : : : : : : : : : : : : : +- * Sort (10) + : : : : : : : : : : : : : : : : : +- Exchange (9) + : : : : : : : : : : : : : : : : : +- * Filter (8) + : : : : : : : : : : : : : : : : : +- * ColumnarToRow (7) + : : : : : : : : : : : : : : : : : +- Scan parquet default.store_returns (6) + : : : : : : : : : : : : : : : : +- BroadcastExchange (30) + : : : : : : : : : : : : : : : : +- * Project (29) + : : : : : : : : : : : : : : : : +- * Filter (28) + : : : : : : : : : : : : : : : : +- * HashAggregate (27) + : : : : : : : : : : : : : : : : +- Exchange (26) + : : : : : : : : : : : : : : : : +- * HashAggregate (25) + : : : : : : : : : : : : : : : : +- * 
Project (24) + : : : : : : : : : : : : : : : : +- * SortMergeJoin Inner (23) + : : : : : : : : : : : : : : : : :- * Sort (17) + : : : : : : : : : : : : : : : : : +- Exchange (16) + : : : : : : : : : : : : : : : : : +- * Filter (15) + : : : : : : : : : : : : : : : : : +- * ColumnarToRow (14) + : : : : : : : : : : : : : : : : : +- Scan parquet default.catalog_sales (13) + : : : : : : : : : : : : : : : : +- * Sort (22) + : : : : : : : : : : : : : : : : +- Exchange (21) + : : : : : : : : : : : : : : : : +- * Filter (20) + : : : : : : : : : : : : : : : : +- * ColumnarToRow (19) + : : : : : : : : : : : : : : : : +- Scan parquet default.catalog_returns (18) + : : : : : : : : : : : : : : : +- BroadcastExchange (36) + : : : : : : : : : : : : : : : +- * Filter (35) + : : : : : : : : : : : : : : : +- * ColumnarToRow (34) + : : : : : : : : : : : : : : : +- Scan parquet default.date_dim (33) + : : : : : : : : : : : : : : +- BroadcastExchange (42) + : : : : : : : : : : : : : : +- * Filter (41) + : : : : : : : : : : : : : : +- * ColumnarToRow (40) + : : : : : : : : : : : : : : +- Scan parquet default.store (39) + : : : : : : : : : : : : : +- * Sort (51) + : : : : : : : : : : : : : +- Exchange (50) + : : : : : : : : : : : : : +- * Filter (49) + : : : : : : : : : : : : : +- * ColumnarToRow (48) + : : : : : : : : : : : : : +- Scan parquet default.customer (47) + : : : : : : : : : : : : +- BroadcastExchange (57) + : : : : : : : : : : : : +- * Filter (56) + : : : : : : : : : : : : +- * ColumnarToRow (55) + : : : : : : : : : : : : +- Scan parquet default.date_dim (54) + : : : : : : : : : : : +- ReusedExchange (60) + : : : : : : : : : : +- * Sort (69) + : : : : : : : : : : +- Exchange (68) + : : : : : : : : : : +- * Filter (67) + : : : : : : : : : : +- * ColumnarToRow (66) + : : : : : : : : : : +- Scan parquet default.customer_demographics (65) + : : : : : : : : : +- * Sort (75) + : : : : : : : : : +- ReusedExchange (74) + : : : : : : : : +- BroadcastExchange (81) + : : : : : : : : +- * 
Filter (80) + : : : : : : : : +- * ColumnarToRow (79) + : : : : : : : : +- Scan parquet default.promotion (78) + : : : : : : : +- BroadcastExchange (87) + : : : : : : : +- * Filter (86) + : : : : : : : +- * ColumnarToRow (85) + : : : : : : : +- Scan parquet default.household_demographics (84) + : : : : : : +- ReusedExchange (90) + : : : : : +- * Sort (99) + : : : : : +- Exchange (98) + : : : : : +- * Filter (97) + : : : : : +- * ColumnarToRow (96) + : : : : : +- Scan parquet default.customer_address (95) + : : : : +- * Sort (105) + : : : : +- ReusedExchange (104) + : : : +- BroadcastExchange (111) + : : : +- * Filter (110) + : : : +- * ColumnarToRow (109) + : : : +- Scan parquet default.income_band (108) + : : +- ReusedExchange (114) + : +- BroadcastExchange (121) + : +- * Project (120) + : +- * Filter (119) + : +- * ColumnarToRow (118) + : +- Scan parquet default.item (117) + +- * Sort (205) + +- Exchange (204) + +- * HashAggregate (203) + +- Exchange (202) + +- * HashAggregate (201) + +- * Project (200) + +- * BroadcastHashJoin Inner BuildRight (199) + :- * Project (197) + : +- * BroadcastHashJoin Inner BuildRight (196) + : :- * Project (194) + : : +- * BroadcastHashJoin Inner BuildRight (193) + : : :- * Project (191) + : : : +- * SortMergeJoin Inner (190) + : : : :- * Sort (187) + : : : : +- Exchange (186) + : : : : +- * Project (185) + : : : : +- * SortMergeJoin Inner (184) + : : : : :- * Sort (181) + : : : : : +- Exchange (180) + : : : : : +- * Project (179) + : : : : : +- * BroadcastHashJoin Inner BuildRight (178) + : : : : : :- * Project (176) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (175) + : : : : : : :- * Project (173) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (172) + : : : : : : : :- * Project (170) + : : : : : : : : +- * SortMergeJoin Inner (169) + : : : : : : : : :- * Sort (166) + : : : : : : : : : +- Exchange (165) + : : : : : : : : : +- * Project (164) + : : : : : : : : : +- * SortMergeJoin Inner (163) + : : : : : : : : : 
:- * Sort (160) + : : : : : : : : : : +- Exchange (159) + : : : : : : : : : : +- * Project (158) + : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (157) + : : : : : : : : : : :- * Project (155) + : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (154) + : : : : : : : : : : : :- * Project (152) + : : : : : : : : : : : : +- * SortMergeJoin Inner (151) + : : : : : : : : : : : : :- * Sort (148) + : : : : : : : : : : : : : +- Exchange (147) + : : : : : : : : : : : : : +- * Project (146) + : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (145) + : : : : : : : : : : : : : :- * Project (143) + : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (142) + : : : : : : : : : : : : : : :- * Project (137) + : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (136) + : : : : : : : : : : : : : : : :- * Project (134) + : : : : : : : : : : : : : : : : +- * SortMergeJoin Inner (133) + : : : : : : : : : : : : : : : : :- * Sort (130) + : : : : : : : : : : : : : : : : : +- ReusedExchange (129) + : : : : : : : : : : : : : : : : +- * Sort (132) + : : : : : : : : : : : : : : : : +- ReusedExchange (131) + : : : : : : : : : : : : : : : +- ReusedExchange (135) + : : : : : : : : : : : : : : +- BroadcastExchange (141) + : : : : : : : : : : : : : : +- * Filter (140) + : : : : : : : : : : : : : : +- * ColumnarToRow (139) + : : : : : : : : : : : : : : +- Scan parquet default.date_dim (138) + : : : : : : : : : : : : : +- ReusedExchange (144) + : : : : : : : : : : : : +- * Sort (150) + : : : : : : : : : : : : +- ReusedExchange (149) + : : : : : : : : : : : +- ReusedExchange (153) + : : : : : : : : : : +- ReusedExchange (156) + : : : : : : : : : +- * Sort (162) + : : : : : : : : : +- ReusedExchange (161) + : : : : : : : : +- * Sort (168) + : : : : : : : : +- ReusedExchange (167) + : : : : : : : +- ReusedExchange (171) + : : : : : : +- ReusedExchange (174) + : : : : : +- ReusedExchange (177) + : : : : +- * Sort (183) + : : : : 
+- ReusedExchange (182) + : : : +- * Sort (189) + : : : +- ReusedExchange (188) + : : +- ReusedExchange (192) + : +- ReusedExchange (195) + +- ReusedExchange (198) + + +(1) Scan parquet default.store_sales +Output [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_promo_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] + +(3) Filter [codegen id : 1] +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Condition : ((((((((isnotnull(ss_item_sk#2) AND isnotnull(ss_ticket_number#9)) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#7)) AND isnotnull(ss_customer_sk#3)) AND isnotnull(ss_cdemo_sk#4)) AND isnotnull(ss_promo_sk#8)) AND isnotnull(ss_hdemo_sk#5)) AND isnotnull(ss_addr_sk#6)) + +(4) Exchange +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Arguments: hashpartitioning(cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#9 as bigint), 5), 
true, [id=#13] + +(5) Sort [codegen id : 2] +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Arguments: [cast(ss_item_sk#2 as bigint) ASC NULLS FIRST, cast(ss_ticket_number#9 as bigint) ASC NULLS FIRST], false, 0 + +(6) Scan parquet default.store_returns +Output [2]: [sr_item_sk#14, sr_ticket_number#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_returns] +PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 3] +Input [2]: [sr_item_sk#14, sr_ticket_number#15] + +(8) Filter [codegen id : 3] +Input [2]: [sr_item_sk#14, sr_ticket_number#15] +Condition : (isnotnull(sr_item_sk#14) AND isnotnull(sr_ticket_number#15)) + +(9) Exchange +Input [2]: [sr_item_sk#14, sr_ticket_number#15] +Arguments: hashpartitioning(sr_item_sk#14, sr_ticket_number#15, 5), true, [id=#16] + +(10) Sort [codegen id : 4] +Input [2]: [sr_item_sk#14, sr_ticket_number#15] +Arguments: [sr_item_sk#14 ASC NULLS FIRST, sr_ticket_number#15 ASC NULLS FIRST], false, 0 + +(11) SortMergeJoin [codegen id : 13] +Left keys [2]: [cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#9 as bigint)] +Right keys [2]: [sr_item_sk#14, sr_ticket_number#15] +Join condition: None + +(12) Project [codegen id : 13] +Output [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Input [14]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, sr_item_sk#14, sr_ticket_number#15] + +(13) 
Scan parquet default.catalog_sales +Output [3]: [cs_item_sk#17, cs_order_number#18, cs_ext_list_price#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_order_number)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 5] +Input [3]: [cs_item_sk#17, cs_order_number#18, cs_ext_list_price#19] + +(15) Filter [codegen id : 5] +Input [3]: [cs_item_sk#17, cs_order_number#18, cs_ext_list_price#19] +Condition : (isnotnull(cs_item_sk#17) AND isnotnull(cs_order_number#18)) + +(16) Exchange +Input [3]: [cs_item_sk#17, cs_order_number#18, cs_ext_list_price#19] +Arguments: hashpartitioning(cs_item_sk#17, cs_order_number#18, 5), true, [id=#20] + +(17) Sort [codegen id : 6] +Input [3]: [cs_item_sk#17, cs_order_number#18, cs_ext_list_price#19] +Arguments: [cs_item_sk#17 ASC NULLS FIRST, cs_order_number#18 ASC NULLS FIRST], false, 0 + +(18) Scan parquet default.catalog_returns +Output [5]: [cr_item_sk#21, cr_order_number#22, cr_refunded_cash#23, cr_reversed_charge#24, cr_store_credit#25] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/catalog_returns] +PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 7] +Input [5]: [cr_item_sk#21, cr_order_number#22, cr_refunded_cash#23, cr_reversed_charge#24, cr_store_credit#25] + +(20) Filter [codegen id : 7] +Input [5]: [cr_item_sk#21, cr_order_number#22, cr_refunded_cash#23, cr_reversed_charge#24, cr_store_credit#25] +Condition : (isnotnull(cr_item_sk#21) AND isnotnull(cr_order_number#22)) + +(21) Exchange +Input [5]: [cr_item_sk#21, cr_order_number#22, cr_refunded_cash#23, cr_reversed_charge#24, cr_store_credit#25] +Arguments: hashpartitioning(cr_item_sk#21, 
cr_order_number#22, 5), true, [id=#26] + +(22) Sort [codegen id : 8] +Input [5]: [cr_item_sk#21, cr_order_number#22, cr_refunded_cash#23, cr_reversed_charge#24, cr_store_credit#25] +Arguments: [cr_item_sk#21 ASC NULLS FIRST, cr_order_number#22 ASC NULLS FIRST], false, 0 + +(23) SortMergeJoin [codegen id : 9] +Left keys [2]: [cs_item_sk#17, cs_order_number#18] +Right keys [2]: [cr_item_sk#21, cr_order_number#22] +Join condition: None + +(24) Project [codegen id : 9] +Output [5]: [cs_item_sk#17, cs_ext_list_price#19, cr_refunded_cash#23, cr_reversed_charge#24, cr_store_credit#25] +Input [8]: [cs_item_sk#17, cs_order_number#18, cs_ext_list_price#19, cr_item_sk#21, cr_order_number#22, cr_refunded_cash#23, cr_reversed_charge#24, cr_store_credit#25] + +(25) HashAggregate [codegen id : 9] +Input [5]: [cs_item_sk#17, cs_ext_list_price#19, cr_refunded_cash#23, cr_reversed_charge#24, cr_store_credit#25] +Keys [1]: [cs_item_sk#17] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_list_price#19)), partial_sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#23 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#24 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#25 as decimal(9,2)))), DecimalType(9,2), true))] +Aggregate Attributes [3]: [sum#27, sum#28, isEmpty#29] +Results [4]: [cs_item_sk#17, sum#30, sum#31, isEmpty#32] + +(26) Exchange +Input [4]: [cs_item_sk#17, sum#30, sum#31, isEmpty#32] +Arguments: hashpartitioning(cs_item_sk#17, 5), true, [id=#33] + +(27) HashAggregate [codegen id : 10] +Input [4]: [cs_item_sk#17, sum#30, sum#31, isEmpty#32] +Keys [1]: [cs_item_sk#17] +Functions [2]: [sum(UnscaledValue(cs_ext_list_price#19)), sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#23 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#24 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + 
promote_precision(cast(cr_store_credit#25 as decimal(9,2)))), DecimalType(9,2), true))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_list_price#19))#34, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#23 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#24 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#25 as decimal(9,2)))), DecimalType(9,2), true))#35] +Results [3]: [cs_item_sk#17, MakeDecimal(sum(UnscaledValue(cs_ext_list_price#19))#34,17,2) AS sum(cs_ext_list_price#19)#36, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#23 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#24 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#25 as decimal(9,2)))), DecimalType(9,2), true))#35 AS sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#23 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#24 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#25 as decimal(9,2)))), DecimalType(9,2), true))#37] + +(28) Filter [codegen id : 10] +Input [3]: [cs_item_sk#17, sum(cs_ext_list_price#19)#36, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#23 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#24 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#25 as decimal(9,2)))), DecimalType(9,2), true))#37] +Condition : (isnotnull(sum(cs_ext_list_price#19)#36) AND (cast(sum(cs_ext_list_price#19)#36 as decimal(21,2)) > CheckOverflow((2.00 * promote_precision(sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#23 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#24 as decimal(8,2)))), 
DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#25 as decimal(9,2)))), DecimalType(9,2), true))#37)), DecimalType(21,2), true))) + +(29) Project [codegen id : 10] +Output [1]: [cs_item_sk#17] +Input [3]: [cs_item_sk#17, sum(cs_ext_list_price#19)#36, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#23 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#24 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#25 as decimal(9,2)))), DecimalType(9,2), true))#37] + +(30) BroadcastExchange +Input [1]: [cs_item_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#38] + +(31) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [cs_item_sk#17] +Join condition: None + +(32) Project [codegen id : 13] +Output [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, cs_item_sk#17] + +(33) Scan parquet default.date_dim +Output [2]: [d_date_sk#39, d_year#40] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 11] +Input [2]: [d_date_sk#39, d_year#40] + +(35) Filter [codegen id : 11] +Input [2]: [d_date_sk#39, d_year#40] +Condition : ((isnotnull(d_year#40) AND (d_year#40 = 1999)) AND isnotnull(d_date_sk#39)) + +(36) BroadcastExchange +Input [2]: [d_date_sk#39, d_year#40] +Arguments: 
HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#41] + +(37) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#39] +Join condition: None + +(38) Project [codegen id : 13] +Output [11]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40] +Input [13]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_date_sk#39, d_year#40] + +(39) Scan parquet default.store +Output [3]: [s_store_sk#42, s_store_name#43, s_zip#44] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_zip), IsNotNull(s_store_name)] +ReadSchema: struct + +(40) ColumnarToRow [codegen id : 12] +Input [3]: [s_store_sk#42, s_store_name#43, s_zip#44] + +(41) Filter [codegen id : 12] +Input [3]: [s_store_sk#42, s_store_name#43, s_zip#44] +Condition : ((isnotnull(s_store_sk#42) AND isnotnull(s_zip#44)) AND isnotnull(s_store_name#43)) + +(42) BroadcastExchange +Input [3]: [s_store_sk#42, s_store_name#43, s_zip#44] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#45] + +(43) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ss_store_sk#7] +Right keys [1]: [s_store_sk#42] +Join condition: None + +(44) Project [codegen id : 13] +Output [12]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44] +Input [14]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, 
ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_sk#42, s_store_name#43, s_zip#44] + +(45) Exchange +Input [12]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44] +Arguments: hashpartitioning(ss_customer_sk#3, 5), true, [id=#46] + +(46) Sort [codegen id : 14] +Input [12]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44] +Arguments: [ss_customer_sk#3 ASC NULLS FIRST], false, 0 + +(47) Scan parquet default.customer +Output [6]: [c_customer_sk#47, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, c_first_shipto_date_sk#51, c_first_sales_date_sk#52] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_first_sales_date_sk), IsNotNull(c_first_shipto_date_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 15] +Input [6]: [c_customer_sk#47, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, c_first_shipto_date_sk#51, c_first_sales_date_sk#52] + +(49) Filter [codegen id : 15] +Input [6]: [c_customer_sk#47, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, c_first_shipto_date_sk#51, c_first_sales_date_sk#52] +Condition : (((((isnotnull(c_customer_sk#47) AND isnotnull(c_first_sales_date_sk#52)) AND isnotnull(c_first_shipto_date_sk#51)) AND isnotnull(c_current_cdemo_sk#48)) AND isnotnull(c_current_hdemo_sk#49)) AND isnotnull(c_current_addr_sk#50)) + +(50) Exchange +Input [6]: [c_customer_sk#47, c_current_cdemo_sk#48, c_current_hdemo_sk#49, 
c_current_addr_sk#50, c_first_shipto_date_sk#51, c_first_sales_date_sk#52] +Arguments: hashpartitioning(c_customer_sk#47, 5), true, [id=#53] + +(51) Sort [codegen id : 16] +Input [6]: [c_customer_sk#47, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, c_first_shipto_date_sk#51, c_first_sales_date_sk#52] +Arguments: [c_customer_sk#47 ASC NULLS FIRST], false, 0 + +(52) SortMergeJoin [codegen id : 19] +Left keys [1]: [ss_customer_sk#3] +Right keys [1]: [c_customer_sk#47] +Join condition: None + +(53) Project [codegen id : 19] +Output [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, c_first_shipto_date_sk#51, c_first_sales_date_sk#52] +Input [18]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_customer_sk#47, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, c_first_shipto_date_sk#51, c_first_sales_date_sk#52] + +(54) Scan parquet default.date_dim +Output [2]: [d_date_sk#54, d_year#55] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(55) ColumnarToRow [codegen id : 17] +Input [2]: [d_date_sk#54, d_year#55] + +(56) Filter [codegen id : 17] +Input [2]: [d_date_sk#54, d_year#55] +Condition : isnotnull(d_date_sk#54) + +(57) BroadcastExchange +Input [2]: [d_date_sk#54, d_year#55] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#56] + +(58) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [c_first_sales_date_sk#52] +Right keys [1]: [d_date_sk#54] +Join condition: None + 
+(59) Project [codegen id : 19] +Output [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, c_first_shipto_date_sk#51, d_year#55] +Input [18]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, c_first_shipto_date_sk#51, c_first_sales_date_sk#52, d_date_sk#54, d_year#55] + +(60) ReusedExchange [Reuses operator id: 57] +Output [2]: [d_date_sk#57, d_year#58] + +(61) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [c_first_shipto_date_sk#51] +Right keys [1]: [d_date_sk#57] +Join condition: None + +(62) Project [codegen id : 19] +Output [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#55, d_year#58] +Input [18]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, c_first_shipto_date_sk#51, d_year#55, d_date_sk#57, d_year#58] + +(63) Exchange +Input [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#55, d_year#58] +Arguments: hashpartitioning(ss_cdemo_sk#4, 5), true, [id=#59] + +(64) Sort [codegen id : 20] +Input [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, 
ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#55, d_year#58] +Arguments: [ss_cdemo_sk#4 ASC NULLS FIRST], false, 0 + +(65) Scan parquet default.customer_demographics +Output [2]: [cd_demo_sk#60, cd_marital_status#61] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status)] +ReadSchema: struct + +(66) ColumnarToRow [codegen id : 21] +Input [2]: [cd_demo_sk#60, cd_marital_status#61] + +(67) Filter [codegen id : 21] +Input [2]: [cd_demo_sk#60, cd_marital_status#61] +Condition : (isnotnull(cd_demo_sk#60) AND isnotnull(cd_marital_status#61)) + +(68) Exchange +Input [2]: [cd_demo_sk#60, cd_marital_status#61] +Arguments: hashpartitioning(cd_demo_sk#60, 5), true, [id=#62] + +(69) Sort [codegen id : 22] +Input [2]: [cd_demo_sk#60, cd_marital_status#61] +Arguments: [cd_demo_sk#60 ASC NULLS FIRST], false, 0 + +(70) SortMergeJoin [codegen id : 23] +Left keys [1]: [ss_cdemo_sk#4] +Right keys [1]: [cd_demo_sk#60] +Join condition: None + +(71) Project [codegen id : 23] +Output [16]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#55, d_year#58, cd_marital_status#61] +Input [18]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#55, d_year#58, cd_demo_sk#60, cd_marital_status#61] + +(72) Exchange +Input [16]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, 
ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#55, d_year#58, cd_marital_status#61] +Arguments: hashpartitioning(c_current_cdemo_sk#48, 5), true, [id=#63] + +(73) Sort [codegen id : 24] +Input [16]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#55, d_year#58, cd_marital_status#61] +Arguments: [c_current_cdemo_sk#48 ASC NULLS FIRST], false, 0 + +(74) ReusedExchange [Reuses operator id: 68] +Output [2]: [cd_demo_sk#64, cd_marital_status#65] + +(75) Sort [codegen id : 26] +Input [2]: [cd_demo_sk#64, cd_marital_status#65] +Arguments: [cd_demo_sk#64 ASC NULLS FIRST], false, 0 + +(76) SortMergeJoin [codegen id : 30] +Left keys [1]: [c_current_cdemo_sk#48] +Right keys [1]: [cd_demo_sk#64] +Join condition: NOT (cd_marital_status#61 = cd_marital_status#65) + +(77) Project [codegen id : 30] +Output [14]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#55, d_year#58] +Input [18]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#55, d_year#58, cd_marital_status#61, cd_demo_sk#64, cd_marital_status#65] + +(78) Scan parquet default.promotion +Output [1]: [p_promo_sk#66] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/promotion] +PushedFilters: [IsNotNull(p_promo_sk)] +ReadSchema: struct + +(79) ColumnarToRow [codegen id : 27] +Input [1]: [p_promo_sk#66] + +(80) 
Filter [codegen id : 27] +Input [1]: [p_promo_sk#66] +Condition : isnotnull(p_promo_sk#66) + +(81) BroadcastExchange +Input [1]: [p_promo_sk#66] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#67] + +(82) BroadcastHashJoin [codegen id : 30] +Left keys [1]: [ss_promo_sk#8] +Right keys [1]: [p_promo_sk#66] +Join condition: None + +(83) Project [codegen id : 30] +Output [13]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#55, d_year#58] +Input [15]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#55, d_year#58, p_promo_sk#66] + +(84) Scan parquet default.household_demographics +Output [2]: [hd_demo_sk#68, hd_income_band_sk#69] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/household_demographics] +PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)] +ReadSchema: struct + +(85) ColumnarToRow [codegen id : 28] +Input [2]: [hd_demo_sk#68, hd_income_band_sk#69] + +(86) Filter [codegen id : 28] +Input [2]: [hd_demo_sk#68, hd_income_band_sk#69] +Condition : (isnotnull(hd_demo_sk#68) AND isnotnull(hd_income_band_sk#69)) + +(87) BroadcastExchange +Input [2]: [hd_demo_sk#68, hd_income_band_sk#69] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#70] + +(88) BroadcastHashJoin [codegen id : 30] +Left keys [1]: [ss_hdemo_sk#5] +Right keys [1]: [hd_demo_sk#68] +Join condition: None + +(89) Project [codegen id : 30] +Output [13]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, 
c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#55, d_year#58, hd_income_band_sk#69] +Input [15]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#55, d_year#58, hd_demo_sk#68, hd_income_band_sk#69] + +(90) ReusedExchange [Reuses operator id: 87] +Output [2]: [hd_demo_sk#71, hd_income_band_sk#72] + +(91) BroadcastHashJoin [codegen id : 30] +Left keys [1]: [c_current_hdemo_sk#49] +Right keys [1]: [hd_demo_sk#71] +Join condition: None + +(92) Project [codegen id : 30] +Output [13]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_addr_sk#50, d_year#55, d_year#58, hd_income_band_sk#69, hd_income_band_sk#72] +Input [15]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#55, d_year#58, hd_income_band_sk#69, hd_demo_sk#71, hd_income_band_sk#72] + +(93) Exchange +Input [13]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_addr_sk#50, d_year#55, d_year#58, hd_income_band_sk#69, hd_income_band_sk#72] +Arguments: hashpartitioning(ss_addr_sk#6, 5), true, [id=#73] + +(94) Sort [codegen id : 31] +Input [13]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_addr_sk#50, d_year#55, d_year#58, hd_income_band_sk#69, hd_income_band_sk#72] +Arguments: [ss_addr_sk#6 ASC NULLS FIRST], false, 0 + +(95) Scan parquet default.customer_address +Output [5]: [ca_address_sk#74, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(96) ColumnarToRow [codegen id : 32] +Input [5]: [ca_address_sk#74, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] + +(97) Filter [codegen id : 32] +Input [5]: [ca_address_sk#74, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] +Condition : isnotnull(ca_address_sk#74) + +(98) Exchange +Input [5]: [ca_address_sk#74, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] +Arguments: hashpartitioning(ca_address_sk#74, 5), true, [id=#79] + +(99) Sort [codegen id : 33] +Input [5]: [ca_address_sk#74, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] +Arguments: [ca_address_sk#74 ASC NULLS FIRST], false, 0 + +(100) SortMergeJoin [codegen id : 34] +Left keys [1]: [ss_addr_sk#6] +Right keys [1]: [ca_address_sk#74] +Join condition: None + +(101) Project [codegen id : 34] +Output [16]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_addr_sk#50, d_year#55, d_year#58, hd_income_band_sk#69, hd_income_band_sk#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] +Input [18]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_addr_sk#50, d_year#55, d_year#58, hd_income_band_sk#69, hd_income_band_sk#72, ca_address_sk#74, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] + +(102) Exchange +Input [16]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_addr_sk#50, d_year#55, d_year#58, hd_income_band_sk#69, hd_income_band_sk#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] +Arguments: hashpartitioning(c_current_addr_sk#50, 5), true, [id=#80] + +(103) Sort [codegen 
id : 35] +Input [16]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_addr_sk#50, d_year#55, d_year#58, hd_income_band_sk#69, hd_income_band_sk#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] +Arguments: [c_current_addr_sk#50 ASC NULLS FIRST], false, 0 + +(104) ReusedExchange [Reuses operator id: 98] +Output [5]: [ca_address_sk#81, ca_street_number#82, ca_street_name#83, ca_city#84, ca_zip#85] + +(105) Sort [codegen id : 37] +Input [5]: [ca_address_sk#81, ca_street_number#82, ca_street_name#83, ca_city#84, ca_zip#85] +Arguments: [ca_address_sk#81 ASC NULLS FIRST], false, 0 + +(106) SortMergeJoin [codegen id : 41] +Left keys [1]: [c_current_addr_sk#50] +Right keys [1]: [ca_address_sk#81] +Join condition: None + +(107) Project [codegen id : 41] +Output [19]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, d_year#55, d_year#58, hd_income_band_sk#69, hd_income_band_sk#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#82, ca_street_name#83, ca_city#84, ca_zip#85] +Input [21]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_addr_sk#50, d_year#55, d_year#58, hd_income_band_sk#69, hd_income_band_sk#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_address_sk#81, ca_street_number#82, ca_street_name#83, ca_city#84, ca_zip#85] + +(108) Scan parquet default.income_band +Output [1]: [ib_income_band_sk#86] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/income_band] +PushedFilters: [IsNotNull(ib_income_band_sk)] +ReadSchema: struct + +(109) ColumnarToRow [codegen id : 38] +Input [1]: [ib_income_band_sk#86] + +(110) Filter [codegen id : 38] +Input [1]: [ib_income_band_sk#86] +Condition : 
isnotnull(ib_income_band_sk#86) + +(111) BroadcastExchange +Input [1]: [ib_income_band_sk#86] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#87] + +(112) BroadcastHashJoin [codegen id : 41] +Left keys [1]: [hd_income_band_sk#69] +Right keys [1]: [ib_income_band_sk#86] +Join condition: None + +(113) Project [codegen id : 41] +Output [18]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, d_year#55, d_year#58, hd_income_band_sk#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#82, ca_street_name#83, ca_city#84, ca_zip#85] +Input [20]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, d_year#55, d_year#58, hd_income_band_sk#69, hd_income_band_sk#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#82, ca_street_name#83, ca_city#84, ca_zip#85, ib_income_band_sk#86] + +(114) ReusedExchange [Reuses operator id: 111] +Output [1]: [ib_income_band_sk#88] + +(115) BroadcastHashJoin [codegen id : 41] +Left keys [1]: [hd_income_band_sk#72] +Right keys [1]: [ib_income_band_sk#88] +Join condition: None + +(116) Project [codegen id : 41] +Output [17]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, d_year#55, d_year#58, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#82, ca_street_name#83, ca_city#84, ca_zip#85] +Input [19]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, d_year#55, d_year#58, hd_income_band_sk#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#82, ca_street_name#83, ca_city#84, ca_zip#85, ib_income_band_sk#88] + +(117) Scan parquet default.item +Output [4]: [i_item_sk#89, i_current_price#90, i_color#91, i_product_name#92] +Batched: true 
+Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_current_price), In(i_color, [purple,burlywood,indian,spring,floral,medium]), GreaterThanOrEqual(i_current_price,64.00), IsNotNull(i_item_sk)] +ReadSchema: struct + +(118) ColumnarToRow [codegen id : 40] +Input [4]: [i_item_sk#89, i_current_price#90, i_color#91, i_product_name#92] + +(119) Filter [codegen id : 40] +Input [4]: [i_item_sk#89, i_current_price#90, i_color#91, i_product_name#92] +Condition : ((((((isnotnull(i_current_price#90) AND i_color#91 IN (purple,burlywood,indian,spring,floral,medium)) AND (i_current_price#90 >= 64.00)) AND (cast(i_current_price#90 as decimal(12,2)) <= 74.00)) AND (cast(i_current_price#90 as decimal(12,2)) >= 65.00)) AND (cast(i_current_price#90 as decimal(12,2)) <= 79.00)) AND isnotnull(i_item_sk#89)) + +(120) Project [codegen id : 40] +Output [2]: [i_item_sk#89, i_product_name#92] +Input [4]: [i_item_sk#89, i_current_price#90, i_color#91, i_product_name#92] + +(121) BroadcastExchange +Input [2]: [i_item_sk#89, i_product_name#92] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#93] + +(122) BroadcastHashJoin [codegen id : 41] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#89] +Join condition: None + +(123) Project [codegen id : 41] +Output [18]: [ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, d_year#55, d_year#58, s_store_name#43, s_zip#44, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#82, ca_street_name#83, ca_city#84, ca_zip#85, i_item_sk#89, i_product_name#92] +Input [19]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, d_year#55, d_year#58, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#82, ca_street_name#83, ca_city#84, ca_zip#85, 
i_item_sk#89, i_product_name#92] + +(124) HashAggregate [codegen id : 41] +Input [18]: [ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, d_year#55, d_year#58, s_store_name#43, s_zip#44, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#82, ca_street_name#83, ca_city#84, ca_zip#85, i_item_sk#89, i_product_name#92] +Keys [15]: [i_product_name#92, i_item_sk#89, s_store_name#43, s_zip#44, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#82, ca_street_name#83, ca_city#84, ca_zip#85, d_year#40, d_year#55, d_year#58] +Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#10)), partial_sum(UnscaledValue(ss_list_price#11)), partial_sum(UnscaledValue(ss_coupon_amt#12))] +Aggregate Attributes [4]: [count#94, sum#95, sum#96, sum#97] +Results [19]: [i_product_name#92, i_item_sk#89, s_store_name#43, s_zip#44, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#82, ca_street_name#83, ca_city#84, ca_zip#85, d_year#40, d_year#55, d_year#58, count#98, sum#99, sum#100, sum#101] + +(125) Exchange +Input [19]: [i_product_name#92, i_item_sk#89, s_store_name#43, s_zip#44, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#82, ca_street_name#83, ca_city#84, ca_zip#85, d_year#40, d_year#55, d_year#58, count#98, sum#99, sum#100, sum#101] +Arguments: hashpartitioning(i_product_name#92, i_item_sk#89, s_store_name#43, s_zip#44, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#82, ca_street_name#83, ca_city#84, ca_zip#85, d_year#40, d_year#55, d_year#58, 5), true, [id=#102] + +(126) HashAggregate [codegen id : 42] +Input [19]: [i_product_name#92, i_item_sk#89, s_store_name#43, s_zip#44, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#82, ca_street_name#83, ca_city#84, ca_zip#85, d_year#40, d_year#55, d_year#58, count#98, sum#99, sum#100, sum#101] +Keys [15]: 
[i_product_name#92, i_item_sk#89, s_store_name#43, s_zip#44, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#82, ca_street_name#83, ca_city#84, ca_zip#85, d_year#40, d_year#55, d_year#58] +Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#10)), sum(UnscaledValue(ss_list_price#11)), sum(UnscaledValue(ss_coupon_amt#12))] +Aggregate Attributes [4]: [count(1)#103, sum(UnscaledValue(ss_wholesale_cost#10))#104, sum(UnscaledValue(ss_list_price#11))#105, sum(UnscaledValue(ss_coupon_amt#12))#106] +Results [17]: [i_product_name#92 AS product_name#107, i_item_sk#89 AS item_sk#108, s_store_name#43 AS store_name#109, s_zip#44 AS store_zip#110, ca_street_number#75 AS b_street_number#111, ca_street_name#76 AS b_streen_name#112, ca_city#77 AS b_city#113, ca_zip#78 AS b_zip#114, ca_street_number#82 AS c_street_number#115, ca_street_name#83 AS c_street_name#116, ca_city#84 AS c_city#117, ca_zip#85 AS c_zip#118, d_year#40 AS syear#119, count(1)#103 AS cnt#120, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#10))#104,17,2) AS s1#121, MakeDecimal(sum(UnscaledValue(ss_list_price#11))#105,17,2) AS s2#122, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#12))#106,17,2) AS s3#123] + +(127) Exchange +Input [17]: [product_name#107, item_sk#108, store_name#109, store_zip#110, b_street_number#111, b_streen_name#112, b_city#113, b_zip#114, c_street_number#115, c_street_name#116, c_city#117, c_zip#118, syear#119, cnt#120, s1#121, s2#122, s3#123] +Arguments: hashpartitioning(item_sk#108, store_name#109, store_zip#110, 5), true, [id=#124] + +(128) Sort [codegen id : 43] +Input [17]: [product_name#107, item_sk#108, store_name#109, store_zip#110, b_street_number#111, b_streen_name#112, b_city#113, b_zip#114, c_street_number#115, c_street_name#116, c_city#117, c_zip#118, syear#119, cnt#120, s1#121, s2#122, s3#123] +Arguments: [item_sk#108 ASC NULLS FIRST, store_name#109 ASC NULLS FIRST, store_zip#110 ASC NULLS FIRST], false, 0 + +(129) ReusedExchange [Reuses 
operator id: 4] +Output [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] + +(130) Sort [codegen id : 45] +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Arguments: [cast(ss_item_sk#2 as bigint) ASC NULLS FIRST, cast(ss_ticket_number#9 as bigint) ASC NULLS FIRST], false, 0 + +(131) ReusedExchange [Reuses operator id: 9] +Output [2]: [sr_item_sk#14, sr_ticket_number#15] + +(132) Sort [codegen id : 47] +Input [2]: [sr_item_sk#14, sr_ticket_number#15] +Arguments: [sr_item_sk#14 ASC NULLS FIRST, sr_ticket_number#15 ASC NULLS FIRST], false, 0 + +(133) SortMergeJoin [codegen id : 56] +Left keys [2]: [cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#9 as bigint)] +Right keys [2]: [sr_item_sk#14, sr_ticket_number#15] +Join condition: None + +(134) Project [codegen id : 56] +Output [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Input [14]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, sr_item_sk#14, sr_ticket_number#15] + +(135) ReusedExchange [Reuses operator id: 30] +Output [1]: [cs_item_sk#17] + +(136) BroadcastHashJoin [codegen id : 56] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [cs_item_sk#17] +Join condition: None + +(137) Project [codegen id : 56] +Output [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, 
ss_coupon_amt#12] +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, cs_item_sk#17] + +(138) Scan parquet default.date_dim +Output [2]: [d_date_sk#39, d_year#40] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(139) ColumnarToRow [codegen id : 54] +Input [2]: [d_date_sk#39, d_year#40] + +(140) Filter [codegen id : 54] +Input [2]: [d_date_sk#39, d_year#40] +Condition : ((isnotnull(d_year#40) AND (d_year#40 = 2000)) AND isnotnull(d_date_sk#39)) + +(141) BroadcastExchange +Input [2]: [d_date_sk#39, d_year#40] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#125] + +(142) BroadcastHashJoin [codegen id : 56] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#39] +Join condition: None + +(143) Project [codegen id : 56] +Output [11]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40] +Input [13]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_date_sk#39, d_year#40] + +(144) ReusedExchange [Reuses operator id: 42] +Output [3]: [s_store_sk#42, s_store_name#43, s_zip#44] + +(145) BroadcastHashJoin [codegen id : 56] +Left keys [1]: [ss_store_sk#7] +Right keys [1]: [s_store_sk#42] +Join condition: None + +(146) Project [codegen id : 56] +Output [12]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, 
ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44] +Input [14]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_sk#42, s_store_name#43, s_zip#44] + +(147) Exchange +Input [12]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44] +Arguments: hashpartitioning(ss_customer_sk#3, 5), true, [id=#126] + +(148) Sort [codegen id : 57] +Input [12]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44] +Arguments: [ss_customer_sk#3 ASC NULLS FIRST], false, 0 + +(149) ReusedExchange [Reuses operator id: 50] +Output [6]: [c_customer_sk#47, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, c_first_shipto_date_sk#51, c_first_sales_date_sk#52] + +(150) Sort [codegen id : 59] +Input [6]: [c_customer_sk#47, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, c_first_shipto_date_sk#51, c_first_sales_date_sk#52] +Arguments: [c_customer_sk#47 ASC NULLS FIRST], false, 0 + +(151) SortMergeJoin [codegen id : 62] +Left keys [1]: [ss_customer_sk#3] +Right keys [1]: [c_customer_sk#47] +Join condition: None + +(152) Project [codegen id : 62] +Output [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, c_first_shipto_date_sk#51, c_first_sales_date_sk#52] +Input [18]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_customer_sk#47, 
c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, c_first_shipto_date_sk#51, c_first_sales_date_sk#52] + +(153) ReusedExchange [Reuses operator id: 57] +Output [2]: [d_date_sk#127, d_year#128] + +(154) BroadcastHashJoin [codegen id : 62] +Left keys [1]: [c_first_sales_date_sk#52] +Right keys [1]: [d_date_sk#127] +Join condition: None + +(155) Project [codegen id : 62] +Output [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, c_first_shipto_date_sk#51, d_year#128] +Input [18]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, c_first_shipto_date_sk#51, c_first_sales_date_sk#52, d_date_sk#127, d_year#128] + +(156) ReusedExchange [Reuses operator id: 57] +Output [2]: [d_date_sk#129, d_year#130] + +(157) BroadcastHashJoin [codegen id : 62] +Left keys [1]: [c_first_shipto_date_sk#51] +Right keys [1]: [d_date_sk#129] +Join condition: None + +(158) Project [codegen id : 62] +Output [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#128, d_year#130] +Input [18]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, c_first_shipto_date_sk#51, d_year#128, d_date_sk#129, d_year#130] + +(159) Exchange +Input [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, 
ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#128, d_year#130] +Arguments: hashpartitioning(ss_cdemo_sk#4, 5), true, [id=#131] + +(160) Sort [codegen id : 63] +Input [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#128, d_year#130] +Arguments: [ss_cdemo_sk#4 ASC NULLS FIRST], false, 0 + +(161) ReusedExchange [Reuses operator id: 68] +Output [2]: [cd_demo_sk#60, cd_marital_status#61] + +(162) Sort [codegen id : 65] +Input [2]: [cd_demo_sk#60, cd_marital_status#61] +Arguments: [cd_demo_sk#60 ASC NULLS FIRST], false, 0 + +(163) SortMergeJoin [codegen id : 66] +Left keys [1]: [ss_cdemo_sk#4] +Right keys [1]: [cd_demo_sk#60] +Join condition: None + +(164) Project [codegen id : 66] +Output [16]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#128, d_year#130, cd_marital_status#61] +Input [18]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#128, d_year#130, cd_demo_sk#60, cd_marital_status#61] + +(165) Exchange +Input [16]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#128, d_year#130, cd_marital_status#61] +Arguments: hashpartitioning(c_current_cdemo_sk#48, 5), true, [id=#132] + +(166) Sort [codegen id : 67] +Input 
[16]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#128, d_year#130, cd_marital_status#61] +Arguments: [c_current_cdemo_sk#48 ASC NULLS FIRST], false, 0 + +(167) ReusedExchange [Reuses operator id: 68] +Output [2]: [cd_demo_sk#133, cd_marital_status#134] + +(168) Sort [codegen id : 69] +Input [2]: [cd_demo_sk#133, cd_marital_status#134] +Arguments: [cd_demo_sk#133 ASC NULLS FIRST], false, 0 + +(169) SortMergeJoin [codegen id : 73] +Left keys [1]: [c_current_cdemo_sk#48] +Right keys [1]: [cd_demo_sk#133] +Join condition: NOT (cd_marital_status#61 = cd_marital_status#134) + +(170) Project [codegen id : 73] +Output [14]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#128, d_year#130] +Input [18]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_cdemo_sk#48, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#128, d_year#130, cd_marital_status#61, cd_demo_sk#133, cd_marital_status#134] + +(171) ReusedExchange [Reuses operator id: 81] +Output [1]: [p_promo_sk#66] + +(172) BroadcastHashJoin [codegen id : 73] +Left keys [1]: [ss_promo_sk#8] +Right keys [1]: [p_promo_sk#66] +Join condition: None + +(173) Project [codegen id : 73] +Output [13]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#128, d_year#130] +Input [15]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, 
c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#128, d_year#130, p_promo_sk#66] + +(174) ReusedExchange [Reuses operator id: 87] +Output [2]: [hd_demo_sk#68, hd_income_band_sk#69] + +(175) BroadcastHashJoin [codegen id : 73] +Left keys [1]: [ss_hdemo_sk#5] +Right keys [1]: [hd_demo_sk#68] +Join condition: None + +(176) Project [codegen id : 73] +Output [13]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#128, d_year#130, hd_income_band_sk#69] +Input [15]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#128, d_year#130, hd_demo_sk#68, hd_income_band_sk#69] + +(177) ReusedExchange [Reuses operator id: 87] +Output [2]: [hd_demo_sk#135, hd_income_band_sk#136] + +(178) BroadcastHashJoin [codegen id : 73] +Left keys [1]: [c_current_hdemo_sk#49] +Right keys [1]: [hd_demo_sk#135] +Join condition: None + +(179) Project [codegen id : 73] +Output [13]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_addr_sk#50, d_year#128, d_year#130, hd_income_band_sk#69, hd_income_band_sk#136] +Input [15]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_hdemo_sk#49, c_current_addr_sk#50, d_year#128, d_year#130, hd_income_band_sk#69, hd_demo_sk#135, hd_income_band_sk#136] + +(180) Exchange +Input [13]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_addr_sk#50, d_year#128, d_year#130, hd_income_band_sk#69, hd_income_band_sk#136] +Arguments: hashpartitioning(ss_addr_sk#6, 5), true, [id=#137] + +(181) Sort [codegen id : 74] +Input [13]: [ss_item_sk#2, ss_addr_sk#6, 
ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_addr_sk#50, d_year#128, d_year#130, hd_income_band_sk#69, hd_income_band_sk#136] +Arguments: [ss_addr_sk#6 ASC NULLS FIRST], false, 0 + +(182) ReusedExchange [Reuses operator id: 98] +Output [5]: [ca_address_sk#74, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] + +(183) Sort [codegen id : 76] +Input [5]: [ca_address_sk#74, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] +Arguments: [ca_address_sk#74 ASC NULLS FIRST], false, 0 + +(184) SortMergeJoin [codegen id : 77] +Left keys [1]: [ss_addr_sk#6] +Right keys [1]: [ca_address_sk#74] +Join condition: None + +(185) Project [codegen id : 77] +Output [16]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_addr_sk#50, d_year#128, d_year#130, hd_income_band_sk#69, hd_income_band_sk#136, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] +Input [18]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_addr_sk#50, d_year#128, d_year#130, hd_income_band_sk#69, hd_income_band_sk#136, ca_address_sk#74, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] + +(186) Exchange +Input [16]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_addr_sk#50, d_year#128, d_year#130, hd_income_band_sk#69, hd_income_band_sk#136, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] +Arguments: hashpartitioning(c_current_addr_sk#50, 5), true, [id=#138] + +(187) Sort [codegen id : 78] +Input [16]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_addr_sk#50, d_year#128, d_year#130, hd_income_band_sk#69, hd_income_band_sk#136, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] 
+Arguments: [c_current_addr_sk#50 ASC NULLS FIRST], false, 0 + +(188) ReusedExchange [Reuses operator id: 98] +Output [5]: [ca_address_sk#139, ca_street_number#140, ca_street_name#141, ca_city#142, ca_zip#143] + +(189) Sort [codegen id : 80] +Input [5]: [ca_address_sk#139, ca_street_number#140, ca_street_name#141, ca_city#142, ca_zip#143] +Arguments: [ca_address_sk#139 ASC NULLS FIRST], false, 0 + +(190) SortMergeJoin [codegen id : 84] +Left keys [1]: [c_current_addr_sk#50] +Right keys [1]: [ca_address_sk#139] +Join condition: None + +(191) Project [codegen id : 84] +Output [19]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, d_year#128, d_year#130, hd_income_band_sk#69, hd_income_band_sk#136, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#140, ca_street_name#141, ca_city#142, ca_zip#143] +Input [21]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, c_current_addr_sk#50, d_year#128, d_year#130, hd_income_band_sk#69, hd_income_band_sk#136, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_address_sk#139, ca_street_number#140, ca_street_name#141, ca_city#142, ca_zip#143] + +(192) ReusedExchange [Reuses operator id: 111] +Output [1]: [ib_income_band_sk#86] + +(193) BroadcastHashJoin [codegen id : 84] +Left keys [1]: [hd_income_band_sk#69] +Right keys [1]: [ib_income_band_sk#86] +Join condition: None + +(194) Project [codegen id : 84] +Output [18]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, d_year#128, d_year#130, hd_income_band_sk#136, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#140, ca_street_name#141, ca_city#142, ca_zip#143] +Input [20]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, d_year#128, d_year#130, hd_income_band_sk#69, 
hd_income_band_sk#136, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#140, ca_street_name#141, ca_city#142, ca_zip#143, ib_income_band_sk#86] + +(195) ReusedExchange [Reuses operator id: 111] +Output [1]: [ib_income_band_sk#144] + +(196) BroadcastHashJoin [codegen id : 84] +Left keys [1]: [hd_income_band_sk#136] +Right keys [1]: [ib_income_band_sk#144] +Join condition: None + +(197) Project [codegen id : 84] +Output [17]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, d_year#128, d_year#130, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#140, ca_street_name#141, ca_city#142, ca_zip#143] +Input [19]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, d_year#128, d_year#130, hd_income_band_sk#136, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#140, ca_street_name#141, ca_city#142, ca_zip#143, ib_income_band_sk#144] + +(198) ReusedExchange [Reuses operator id: 121] +Output [2]: [i_item_sk#89, i_product_name#92] + +(199) BroadcastHashJoin [codegen id : 84] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#89] +Join condition: None + +(200) Project [codegen id : 84] +Output [18]: [ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, d_year#128, d_year#130, s_store_name#43, s_zip#44, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#140, ca_street_name#141, ca_city#142, ca_zip#143, i_item_sk#89, i_product_name#92] +Input [19]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#40, s_store_name#43, s_zip#44, d_year#128, d_year#130, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#140, ca_street_name#141, ca_city#142, ca_zip#143, i_item_sk#89, i_product_name#92] + +(201) HashAggregate [codegen id : 84] +Input [18]: [ss_wholesale_cost#10, 
ss_list_price#11, ss_coupon_amt#12, d_year#40, d_year#128, d_year#130, s_store_name#43, s_zip#44, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#140, ca_street_name#141, ca_city#142, ca_zip#143, i_item_sk#89, i_product_name#92] +Keys [15]: [i_product_name#92, i_item_sk#89, s_store_name#43, s_zip#44, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#140, ca_street_name#141, ca_city#142, ca_zip#143, d_year#40, d_year#128, d_year#130] +Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#10)), partial_sum(UnscaledValue(ss_list_price#11)), partial_sum(UnscaledValue(ss_coupon_amt#12))] +Aggregate Attributes [4]: [count#145, sum#146, sum#147, sum#148] +Results [19]: [i_product_name#92, i_item_sk#89, s_store_name#43, s_zip#44, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#140, ca_street_name#141, ca_city#142, ca_zip#143, d_year#40, d_year#128, d_year#130, count#149, sum#150, sum#151, sum#152] + +(202) Exchange +Input [19]: [i_product_name#92, i_item_sk#89, s_store_name#43, s_zip#44, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#140, ca_street_name#141, ca_city#142, ca_zip#143, d_year#40, d_year#128, d_year#130, count#149, sum#150, sum#151, sum#152] +Arguments: hashpartitioning(i_product_name#92, i_item_sk#89, s_store_name#43, s_zip#44, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#140, ca_street_name#141, ca_city#142, ca_zip#143, d_year#40, d_year#128, d_year#130, 5), true, [id=#153] + +(203) HashAggregate [codegen id : 85] +Input [19]: [i_product_name#92, i_item_sk#89, s_store_name#43, s_zip#44, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#140, ca_street_name#141, ca_city#142, ca_zip#143, d_year#40, d_year#128, d_year#130, count#149, sum#150, sum#151, sum#152] +Keys [15]: [i_product_name#92, i_item_sk#89, s_store_name#43, s_zip#44, 
ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ca_street_number#140, ca_street_name#141, ca_city#142, ca_zip#143, d_year#40, d_year#128, d_year#130] +Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#10)), sum(UnscaledValue(ss_list_price#11)), sum(UnscaledValue(ss_coupon_amt#12))] +Aggregate Attributes [4]: [count(1)#154, sum(UnscaledValue(ss_wholesale_cost#10))#155, sum(UnscaledValue(ss_list_price#11))#156, sum(UnscaledValue(ss_coupon_amt#12))#157] +Results [8]: [i_item_sk#89 AS item_sk#158, s_store_name#43 AS store_name#159, s_zip#44 AS store_zip#160, d_year#40 AS syear#161, count(1)#154 AS cnt#162, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#10))#155,17,2) AS s1#163, MakeDecimal(sum(UnscaledValue(ss_list_price#11))#156,17,2) AS s2#164, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#12))#157,17,2) AS s3#165] + +(204) Exchange +Input [8]: [item_sk#158, store_name#159, store_zip#160, syear#161, cnt#162, s1#163, s2#164, s3#165] +Arguments: hashpartitioning(item_sk#158, store_name#159, store_zip#160, 5), true, [id=#166] + +(205) Sort [codegen id : 86] +Input [8]: [item_sk#158, store_name#159, store_zip#160, syear#161, cnt#162, s1#163, s2#164, s3#165] +Arguments: [item_sk#158 ASC NULLS FIRST, store_name#159 ASC NULLS FIRST, store_zip#160 ASC NULLS FIRST], false, 0 + +(206) SortMergeJoin [codegen id : 87] +Left keys [3]: [item_sk#108, store_name#109, store_zip#110] +Right keys [3]: [item_sk#158, store_name#159, store_zip#160] +Join condition: (cnt#162 <= cnt#120) + +(207) Project [codegen id : 87] +Output [21]: [product_name#107, store_name#109, store_zip#110, b_street_number#111, b_streen_name#112, b_city#113, b_zip#114, c_street_number#115, c_street_name#116, c_city#117, c_zip#118, syear#119, cnt#120, s1#121, s2#122, s3#123, s1#163, s2#164, s3#165, syear#161, cnt#162] +Input [25]: [product_name#107, item_sk#108, store_name#109, store_zip#110, b_street_number#111, b_streen_name#112, b_city#113, b_zip#114, c_street_number#115, 
c_street_name#116, c_city#117, c_zip#118, syear#119, cnt#120, s1#121, s2#122, s3#123, item_sk#158, store_name#159, store_zip#160, syear#161, cnt#162, s1#163, s2#164, s3#165] + +(208) Exchange +Input [21]: [product_name#107, store_name#109, store_zip#110, b_street_number#111, b_streen_name#112, b_city#113, b_zip#114, c_street_number#115, c_street_name#116, c_city#117, c_zip#118, syear#119, cnt#120, s1#121, s2#122, s3#123, s1#163, s2#164, s3#165, syear#161, cnt#162] +Arguments: rangepartitioning(product_name#107 ASC NULLS FIRST, store_name#109 ASC NULLS FIRST, cnt#162 ASC NULLS FIRST, s1#121 ASC NULLS FIRST, s1#163 ASC NULLS FIRST, 5), true, [id=#167] + +(209) Sort [codegen id : 88] +Input [21]: [product_name#107, store_name#109, store_zip#110, b_street_number#111, b_streen_name#112, b_city#113, b_zip#114, c_street_number#115, c_street_name#116, c_city#117, c_zip#118, syear#119, cnt#120, s1#121, s2#122, s3#123, s1#163, s2#164, s3#165, syear#161, cnt#162] +Arguments: [product_name#107 ASC NULLS FIRST, store_name#109 ASC NULLS FIRST, cnt#162 ASC NULLS FIRST, s1#121 ASC NULLS FIRST, s1#163 ASC NULLS FIRST], true, 0 + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.sf100/simplified.txt new file mode 100644 index 0000000000000..b44625037841a --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.sf100/simplified.txt @@ -0,0 +1,367 @@ +WholeStageCodegen (88) + Sort [cnt,product_name,s1,s1,store_name] + InputAdapter + Exchange [cnt,product_name,s1,s1,store_name] #1 + WholeStageCodegen (87) + Project [b_city,b_streen_name,b_street_number,b_zip,c_city,c_street_name,c_street_number,c_zip,cnt,cnt,product_name,s1,s1,s2,s2,s3,s3,store_name,store_zip,syear,syear] + SortMergeJoin [cnt,cnt,item_sk,item_sk,store_name,store_name,store_zip,store_zip] + InputAdapter + WholeStageCodegen (43) + Sort 
[item_sk,store_name,store_zip] + InputAdapter + Exchange [item_sk,store_name,store_zip] #2 + WholeStageCodegen (42) + HashAggregate [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,count,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,sum,sum,sum] [b_city,b_streen_name,b_street_number,b_zip,c_city,c_street_name,c_street_number,c_zip,cnt,count,count(1),item_sk,product_name,s1,s2,s3,store_name,store_zip,sum,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_wholesale_cost)),syear] + InputAdapter + Exchange [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip] #3 + WholeStageCodegen (41) + HashAggregate [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,ss_coupon_amt,ss_list_price,ss_wholesale_cost] [count,count,sum,sum,sum,sum,sum,sum] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,ss_coupon_amt,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project 
[ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + SortMergeJoin [c_current_addr_sk,ca_address_sk] + InputAdapter + WholeStageCodegen (35) + Sort [c_current_addr_sk] + InputAdapter + Exchange [c_current_addr_sk] #4 + WholeStageCodegen (34) + Project [c_current_addr_sk,ca_city,ca_street_name,ca_street_number,ca_zip,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + SortMergeJoin [ca_address_sk,ss_addr_sk] + InputAdapter + WholeStageCodegen (31) + Sort [ss_addr_sk] + InputAdapter + Exchange [ss_addr_sk] #5 + WholeStageCodegen (30) + Project [c_current_addr_sk,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] + Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,hd_income_band_sk,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [p_promo_sk,ss_promo_sk] + Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + SortMergeJoin [c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] + InputAdapter + WholeStageCodegen (24) + Sort [c_current_cdemo_sk] + InputAdapter + Exchange [c_current_cdemo_sk] #6 + WholeStageCodegen (23) + Project 
[c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,cd_marital_status,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + SortMergeJoin [cd_demo_sk,ss_cdemo_sk] + InputAdapter + WholeStageCodegen (20) + Sort [ss_cdemo_sk] + InputAdapter + Exchange [ss_cdemo_sk] #7 + WholeStageCodegen (19) + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_first_shipto_date_sk,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_first_sales_date_sk,c_first_shipto_date_sk,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (14) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #8 + WholeStageCodegen (13) + Project [d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_year,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_store_sk,ss_wholesale_cost] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_wholesale_cost] + BroadcastHashJoin [cs_item_sk,ss_item_sk] + Project 
[ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_wholesale_cost] + SortMergeJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + InputAdapter + WholeStageCodegen (2) + Sort [ss_item_sk,ss_ticket_number] + InputAdapter + Exchange [ss_item_sk,ss_ticket_number] #9 + WholeStageCodegen (1) + Filter [ss_addr_sk,ss_cdemo_sk,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number,ss_wholesale_cost] + InputAdapter + WholeStageCodegen (4) + Sort [sr_item_sk,sr_ticket_number] + InputAdapter + Exchange [sr_item_sk,sr_ticket_number] #10 + WholeStageCodegen (3) + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (10) + Project [cs_item_sk] + Filter [sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2), true)),sum(cs_ext_list_price)] + HashAggregate [cs_item_sk,isEmpty,sum,sum] [isEmpty,sum,sum,sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2), true)),sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as 
decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2), true)),sum(UnscaledValue(cs_ext_list_price)),sum(cs_ext_list_price)] + InputAdapter + Exchange [cs_item_sk] #12 + WholeStageCodegen (9) + HashAggregate [cr_refunded_cash,cr_reversed_charge,cr_store_credit,cs_ext_list_price,cs_item_sk] [isEmpty,isEmpty,sum,sum,sum,sum] + Project [cr_refunded_cash,cr_reversed_charge,cr_store_credit,cs_ext_list_price,cs_item_sk] + SortMergeJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + InputAdapter + WholeStageCodegen (6) + Sort [cs_item_sk,cs_order_number] + InputAdapter + Exchange [cs_item_sk,cs_order_number] #13 + WholeStageCodegen (5) + Filter [cs_item_sk,cs_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ext_list_price,cs_item_sk,cs_order_number] + InputAdapter + WholeStageCodegen (8) + Sort [cr_item_sk,cr_order_number] + InputAdapter + Exchange [cr_item_sk,cr_order_number] #14 + WholeStageCodegen (7) + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (11) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #16 + WholeStageCodegen (12) + Filter [s_store_name,s_store_sk,s_zip] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_name,s_store_sk,s_zip] + InputAdapter + WholeStageCodegen (16) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #17 + WholeStageCodegen (15) + Filter [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer 
[c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk] + InputAdapter + BroadcastExchange #18 + WholeStageCodegen (17) + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + ReusedExchange [d_date_sk,d_year] #18 + InputAdapter + WholeStageCodegen (22) + Sort [cd_demo_sk] + InputAdapter + Exchange [cd_demo_sk] #19 + WholeStageCodegen (21) + Filter [cd_demo_sk,cd_marital_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status] + InputAdapter + WholeStageCodegen (26) + Sort [cd_demo_sk] + InputAdapter + ReusedExchange [cd_demo_sk,cd_marital_status] #19 + InputAdapter + BroadcastExchange #20 + WholeStageCodegen (27) + Filter [p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.promotion [p_promo_sk] + InputAdapter + BroadcastExchange #21 + WholeStageCodegen (28) + Filter [hd_demo_sk,hd_income_band_sk] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_income_band_sk] + InputAdapter + ReusedExchange [hd_demo_sk,hd_income_band_sk] #21 + InputAdapter + WholeStageCodegen (33) + Sort [ca_address_sk] + InputAdapter + Exchange [ca_address_sk] #22 + WholeStageCodegen (32) + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] + InputAdapter + WholeStageCodegen (37) + Sort [ca_address_sk] + InputAdapter + ReusedExchange [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] #22 + InputAdapter + BroadcastExchange #23 + WholeStageCodegen (38) + Filter [ib_income_band_sk] + ColumnarToRow + InputAdapter + Scan parquet default.income_band [ib_income_band_sk] + InputAdapter + ReusedExchange [ib_income_band_sk] #23 + InputAdapter + BroadcastExchange #24 + WholeStageCodegen (40) + Project [i_item_sk,i_product_name] + Filter 
[i_color,i_current_price,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_color,i_current_price,i_item_sk,i_product_name] + InputAdapter + WholeStageCodegen (86) + Sort [item_sk,store_name,store_zip] + InputAdapter + Exchange [item_sk,store_name,store_zip] #25 + WholeStageCodegen (85) + HashAggregate [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,count,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,sum,sum,sum] [cnt,count,count(1),item_sk,s1,s2,s3,store_name,store_zip,sum,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_wholesale_cost)),syear] + InputAdapter + Exchange [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip] #26 + WholeStageCodegen (84) + HashAggregate [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,ss_coupon_amt,ss_list_price,ss_wholesale_cost] [count,count,sum,sum,sum,sum,sum,sum] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,ss_coupon_amt,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project 
[ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + SortMergeJoin [c_current_addr_sk,ca_address_sk] + InputAdapter + WholeStageCodegen (78) + Sort [c_current_addr_sk] + InputAdapter + Exchange [c_current_addr_sk] #27 + WholeStageCodegen (77) + Project [c_current_addr_sk,ca_city,ca_street_name,ca_street_number,ca_zip,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + SortMergeJoin [ca_address_sk,ss_addr_sk] + InputAdapter + WholeStageCodegen (74) + Sort [ss_addr_sk] + InputAdapter + Exchange [ss_addr_sk] #28 + WholeStageCodegen (73) + Project [c_current_addr_sk,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] + Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,hd_income_band_sk,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [p_promo_sk,ss_promo_sk] + Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + SortMergeJoin [c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] + InputAdapter + WholeStageCodegen (67) + Sort [c_current_cdemo_sk] + InputAdapter + Exchange [c_current_cdemo_sk] #29 + WholeStageCodegen (66) + Project 
[c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,cd_marital_status,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + SortMergeJoin [cd_demo_sk,ss_cdemo_sk] + InputAdapter + WholeStageCodegen (63) + Sort [ss_cdemo_sk] + InputAdapter + Exchange [ss_cdemo_sk] #30 + WholeStageCodegen (62) + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_first_shipto_date_sk,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_first_sales_date_sk,c_first_shipto_date_sk,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (57) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #31 + WholeStageCodegen (56) + Project [d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_year,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_store_sk,ss_wholesale_cost] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_wholesale_cost] + BroadcastHashJoin [cs_item_sk,ss_item_sk] + Project 
[ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_wholesale_cost] + SortMergeJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + InputAdapter + WholeStageCodegen (45) + Sort [ss_item_sk,ss_ticket_number] + InputAdapter + ReusedExchange [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number,ss_wholesale_cost] #9 + InputAdapter + WholeStageCodegen (47) + Sort [sr_item_sk,sr_ticket_number] + InputAdapter + ReusedExchange [sr_item_sk,sr_ticket_number] #10 + InputAdapter + ReusedExchange [cs_item_sk] #11 + InputAdapter + BroadcastExchange #32 + WholeStageCodegen (54) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + ReusedExchange [s_store_name,s_store_sk,s_zip] #16 + InputAdapter + WholeStageCodegen (59) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk] #17 + InputAdapter + ReusedExchange [d_date_sk,d_year] #18 + InputAdapter + ReusedExchange [d_date_sk,d_year] #18 + InputAdapter + WholeStageCodegen (65) + Sort [cd_demo_sk] + InputAdapter + ReusedExchange [cd_demo_sk,cd_marital_status] #19 + InputAdapter + WholeStageCodegen (69) + Sort [cd_demo_sk] + InputAdapter + ReusedExchange [cd_demo_sk,cd_marital_status] #19 + InputAdapter + ReusedExchange [p_promo_sk] #20 + InputAdapter + ReusedExchange [hd_demo_sk,hd_income_band_sk] #21 + InputAdapter + ReusedExchange [hd_demo_sk,hd_income_band_sk] #21 + InputAdapter + WholeStageCodegen (76) + Sort [ca_address_sk] + InputAdapter + ReusedExchange [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] #22 + InputAdapter + WholeStageCodegen (80) + Sort [ca_address_sk] + InputAdapter + ReusedExchange 
[ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] #22 + InputAdapter + ReusedExchange [ib_income_band_sk] #23 + InputAdapter + ReusedExchange [ib_income_band_sk] #23 + InputAdapter + ReusedExchange [i_item_sk,i_product_name] #24 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64/explain.txt new file mode 100644 index 0000000000000..55967382e42cd --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64/explain.txt @@ -0,0 +1,918 @@ +== Physical Plan == +* Sort (170) ++- Exchange (169) + +- * Project (168) + +- * BroadcastHashJoin Inner BuildRight (167) + :- * HashAggregate (105) + : +- Exchange (104) + : +- * HashAggregate (103) + : +- * Project (102) + : +- * BroadcastHashJoin Inner BuildRight (101) + : :- * Project (95) + : : +- * BroadcastHashJoin Inner BuildRight (94) + : : :- * Project (92) + : : : +- * BroadcastHashJoin Inner BuildRight (91) + : : : :- * Project (86) + : : : : +- * BroadcastHashJoin Inner BuildRight (85) + : : : : :- * Project (83) + : : : : : +- * BroadcastHashJoin Inner BuildRight (82) + : : : : : :- * Project (77) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (76) + : : : : : : :- * Project (74) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (73) + : : : : : : : :- * Project (68) + : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (67) + : : : : : : : : :- * Project (62) + : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (61) + : : : : : : : : : :- * Project (59) + : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (58) + : : : : : : : : : : :- * Project (53) + : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (52) + : : : : : : : : : : : :- * Project (50) + : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (49) + : : : : : : : : : : : : :- * Project (44) + : : : : : : : : : : : : : +- * BroadcastHashJoin 
Inner BuildRight (43) + : : : : : : : : : : : : : :- * Project (38) + : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : : : : : : : : : : : : :- * Project (32) + : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (31) + : : : : : : : : : : : : : : : :- * Project (26) + : : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (25) + : : : : : : : : : : : : : : : : :- * Project (9) + : : : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : : : : : : : : : : : : : : :- * Filter (3) + : : : : : : : : : : : : : : : : : : +- * ColumnarToRow (2) + : : : : : : : : : : : : : : : : : : +- Scan parquet default.store_sales (1) + : : : : : : : : : : : : : : : : : +- BroadcastExchange (7) + : : : : : : : : : : : : : : : : : +- * Filter (6) + : : : : : : : : : : : : : : : : : +- * ColumnarToRow (5) + : : : : : : : : : : : : : : : : : +- Scan parquet default.store_returns (4) + : : : : : : : : : : : : : : : : +- BroadcastExchange (24) + : : : : : : : : : : : : : : : : +- * Project (23) + : : : : : : : : : : : : : : : : +- * Filter (22) + : : : : : : : : : : : : : : : : +- * HashAggregate (21) + : : : : : : : : : : : : : : : : +- Exchange (20) + : : : : : : : : : : : : : : : : +- * HashAggregate (19) + : : : : : : : : : : : : : : : : +- * Project (18) + : : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (17) + : : : : : : : : : : : : : : : : :- * Filter (12) + : : : : : : : : : : : : : : : : : +- * ColumnarToRow (11) + : : : : : : : : : : : : : : : : : +- Scan parquet default.catalog_sales (10) + : : : : : : : : : : : : : : : : +- BroadcastExchange (16) + : : : : : : : : : : : : : : : : +- * Filter (15) + : : : : : : : : : : : : : : : : +- * ColumnarToRow (14) + : : : : : : : : : : : : : : : : +- Scan parquet default.catalog_returns (13) + : : : : : : : : : : : : : : : +- BroadcastExchange (30) + : : : : : : : : : : : : : : : +- * Filter (29) + : : : : : : : : : : : 
: : : : +- * ColumnarToRow (28) + : : : : : : : : : : : : : : : +- Scan parquet default.date_dim (27) + : : : : : : : : : : : : : : +- BroadcastExchange (36) + : : : : : : : : : : : : : : +- * Filter (35) + : : : : : : : : : : : : : : +- * ColumnarToRow (34) + : : : : : : : : : : : : : : +- Scan parquet default.store (33) + : : : : : : : : : : : : : +- BroadcastExchange (42) + : : : : : : : : : : : : : +- * Filter (41) + : : : : : : : : : : : : : +- * ColumnarToRow (40) + : : : : : : : : : : : : : +- Scan parquet default.customer (39) + : : : : : : : : : : : : +- BroadcastExchange (48) + : : : : : : : : : : : : +- * Filter (47) + : : : : : : : : : : : : +- * ColumnarToRow (46) + : : : : : : : : : : : : +- Scan parquet default.date_dim (45) + : : : : : : : : : : : +- ReusedExchange (51) + : : : : : : : : : : +- BroadcastExchange (57) + : : : : : : : : : : +- * Filter (56) + : : : : : : : : : : +- * ColumnarToRow (55) + : : : : : : : : : : +- Scan parquet default.customer_demographics (54) + : : : : : : : : : +- ReusedExchange (60) + : : : : : : : : +- BroadcastExchange (66) + : : : : : : : : +- * Filter (65) + : : : : : : : : +- * ColumnarToRow (64) + : : : : : : : : +- Scan parquet default.promotion (63) + : : : : : : : +- BroadcastExchange (72) + : : : : : : : +- * Filter (71) + : : : : : : : +- * ColumnarToRow (70) + : : : : : : : +- Scan parquet default.household_demographics (69) + : : : : : : +- ReusedExchange (75) + : : : : : +- BroadcastExchange (81) + : : : : : +- * Filter (80) + : : : : : +- * ColumnarToRow (79) + : : : : : +- Scan parquet default.customer_address (78) + : : : : +- ReusedExchange (84) + : : : +- BroadcastExchange (90) + : : : +- * Filter (89) + : : : +- * ColumnarToRow (88) + : : : +- Scan parquet default.income_band (87) + : : +- ReusedExchange (93) + : +- BroadcastExchange (100) + : +- * Project (99) + : +- * Filter (98) + : +- * ColumnarToRow (97) + : +- Scan parquet default.item (96) + +- BroadcastExchange (166) + +- * HashAggregate 
(165) + +- Exchange (164) + +- * HashAggregate (163) + +- * Project (162) + +- * BroadcastHashJoin Inner BuildRight (161) + :- * Project (159) + : +- * BroadcastHashJoin Inner BuildRight (158) + : :- * Project (156) + : : +- * BroadcastHashJoin Inner BuildRight (155) + : : :- * Project (153) + : : : +- * BroadcastHashJoin Inner BuildRight (152) + : : : :- * Project (150) + : : : : +- * BroadcastHashJoin Inner BuildRight (149) + : : : : :- * Project (147) + : : : : : +- * BroadcastHashJoin Inner BuildRight (146) + : : : : : :- * Project (144) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (143) + : : : : : : :- * Project (141) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (140) + : : : : : : : :- * Project (138) + : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (137) + : : : : : : : : :- * Project (135) + : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (134) + : : : : : : : : : :- * Project (132) + : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (131) + : : : : : : : : : : :- * Project (129) + : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (128) + : : : : : : : : : : : :- * Project (126) + : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (125) + : : : : : : : : : : : : :- * Project (123) + : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (122) + : : : : : : : : : : : : : :- * Project (120) + : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (119) + : : : : : : : : : : : : : : :- * Project (114) + : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (113) + : : : : : : : : : : : : : : : :- * Project (111) + : : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (110) + : : : : : : : : : : : : : : : : :- * Filter (108) + : : : : : : : : : : : : : : : : : +- * ColumnarToRow (107) + : : : : : : : : : : : : : : : : : +- Scan parquet default.store_sales (106) + : : : : : : : : : : : : : : : : +- ReusedExchange (109) + : : : : : 
: : : : : : : : : : +- ReusedExchange (112) + : : : : : : : : : : : : : : +- BroadcastExchange (118) + : : : : : : : : : : : : : : +- * Filter (117) + : : : : : : : : : : : : : : +- * ColumnarToRow (116) + : : : : : : : : : : : : : : +- Scan parquet default.date_dim (115) + : : : : : : : : : : : : : +- ReusedExchange (121) + : : : : : : : : : : : : +- ReusedExchange (124) + : : : : : : : : : : : +- ReusedExchange (127) + : : : : : : : : : : +- ReusedExchange (130) + : : : : : : : : : +- ReusedExchange (133) + : : : : : : : : +- ReusedExchange (136) + : : : : : : : +- ReusedExchange (139) + : : : : : : +- ReusedExchange (142) + : : : : : +- ReusedExchange (145) + : : : : +- ReusedExchange (148) + : : : +- ReusedExchange (151) + : : +- ReusedExchange (154) + : +- ReusedExchange (157) + +- ReusedExchange (160) + + +(1) Scan parquet default.store_sales +Output [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_promo_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 20] +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] + +(3) Filter [codegen id : 20] +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, 
ss_coupon_amt#12] +Condition : ((((((((isnotnull(ss_item_sk#2) AND isnotnull(ss_ticket_number#9)) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#7)) AND isnotnull(ss_customer_sk#3)) AND isnotnull(ss_cdemo_sk#4)) AND isnotnull(ss_promo_sk#8)) AND isnotnull(ss_hdemo_sk#5)) AND isnotnull(ss_addr_sk#6)) + +(4) Scan parquet default.store_returns +Output [2]: [sr_item_sk#13, sr_ticket_number#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_returns] +PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [sr_item_sk#13, sr_ticket_number#14] + +(6) Filter [codegen id : 1] +Input [2]: [sr_item_sk#13, sr_ticket_number#14] +Condition : (isnotnull(sr_item_sk#13) AND isnotnull(sr_ticket_number#14)) + +(7) BroadcastExchange +Input [2]: [sr_item_sk#13, sr_ticket_number#14] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false], input[1, bigint, false]),false), [id=#15] + +(8) BroadcastHashJoin [codegen id : 20] +Left keys [2]: [cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#9 as bigint)] +Right keys [2]: [sr_item_sk#13, sr_ticket_number#14] +Join condition: None + +(9) Project [codegen id : 20] +Output [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Input [14]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, sr_item_sk#13, sr_ticket_number#14] + +(10) Scan parquet default.catalog_sales +Output [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_order_number)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 3] +Input [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] + +(12) Filter [codegen id : 3] +Input [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] +Condition : (isnotnull(cs_item_sk#16) AND isnotnull(cs_order_number#17)) + +(13) Scan parquet default.catalog_returns +Output [5]: [cr_item_sk#19, cr_order_number#20, cr_refunded_cash#21, cr_reversed_charge#22, cr_store_credit#23] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/catalog_returns] +PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 2] +Input [5]: [cr_item_sk#19, cr_order_number#20, cr_refunded_cash#21, cr_reversed_charge#22, cr_store_credit#23] + +(15) Filter [codegen id : 2] +Input [5]: [cr_item_sk#19, cr_order_number#20, cr_refunded_cash#21, cr_reversed_charge#22, cr_store_credit#23] +Condition : (isnotnull(cr_item_sk#19) AND isnotnull(cr_order_number#20)) + +(16) BroadcastExchange +Input [5]: [cr_item_sk#19, cr_order_number#20, cr_refunded_cash#21, cr_reversed_charge#22, cr_store_credit#23] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[1, int, false] as bigint) & 4294967295))),false), [id=#24] + +(17) BroadcastHashJoin [codegen id : 3] +Left keys [2]: [cs_item_sk#16, cs_order_number#17] +Right keys [2]: [cr_item_sk#19, cr_order_number#20] +Join condition: None + +(18) Project [codegen id : 3] +Output [5]: [cs_item_sk#16, cs_ext_list_price#18, cr_refunded_cash#21, cr_reversed_charge#22, cr_store_credit#23] +Input [8]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, 
cr_item_sk#19, cr_order_number#20, cr_refunded_cash#21, cr_reversed_charge#22, cr_store_credit#23] + +(19) HashAggregate [codegen id : 3] +Input [5]: [cs_item_sk#16, cs_ext_list_price#18, cr_refunded_cash#21, cr_reversed_charge#22, cr_store_credit#23] +Keys [1]: [cs_item_sk#16] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_list_price#18)), partial_sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#21 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#22 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#23 as decimal(9,2)))), DecimalType(9,2), true))] +Aggregate Attributes [3]: [sum#25, sum#26, isEmpty#27] +Results [4]: [cs_item_sk#16, sum#28, sum#29, isEmpty#30] + +(20) Exchange +Input [4]: [cs_item_sk#16, sum#28, sum#29, isEmpty#30] +Arguments: hashpartitioning(cs_item_sk#16, 5), true, [id=#31] + +(21) HashAggregate [codegen id : 4] +Input [4]: [cs_item_sk#16, sum#28, sum#29, isEmpty#30] +Keys [1]: [cs_item_sk#16] +Functions [2]: [sum(UnscaledValue(cs_ext_list_price#18)), sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#21 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#22 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#23 as decimal(9,2)))), DecimalType(9,2), true))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_list_price#18))#32, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#21 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#22 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#23 as decimal(9,2)))), DecimalType(9,2), true))#33] +Results [3]: [cs_item_sk#16, MakeDecimal(sum(UnscaledValue(cs_ext_list_price#18))#32,17,2) AS sum(cs_ext_list_price#18)#34, 
sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#21 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#22 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#23 as decimal(9,2)))), DecimalType(9,2), true))#33 AS sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#21 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#22 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#23 as decimal(9,2)))), DecimalType(9,2), true))#35] + +(22) Filter [codegen id : 4] +Input [3]: [cs_item_sk#16, sum(cs_ext_list_price#18)#34, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#21 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#22 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#23 as decimal(9,2)))), DecimalType(9,2), true))#35] +Condition : (isnotnull(sum(cs_ext_list_price#18)#34) AND (cast(sum(cs_ext_list_price#18)#34 as decimal(21,2)) > CheckOverflow((2.00 * promote_precision(sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#21 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#22 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#23 as decimal(9,2)))), DecimalType(9,2), true))#35)), DecimalType(21,2), true))) + +(23) Project [codegen id : 4] +Output [1]: [cs_item_sk#16] +Input [3]: [cs_item_sk#16, sum(cs_ext_list_price#18)#34, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#21 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#22 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#23 as decimal(9,2)))), DecimalType(9,2), true))#35] + 
+(24) BroadcastExchange +Input [1]: [cs_item_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#36] + +(25) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [cs_item_sk#16] +Join condition: None + +(26) Project [codegen id : 20] +Output [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, cs_item_sk#16] + +(27) Scan parquet default.date_dim +Output [2]: [d_date_sk#37, d_year#38] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#37, d_year#38] + +(29) Filter [codegen id : 5] +Input [2]: [d_date_sk#37, d_year#38] +Condition : ((isnotnull(d_year#38) AND (d_year#38 = 1999)) AND isnotnull(d_date_sk#37)) + +(30) BroadcastExchange +Input [2]: [d_date_sk#37, d_year#38] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#39] + +(31) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#37] +Join condition: None + +(32) Project [codegen id : 20] +Output [11]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38] +Input [13]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, 
ss_list_price#11, ss_coupon_amt#12, d_date_sk#37, d_year#38] + +(33) Scan parquet default.store +Output [3]: [s_store_sk#40, s_store_name#41, s_zip#42] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_zip), IsNotNull(s_store_name)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 6] +Input [3]: [s_store_sk#40, s_store_name#41, s_zip#42] + +(35) Filter [codegen id : 6] +Input [3]: [s_store_sk#40, s_store_name#41, s_zip#42] +Condition : ((isnotnull(s_store_sk#40) AND isnotnull(s_zip#42)) AND isnotnull(s_store_name#41)) + +(36) BroadcastExchange +Input [3]: [s_store_sk#40, s_store_name#41, s_zip#42] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#43] + +(37) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ss_store_sk#7] +Right keys [1]: [s_store_sk#40] +Join condition: None + +(38) Project [codegen id : 20] +Output [12]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42] +Input [14]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_sk#40, s_store_name#41, s_zip#42] + +(39) Scan parquet default.customer +Output [6]: [c_customer_sk#44, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_first_sales_date_sk), IsNotNull(c_first_shipto_date_sk), IsNotNull(c_current_cdemo_sk), 
IsNotNull(c_current_hdemo_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(40) ColumnarToRow [codegen id : 7] +Input [6]: [c_customer_sk#44, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49] + +(41) Filter [codegen id : 7] +Input [6]: [c_customer_sk#44, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49] +Condition : (((((isnotnull(c_customer_sk#44) AND isnotnull(c_first_sales_date_sk#49)) AND isnotnull(c_first_shipto_date_sk#48)) AND isnotnull(c_current_cdemo_sk#45)) AND isnotnull(c_current_hdemo_sk#46)) AND isnotnull(c_current_addr_sk#47)) + +(42) BroadcastExchange +Input [6]: [c_customer_sk#44, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#50] + +(43) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ss_customer_sk#3] +Right keys [1]: [c_customer_sk#44] +Join condition: None + +(44) Project [codegen id : 20] +Output [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49] +Input [18]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_customer_sk#44, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49] + +(45) Scan parquet default.date_dim +Output [2]: [d_date_sk#51, d_year#52] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(46) ColumnarToRow [codegen id : 8] +Input [2]: [d_date_sk#51, d_year#52] + +(47) Filter [codegen id : 8] +Input [2]: [d_date_sk#51, d_year#52] +Condition : isnotnull(d_date_sk#51) + +(48) BroadcastExchange +Input [2]: [d_date_sk#51, d_year#52] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#53] + +(49) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [c_first_sales_date_sk#49] +Right keys [1]: [d_date_sk#51] +Join condition: None + +(50) Project [codegen id : 20] +Output [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, d_year#52] +Input [18]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49, d_date_sk#51, d_year#52] + +(51) ReusedExchange [Reuses operator id: 48] +Output [2]: [d_date_sk#54, d_year#55] + +(52) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [c_first_shipto_date_sk#48] +Right keys [1]: [d_date_sk#54] +Join condition: None + +(53) Project [codegen id : 20] +Output [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55] +Input [18]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, 
ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, d_year#52, d_date_sk#54, d_year#55] + +(54) Scan parquet default.customer_demographics +Output [2]: [cd_demo_sk#56, cd_marital_status#57] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status)] +ReadSchema: struct + +(55) ColumnarToRow [codegen id : 10] +Input [2]: [cd_demo_sk#56, cd_marital_status#57] + +(56) Filter [codegen id : 10] +Input [2]: [cd_demo_sk#56, cd_marital_status#57] +Condition : (isnotnull(cd_demo_sk#56) AND isnotnull(cd_marital_status#57)) + +(57) BroadcastExchange +Input [2]: [cd_demo_sk#56, cd_marital_status#57] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#58] + +(58) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ss_cdemo_sk#4] +Right keys [1]: [cd_demo_sk#56] +Join condition: None + +(59) Project [codegen id : 20] +Output [16]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55, cd_marital_status#57] +Input [18]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55, cd_demo_sk#56, cd_marital_status#57] + +(60) ReusedExchange [Reuses operator id: 57] +Output [2]: [cd_demo_sk#59, cd_marital_status#60] + +(61) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [c_current_cdemo_sk#45] +Right keys [1]: [cd_demo_sk#59] +Join condition: NOT (cd_marital_status#57 
= cd_marital_status#60) + +(62) Project [codegen id : 20] +Output [14]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55] +Input [18]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55, cd_marital_status#57, cd_demo_sk#59, cd_marital_status#60] + +(63) Scan parquet default.promotion +Output [1]: [p_promo_sk#61] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/promotion] +PushedFilters: [IsNotNull(p_promo_sk)] +ReadSchema: struct + +(64) ColumnarToRow [codegen id : 12] +Input [1]: [p_promo_sk#61] + +(65) Filter [codegen id : 12] +Input [1]: [p_promo_sk#61] +Condition : isnotnull(p_promo_sk#61) + +(66) BroadcastExchange +Input [1]: [p_promo_sk#61] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#62] + +(67) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ss_promo_sk#8] +Right keys [1]: [p_promo_sk#61] +Join condition: None + +(68) Project [codegen id : 20] +Output [13]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55] +Input [15]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55, p_promo_sk#61] + +(69) Scan parquet default.household_demographics +Output [2]: [hd_demo_sk#63, hd_income_band_sk#64] +Batched: true +Location: 
InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/household_demographics] +PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)] +ReadSchema: struct + +(70) ColumnarToRow [codegen id : 13] +Input [2]: [hd_demo_sk#63, hd_income_band_sk#64] + +(71) Filter [codegen id : 13] +Input [2]: [hd_demo_sk#63, hd_income_band_sk#64] +Condition : (isnotnull(hd_demo_sk#63) AND isnotnull(hd_income_band_sk#64)) + +(72) BroadcastExchange +Input [2]: [hd_demo_sk#63, hd_income_band_sk#64] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#65] + +(73) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ss_hdemo_sk#5] +Right keys [1]: [hd_demo_sk#63] +Join condition: None + +(74) Project [codegen id : 20] +Output [13]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55, hd_income_band_sk#64] +Input [15]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55, hd_demo_sk#63, hd_income_band_sk#64] + +(75) ReusedExchange [Reuses operator id: 72] +Output [2]: [hd_demo_sk#66, hd_income_band_sk#67] + +(76) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [c_current_hdemo_sk#46] +Right keys [1]: [hd_demo_sk#66] +Join condition: None + +(77) Project [codegen id : 20] +Output [13]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_addr_sk#47, d_year#52, d_year#55, hd_income_band_sk#64, hd_income_band_sk#67] +Input [15]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, 
c_current_addr_sk#47, d_year#52, d_year#55, hd_income_band_sk#64, hd_demo_sk#66, hd_income_band_sk#67] + +(78) Scan parquet default.customer_address +Output [5]: [ca_address_sk#68, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(79) ColumnarToRow [codegen id : 15] +Input [5]: [ca_address_sk#68, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72] + +(80) Filter [codegen id : 15] +Input [5]: [ca_address_sk#68, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72] +Condition : isnotnull(ca_address_sk#68) + +(81) BroadcastExchange +Input [5]: [ca_address_sk#68, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#73] + +(82) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ss_addr_sk#6] +Right keys [1]: [ca_address_sk#68] +Join condition: None + +(83) Project [codegen id : 20] +Output [16]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_addr_sk#47, d_year#52, d_year#55, hd_income_band_sk#64, hd_income_band_sk#67, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72] +Input [18]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_addr_sk#47, d_year#52, d_year#55, hd_income_band_sk#64, hd_income_band_sk#67, ca_address_sk#68, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72] + +(84) ReusedExchange [Reuses operator id: 81] +Output [5]: [ca_address_sk#74, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] + +(85) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [c_current_addr_sk#47] +Right keys [1]: 
[ca_address_sk#74] +Join condition: None + +(86) Project [codegen id : 20] +Output [19]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#52, d_year#55, hd_income_band_sk#64, hd_income_band_sk#67, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] +Input [21]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_addr_sk#47, d_year#52, d_year#55, hd_income_band_sk#64, hd_income_band_sk#67, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_address_sk#74, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] + +(87) Scan parquet default.income_band +Output [1]: [ib_income_band_sk#79] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/income_band] +PushedFilters: [IsNotNull(ib_income_band_sk)] +ReadSchema: struct + +(88) ColumnarToRow [codegen id : 17] +Input [1]: [ib_income_band_sk#79] + +(89) Filter [codegen id : 17] +Input [1]: [ib_income_band_sk#79] +Condition : isnotnull(ib_income_band_sk#79) + +(90) BroadcastExchange +Input [1]: [ib_income_band_sk#79] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#80] + +(91) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [hd_income_band_sk#64] +Right keys [1]: [ib_income_band_sk#79] +Join condition: None + +(92) Project [codegen id : 20] +Output [18]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#52, d_year#55, hd_income_band_sk#67, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] +Input [20]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, 
s_zip#42, d_year#52, d_year#55, hd_income_band_sk#64, hd_income_band_sk#67, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ib_income_band_sk#79] + +(93) ReusedExchange [Reuses operator id: 90] +Output [1]: [ib_income_band_sk#81] + +(94) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [hd_income_band_sk#67] +Right keys [1]: [ib_income_band_sk#81] +Join condition: None + +(95) Project [codegen id : 20] +Output [17]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#52, d_year#55, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] +Input [19]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#52, d_year#55, hd_income_band_sk#67, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ib_income_band_sk#81] + +(96) Scan parquet default.item +Output [4]: [i_item_sk#82, i_current_price#83, i_color#84, i_product_name#85] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_current_price), In(i_color, [purple,burlywood,indian,spring,floral,medium]), GreaterThanOrEqual(i_current_price,64.00), IsNotNull(i_item_sk)] +ReadSchema: struct + +(97) ColumnarToRow [codegen id : 19] +Input [4]: [i_item_sk#82, i_current_price#83, i_color#84, i_product_name#85] + +(98) Filter [codegen id : 19] +Input [4]: [i_item_sk#82, i_current_price#83, i_color#84, i_product_name#85] +Condition : ((((((isnotnull(i_current_price#83) AND i_color#84 IN (purple,burlywood,indian,spring,floral,medium)) AND (i_current_price#83 >= 64.00)) AND (cast(i_current_price#83 as decimal(12,2)) <= 74.00)) AND 
(cast(i_current_price#83 as decimal(12,2)) >= 65.00)) AND (cast(i_current_price#83 as decimal(12,2)) <= 79.00)) AND isnotnull(i_item_sk#82)) + +(99) Project [codegen id : 19] +Output [2]: [i_item_sk#82, i_product_name#85] +Input [4]: [i_item_sk#82, i_current_price#83, i_color#84, i_product_name#85] + +(100) BroadcastExchange +Input [2]: [i_item_sk#82, i_product_name#85] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#86] + +(101) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#82] +Join condition: None + +(102) Project [codegen id : 20] +Output [18]: [ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, d_year#52, d_year#55, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, i_item_sk#82, i_product_name#85] +Input [19]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#52, d_year#55, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, i_item_sk#82, i_product_name#85] + +(103) HashAggregate [codegen id : 20] +Input [18]: [ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, d_year#52, d_year#55, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, i_item_sk#82, i_product_name#85] +Keys [15]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, d_year#38, d_year#52, d_year#55] +Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#10)), partial_sum(UnscaledValue(ss_list_price#11)), partial_sum(UnscaledValue(ss_coupon_amt#12))] +Aggregate Attributes 
[4]: [count#87, sum#88, sum#89, sum#90] +Results [19]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, d_year#38, d_year#52, d_year#55, count#91, sum#92, sum#93, sum#94] + +(104) Exchange +Input [19]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, d_year#38, d_year#52, d_year#55, count#91, sum#92, sum#93, sum#94] +Arguments: hashpartitioning(i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, d_year#38, d_year#52, d_year#55, 5), true, [id=#95] + +(105) HashAggregate [codegen id : 42] +Input [19]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, d_year#38, d_year#52, d_year#55, count#91, sum#92, sum#93, sum#94] +Keys [15]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, d_year#38, d_year#52, d_year#55] +Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#10)), sum(UnscaledValue(ss_list_price#11)), sum(UnscaledValue(ss_coupon_amt#12))] +Aggregate Attributes [4]: [count(1)#96, sum(UnscaledValue(ss_wholesale_cost#10))#97, sum(UnscaledValue(ss_list_price#11))#98, sum(UnscaledValue(ss_coupon_amt#12))#99] +Results [17]: [i_product_name#85 AS product_name#100, i_item_sk#82 AS item_sk#101, s_store_name#41 AS store_name#102, s_zip#42 AS store_zip#103, ca_street_number#69 AS b_street_number#104, ca_street_name#70 AS b_streen_name#105, ca_city#71 AS b_city#106, ca_zip#72 AS b_zip#107, 
ca_street_number#75 AS c_street_number#108, ca_street_name#76 AS c_street_name#109, ca_city#77 AS c_city#110, ca_zip#78 AS c_zip#111, d_year#38 AS syear#112, count(1)#96 AS cnt#113, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#10))#97,17,2) AS s1#114, MakeDecimal(sum(UnscaledValue(ss_list_price#11))#98,17,2) AS s2#115, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#12))#99,17,2) AS s3#116] + +(106) Scan parquet default.store_sales +Output [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_promo_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk)] +ReadSchema: struct + +(107) ColumnarToRow [codegen id : 40] +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] + +(108) Filter [codegen id : 40] +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Condition : ((((((((isnotnull(ss_item_sk#2) AND isnotnull(ss_ticket_number#9)) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#7)) AND isnotnull(ss_customer_sk#3)) AND isnotnull(ss_cdemo_sk#4)) AND isnotnull(ss_promo_sk#8)) AND isnotnull(ss_hdemo_sk#5)) AND isnotnull(ss_addr_sk#6)) + +(109) ReusedExchange [Reuses operator id: 7] +Output [2]: [sr_item_sk#13, sr_ticket_number#14] + +(110) 
BroadcastHashJoin [codegen id : 40] +Left keys [2]: [cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#9 as bigint)] +Right keys [2]: [sr_item_sk#13, sr_ticket_number#14] +Join condition: None + +(111) Project [codegen id : 40] +Output [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Input [14]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, sr_item_sk#13, sr_ticket_number#14] + +(112) ReusedExchange [Reuses operator id: 24] +Output [1]: [cs_item_sk#16] + +(113) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [cs_item_sk#16] +Join condition: None + +(114) Project [codegen id : 40] +Output [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, cs_item_sk#16] + +(115) Scan parquet default.date_dim +Output [2]: [d_date_sk#37, d_year#38] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(116) ColumnarToRow [codegen id : 25] +Input [2]: [d_date_sk#37, d_year#38] + +(117) Filter [codegen id : 25] +Input [2]: [d_date_sk#37, d_year#38] +Condition : ((isnotnull(d_year#38) AND (d_year#38 = 2000)) AND isnotnull(d_date_sk#37)) + +(118) BroadcastExchange +Input [2]: [d_date_sk#37, d_year#38] +Arguments: 
HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#117] + +(119) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#37] +Join condition: None + +(120) Project [codegen id : 40] +Output [11]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38] +Input [13]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_date_sk#37, d_year#38] + +(121) ReusedExchange [Reuses operator id: 36] +Output [3]: [s_store_sk#40, s_store_name#41, s_zip#42] + +(122) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [ss_store_sk#7] +Right keys [1]: [s_store_sk#40] +Join condition: None + +(123) Project [codegen id : 40] +Output [12]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42] +Input [14]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_sk#40, s_store_name#41, s_zip#42] + +(124) ReusedExchange [Reuses operator id: 42] +Output [6]: [c_customer_sk#44, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49] + +(125) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [ss_customer_sk#3] +Right keys [1]: [c_customer_sk#44] +Join condition: None + +(126) Project [codegen id : 40] +Output [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, 
c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49] +Input [18]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_customer_sk#44, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49] + +(127) ReusedExchange [Reuses operator id: 48] +Output [2]: [d_date_sk#118, d_year#119] + +(128) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [c_first_sales_date_sk#49] +Right keys [1]: [d_date_sk#118] +Join condition: None + +(129) Project [codegen id : 40] +Output [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, d_year#119] +Input [18]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49, d_date_sk#118, d_year#119] + +(130) ReusedExchange [Reuses operator id: 48] +Output [2]: [d_date_sk#120, d_year#121] + +(131) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [c_first_shipto_date_sk#48] +Right keys [1]: [d_date_sk#120] +Join condition: None + +(132) Project [codegen id : 40] +Output [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121] +Input [18]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, 
d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, d_year#119, d_date_sk#120, d_year#121] + +(133) ReusedExchange [Reuses operator id: 57] +Output [2]: [cd_demo_sk#56, cd_marital_status#57] + +(134) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [ss_cdemo_sk#4] +Right keys [1]: [cd_demo_sk#56] +Join condition: None + +(135) Project [codegen id : 40] +Output [16]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121, cd_marital_status#57] +Input [18]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121, cd_demo_sk#56, cd_marital_status#57] + +(136) ReusedExchange [Reuses operator id: 57] +Output [2]: [cd_demo_sk#122, cd_marital_status#123] + +(137) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [c_current_cdemo_sk#45] +Right keys [1]: [cd_demo_sk#122] +Join condition: NOT (cd_marital_status#57 = cd_marital_status#123) + +(138) Project [codegen id : 40] +Output [14]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121] +Input [18]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121, cd_marital_status#57, cd_demo_sk#122, cd_marital_status#123] + +(139) ReusedExchange [Reuses operator id: 66] +Output [1]: [p_promo_sk#61] + 
+(140) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [ss_promo_sk#8] +Right keys [1]: [p_promo_sk#61] +Join condition: None + +(141) Project [codegen id : 40] +Output [13]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121] +Input [15]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121, p_promo_sk#61] + +(142) ReusedExchange [Reuses operator id: 72] +Output [2]: [hd_demo_sk#63, hd_income_band_sk#64] + +(143) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [ss_hdemo_sk#5] +Right keys [1]: [hd_demo_sk#63] +Join condition: None + +(144) Project [codegen id : 40] +Output [13]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121, hd_income_band_sk#64] +Input [15]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121, hd_demo_sk#63, hd_income_band_sk#64] + +(145) ReusedExchange [Reuses operator id: 72] +Output [2]: [hd_demo_sk#124, hd_income_band_sk#125] + +(146) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [c_current_hdemo_sk#46] +Right keys [1]: [hd_demo_sk#124] +Join condition: None + +(147) Project [codegen id : 40] +Output [13]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_addr_sk#47, d_year#119, d_year#121, hd_income_band_sk#64, hd_income_band_sk#125] +Input [15]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, 
ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121, hd_income_band_sk#64, hd_demo_sk#124, hd_income_band_sk#125] + +(148) ReusedExchange [Reuses operator id: 81] +Output [5]: [ca_address_sk#68, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72] + +(149) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [ss_addr_sk#6] +Right keys [1]: [ca_address_sk#68] +Join condition: None + +(150) Project [codegen id : 40] +Output [16]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_addr_sk#47, d_year#119, d_year#121, hd_income_band_sk#64, hd_income_band_sk#125, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72] +Input [18]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_addr_sk#47, d_year#119, d_year#121, hd_income_band_sk#64, hd_income_band_sk#125, ca_address_sk#68, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72] + +(151) ReusedExchange [Reuses operator id: 81] +Output [5]: [ca_address_sk#126, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130] + +(152) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [c_current_addr_sk#47] +Right keys [1]: [ca_address_sk#126] +Join condition: None + +(153) Project [codegen id : 40] +Output [19]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#119, d_year#121, hd_income_band_sk#64, hd_income_band_sk#125, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130] +Input [21]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_addr_sk#47, d_year#119, d_year#121, hd_income_band_sk#64, hd_income_band_sk#125, ca_street_number#69, ca_street_name#70, 
ca_city#71, ca_zip#72, ca_address_sk#126, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130] + +(154) ReusedExchange [Reuses operator id: 90] +Output [1]: [ib_income_band_sk#79] + +(155) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [hd_income_band_sk#64] +Right keys [1]: [ib_income_band_sk#79] +Join condition: None + +(156) Project [codegen id : 40] +Output [18]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#119, d_year#121, hd_income_band_sk#125, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130] +Input [20]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#119, d_year#121, hd_income_band_sk#64, hd_income_band_sk#125, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, ib_income_band_sk#79] + +(157) ReusedExchange [Reuses operator id: 90] +Output [1]: [ib_income_band_sk#131] + +(158) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [hd_income_band_sk#125] +Right keys [1]: [ib_income_band_sk#131] +Join condition: None + +(159) Project [codegen id : 40] +Output [17]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#119, d_year#121, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130] +Input [19]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#119, d_year#121, hd_income_band_sk#125, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, ib_income_band_sk#131] + +(160) ReusedExchange [Reuses operator id: 100] +Output [2]: [i_item_sk#82, i_product_name#85] + +(161) 
BroadcastHashJoin [codegen id : 40] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#82] +Join condition: None + +(162) Project [codegen id : 40] +Output [18]: [ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, d_year#119, d_year#121, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, i_item_sk#82, i_product_name#85] +Input [19]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#119, d_year#121, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, i_item_sk#82, i_product_name#85] + +(163) HashAggregate [codegen id : 40] +Input [18]: [ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, d_year#119, d_year#121, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, i_item_sk#82, i_product_name#85] +Keys [15]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, d_year#38, d_year#119, d_year#121] +Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#10)), partial_sum(UnscaledValue(ss_list_price#11)), partial_sum(UnscaledValue(ss_coupon_amt#12))] +Aggregate Attributes [4]: [count#132, sum#133, sum#134, sum#135] +Results [19]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, d_year#38, d_year#119, d_year#121, count#136, sum#137, sum#138, sum#139] + +(164) Exchange +Input [19]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, 
ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, d_year#38, d_year#119, d_year#121, count#136, sum#137, sum#138, sum#139] +Arguments: hashpartitioning(i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, d_year#38, d_year#119, d_year#121, 5), true, [id=#140] + +(165) HashAggregate [codegen id : 41] +Input [19]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, d_year#38, d_year#119, d_year#121, count#136, sum#137, sum#138, sum#139] +Keys [15]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, d_year#38, d_year#119, d_year#121] +Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#10)), sum(UnscaledValue(ss_list_price#11)), sum(UnscaledValue(ss_coupon_amt#12))] +Aggregate Attributes [4]: [count(1)#141, sum(UnscaledValue(ss_wholesale_cost#10))#142, sum(UnscaledValue(ss_list_price#11))#143, sum(UnscaledValue(ss_coupon_amt#12))#144] +Results [8]: [i_item_sk#82 AS item_sk#145, s_store_name#41 AS store_name#146, s_zip#42 AS store_zip#147, d_year#38 AS syear#148, count(1)#141 AS cnt#149, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#10))#142,17,2) AS s1#150, MakeDecimal(sum(UnscaledValue(ss_list_price#11))#143,17,2) AS s2#151, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#12))#144,17,2) AS s3#152] + +(166) BroadcastExchange +Input [8]: [item_sk#145, store_name#146, store_zip#147, syear#148, cnt#149, s1#150, s2#151, s3#152] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, string, true], input[2, string, true]),false), [id=#153] + +(167) BroadcastHashJoin [codegen id : 42] +Left keys [3]: 
[item_sk#101, store_name#102, store_zip#103] +Right keys [3]: [item_sk#145, store_name#146, store_zip#147] +Join condition: (cnt#149 <= cnt#113) + +(168) Project [codegen id : 42] +Output [21]: [product_name#100, store_name#102, store_zip#103, b_street_number#104, b_streen_name#105, b_city#106, b_zip#107, c_street_number#108, c_street_name#109, c_city#110, c_zip#111, syear#112, cnt#113, s1#114, s2#115, s3#116, s1#150, s2#151, s3#152, syear#148, cnt#149] +Input [25]: [product_name#100, item_sk#101, store_name#102, store_zip#103, b_street_number#104, b_streen_name#105, b_city#106, b_zip#107, c_street_number#108, c_street_name#109, c_city#110, c_zip#111, syear#112, cnt#113, s1#114, s2#115, s3#116, item_sk#145, store_name#146, store_zip#147, syear#148, cnt#149, s1#150, s2#151, s3#152] + +(169) Exchange +Input [21]: [product_name#100, store_name#102, store_zip#103, b_street_number#104, b_streen_name#105, b_city#106, b_zip#107, c_street_number#108, c_street_name#109, c_city#110, c_zip#111, syear#112, cnt#113, s1#114, s2#115, s3#116, s1#150, s2#151, s3#152, syear#148, cnt#149] +Arguments: rangepartitioning(product_name#100 ASC NULLS FIRST, store_name#102 ASC NULLS FIRST, cnt#149 ASC NULLS FIRST, s1#114 ASC NULLS FIRST, s1#150 ASC NULLS FIRST, 5), true, [id=#154] + +(170) Sort [codegen id : 43] +Input [21]: [product_name#100, store_name#102, store_zip#103, b_street_number#104, b_streen_name#105, b_city#106, b_zip#107, c_street_number#108, c_street_name#109, c_city#110, c_zip#111, syear#112, cnt#113, s1#114, s2#115, s3#116, s1#150, s2#151, s3#152, syear#148, cnt#149] +Arguments: [product_name#100 ASC NULLS FIRST, store_name#102 ASC NULLS FIRST, cnt#149 ASC NULLS FIRST, s1#114 ASC NULLS FIRST, s1#150 ASC NULLS FIRST], true, 0 + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64/simplified.txt new file mode 100644 index 0000000000000..45e392724d3c1 --- 
/dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64/simplified.txt @@ -0,0 +1,246 @@ +WholeStageCodegen (43) + Sort [cnt,product_name,s1,s1,store_name] + InputAdapter + Exchange [cnt,product_name,s1,s1,store_name] #1 + WholeStageCodegen (42) + Project [b_city,b_streen_name,b_street_number,b_zip,c_city,c_street_name,c_street_number,c_zip,cnt,cnt,product_name,s1,s1,s2,s2,s3,s3,store_name,store_zip,syear,syear] + BroadcastHashJoin [cnt,cnt,item_sk,item_sk,store_name,store_name,store_zip,store_zip] + HashAggregate [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,count,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,sum,sum,sum] [b_city,b_streen_name,b_street_number,b_zip,c_city,c_street_name,c_street_number,c_zip,cnt,count,count(1),item_sk,product_name,s1,s2,s3,store_name,store_zip,sum,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_wholesale_cost)),syear] + InputAdapter + Exchange [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip] #2 + WholeStageCodegen (20) + HashAggregate [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,ss_coupon_amt,ss_list_price,ss_wholesale_cost] [count,count,sum,sum,sum,sum,sum,sum] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,ss_coupon_amt,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin 
[hd_income_band_sk,ib_income_band_sk] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_addr_sk,ca_city,ca_street_name,ca_street_number,ca_zip,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] + Project [c_current_addr_sk,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] + Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,hd_income_band_sk,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [p_promo_sk,ss_promo_sk] + Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] + Project 
[c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,cd_marital_status,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_first_shipto_date_sk,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_first_sales_date_sk,c_first_shipto_date_sk,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_year,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_store_sk,ss_wholesale_cost] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_wholesale_cost] + BroadcastHashJoin [cs_item_sk,ss_item_sk] + Project [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_wholesale_cost] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Filter 
[ss_addr_sk,ss_cdemo_sk,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number,ss_wholesale_cost] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (4) + Project [cs_item_sk] + Filter [sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2), true)),sum(cs_ext_list_price)] + HashAggregate [cs_item_sk,isEmpty,sum,sum] [isEmpty,sum,sum,sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2), true)),sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2), true)),sum(UnscaledValue(cs_ext_list_price)),sum(cs_ext_list_price)] + InputAdapter + Exchange [cs_item_sk] #5 + WholeStageCodegen (3) + HashAggregate [cr_refunded_cash,cr_reversed_charge,cr_store_credit,cs_ext_list_price,cs_item_sk] [isEmpty,isEmpty,sum,sum,sum,sum] + Project 
[cr_refunded_cash,cr_reversed_charge,cr_store_credit,cs_ext_list_price,cs_item_sk] + BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + Filter [cs_item_sk,cs_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ext_list_price,cs_item_sk,cs_order_number] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (5) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (6) + Filter [s_store_name,s_store_sk,s_zip] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_name,s_store_sk,s_zip] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (7) + Filter [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (8) + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + ReusedExchange [d_date_sk,d_year] #10 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (10) + Filter [cd_demo_sk,cd_marital_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status] + InputAdapter + ReusedExchange [cd_demo_sk,cd_marital_status] #11 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (12) + Filter [p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.promotion [p_promo_sk] + InputAdapter + BroadcastExchange #13 
+ WholeStageCodegen (13) + Filter [hd_demo_sk,hd_income_band_sk] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_income_band_sk] + InputAdapter + ReusedExchange [hd_demo_sk,hd_income_band_sk] #13 + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (15) + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] + InputAdapter + ReusedExchange [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] #14 + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (17) + Filter [ib_income_band_sk] + ColumnarToRow + InputAdapter + Scan parquet default.income_band [ib_income_band_sk] + InputAdapter + ReusedExchange [ib_income_band_sk] #15 + InputAdapter + BroadcastExchange #16 + WholeStageCodegen (19) + Project [i_item_sk,i_product_name] + Filter [i_color,i_current_price,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_color,i_current_price,i_item_sk,i_product_name] + InputAdapter + BroadcastExchange #17 + WholeStageCodegen (41) + HashAggregate [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,count,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,sum,sum,sum] [cnt,count,count(1),item_sk,s1,s2,s3,store_name,store_zip,sum,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_wholesale_cost)),syear] + InputAdapter + Exchange [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip] #18 + WholeStageCodegen (40) + HashAggregate [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,ss_coupon_amt,ss_list_price,ss_wholesale_cost] [count,count,sum,sum,sum,sum,sum,sum] + Project 
[ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,ss_coupon_amt,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_addr_sk,ca_city,ca_street_name,ca_street_number,ca_zip,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] + Project [c_current_addr_sk,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] + Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,hd_income_band_sk,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [p_promo_sk,ss_promo_sk] 
+ Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,cd_marital_status,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_first_shipto_date_sk,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_first_sales_date_sk,c_first_shipto_date_sk,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_year,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_store_sk,ss_wholesale_cost] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_wholesale_cost] + BroadcastHashJoin [cs_item_sk,ss_item_sk] + Project 
[ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_wholesale_cost] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Filter [ss_addr_sk,ss_cdemo_sk,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number,ss_wholesale_cost] + InputAdapter + ReusedExchange [sr_item_sk,sr_ticket_number] #3 + InputAdapter + ReusedExchange [cs_item_sk] #4 + InputAdapter + BroadcastExchange #19 + WholeStageCodegen (25) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + ReusedExchange [s_store_name,s_store_sk,s_zip] #8 + InputAdapter + ReusedExchange [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk] #9 + InputAdapter + ReusedExchange [d_date_sk,d_year] #10 + InputAdapter + ReusedExchange [d_date_sk,d_year] #10 + InputAdapter + ReusedExchange [cd_demo_sk,cd_marital_status] #11 + InputAdapter + ReusedExchange [cd_demo_sk,cd_marital_status] #11 + InputAdapter + ReusedExchange [p_promo_sk] #12 + InputAdapter + ReusedExchange [hd_demo_sk,hd_income_band_sk] #13 + InputAdapter + ReusedExchange [hd_demo_sk,hd_income_band_sk] #13 + InputAdapter + ReusedExchange [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] #14 + InputAdapter + ReusedExchange [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] #14 + InputAdapter + ReusedExchange [ib_income_band_sk] #15 + InputAdapter + ReusedExchange [ib_income_band_sk] #15 + InputAdapter + ReusedExchange [i_item_sk,i_product_name] #16 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.sf100/explain.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.sf100/explain.txt new file mode 100644 index 0000000000000..40d9b1e2cbd08 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.sf100/explain.txt @@ -0,0 +1,452 @@ +== Physical Plan == +TakeOrderedAndProject (74) ++- * Filter (73) + +- Window (72) + +- * Sort (71) + +- Exchange (70) + +- Union (69) + :- * HashAggregate (28) + : +- Exchange (27) + : +- * HashAggregate (26) + : +- * Project (25) + : +- * SortMergeJoin Inner (24) + : :- * Sort (18) + : : +- Exchange (17) + : : +- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.store (11) + : +- * Sort (23) + : +- Exchange (22) + : +- * Filter (21) + : +- * ColumnarToRow (20) + : +- Scan parquet default.item (19) + :- * HashAggregate (33) + : +- Exchange (32) + : +- * HashAggregate (31) + : +- * HashAggregate (30) + : +- ReusedExchange (29) + :- * HashAggregate (38) + : +- Exchange (37) + : +- * HashAggregate (36) + : +- * HashAggregate (35) + : +- ReusedExchange (34) + :- * HashAggregate (43) + : +- Exchange (42) + : +- * HashAggregate (41) + : +- * HashAggregate (40) + : +- ReusedExchange (39) + :- * HashAggregate (48) + : +- Exchange (47) + : +- * HashAggregate (46) + : +- * HashAggregate (45) + : +- ReusedExchange (44) + :- * HashAggregate (53) + : +- Exchange (52) + : +- * HashAggregate (51) + : +- * HashAggregate (50) + : +- ReusedExchange (49) + :- * HashAggregate (58) + : +- Exchange (57) + : +- * HashAggregate (56) + : +- * 
HashAggregate (55) + : +- ReusedExchange (54) + :- * HashAggregate (63) + : +- Exchange (62) + : +- * HashAggregate (61) + : +- * HashAggregate (60) + : +- ReusedExchange (59) + +- * HashAggregate (68) + +- Exchange (67) + +- * HashAggregate (66) + +- * HashAggregate (65) + +- ReusedExchange (64) + + +(1) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5] + +(3) Filter [codegen id : 3] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5] +Condition : ((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_item_sk#2)) + +(4) Scan parquet default.date_dim +Output [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] + +(6) Filter [codegen id : 1] +Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] +Condition : (((isnotnull(d_month_seq#7) AND (d_month_seq#7 >= 1212)) AND (d_month_seq#7 <= 1223)) AND isnotnull(d_date_sk#6)) + +(7) Project [codegen id : 1] +Output [4]: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] +Input [5]: [d_date_sk#6, d_month_seq#7, 
d_year#8, d_moy#9, d_qoy#10] + +(8) BroadcastExchange +Input [4]: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(10) Project [codegen id : 3] +Output [7]: [ss_item_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, d_year#8, d_moy#9, d_qoy#10] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] + +(11) Scan parquet default.store +Output [2]: [s_store_sk#12, s_store_id#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#12, s_store_id#13] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#12, s_store_id#13] +Condition : isnotnull(s_store_sk#12) + +(14) BroadcastExchange +Input [2]: [s_store_sk#12, s_store_id#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#12] +Join condition: None + +(16) Project [codegen id : 3] +Output [7]: [ss_item_sk#2, ss_quantity#4, ss_sales_price#5, d_year#8, d_moy#9, d_qoy#10, s_store_id#13] +Input [9]: [ss_item_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, d_year#8, d_moy#9, d_qoy#10, s_store_sk#12, s_store_id#13] + +(17) Exchange +Input [7]: [ss_item_sk#2, ss_quantity#4, ss_sales_price#5, d_year#8, d_moy#9, d_qoy#10, s_store_id#13] +Arguments: hashpartitioning(ss_item_sk#2, 5), true, [id=#15] + +(18) Sort [codegen id : 4] +Input [7]: [ss_item_sk#2, ss_quantity#4, ss_sales_price#5, d_year#8, d_moy#9, d_qoy#10, 
s_store_id#13] +Arguments: [ss_item_sk#2 ASC NULLS FIRST], false, 0 + +(19) Scan parquet default.item +Output [5]: [i_item_sk#16, i_brand#17, i_class#18, i_category#19, i_product_name#20] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 5] +Input [5]: [i_item_sk#16, i_brand#17, i_class#18, i_category#19, i_product_name#20] + +(21) Filter [codegen id : 5] +Input [5]: [i_item_sk#16, i_brand#17, i_class#18, i_category#19, i_product_name#20] +Condition : isnotnull(i_item_sk#16) + +(22) Exchange +Input [5]: [i_item_sk#16, i_brand#17, i_class#18, i_category#19, i_product_name#20] +Arguments: hashpartitioning(i_item_sk#16, 5), true, [id=#21] + +(23) Sort [codegen id : 6] +Input [5]: [i_item_sk#16, i_brand#17, i_class#18, i_category#19, i_product_name#20] +Arguments: [i_item_sk#16 ASC NULLS FIRST], false, 0 + +(24) SortMergeJoin [codegen id : 7] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#16] +Join condition: None + +(25) Project [codegen id : 7] +Output [10]: [ss_quantity#4, ss_sales_price#5, d_year#8, d_moy#9, d_qoy#10, s_store_id#13, i_brand#17, i_class#18, i_category#19, i_product_name#20] +Input [12]: [ss_item_sk#2, ss_quantity#4, ss_sales_price#5, d_year#8, d_moy#9, d_qoy#10, s_store_id#13, i_item_sk#16, i_brand#17, i_class#18, i_category#19, i_product_name#20] + +(26) HashAggregate [codegen id : 7] +Input [10]: [ss_quantity#4, ss_sales_price#5, d_year#8, d_moy#9, d_qoy#10, s_store_id#13, i_brand#17, i_class#18, i_category#19, i_product_name#20] +Keys [8]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13] +Functions [1]: [partial_sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as 
decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [2]: [sum#22, isEmpty#23] +Results [10]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sum#24, isEmpty#25] + +(27) Exchange +Input [10]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sum#24, isEmpty#25] +Arguments: hashpartitioning(i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, 5), true, [id=#26] + +(28) HashAggregate [codegen id : 8] +Input [10]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sum#24, isEmpty#25] +Keys [8]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#27] +Results [9]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, cast(sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#27 as decimal(38,2)) AS sumsales#28] + +(29) ReusedExchange [Reuses operator id: 27] +Output [10]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sum#29, isEmpty#30] + +(30) HashAggregate [codegen id : 16] +Input [10]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sum#29, 
isEmpty#30] +Keys [8]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#31] +Results [8]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#31 AS sumsales#32] + +(31) HashAggregate [codegen id : 16] +Input [8]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, sumsales#32] +Keys [7]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9] +Functions [1]: [partial_sum(sumsales#32)] +Aggregate Attributes [2]: [sum#33, isEmpty#34] +Results [9]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, sum#35, isEmpty#36] + +(32) Exchange +Input [9]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, sum#35, isEmpty#36] +Arguments: hashpartitioning(i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, 5), true, [id=#37] + +(33) HashAggregate [codegen id : 17] +Input [9]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, sum#35, isEmpty#36] +Keys [7]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9] +Functions [1]: [sum(sumsales#32)] +Aggregate Attributes [1]: [sum(sumsales#32)#38] +Results [9]: 
[i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, null AS s_store_id#39, sum(sumsales#32)#38 AS sumsales#40] + +(34) ReusedExchange [Reuses operator id: 27] +Output [10]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sum#41, isEmpty#42] + +(35) HashAggregate [codegen id : 25] +Input [10]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sum#41, isEmpty#42] +Keys [8]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#43] +Results [7]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#43 AS sumsales#32] + +(36) HashAggregate [codegen id : 25] +Input [7]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, sumsales#32] +Keys [6]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10] +Functions [1]: [partial_sum(sumsales#32)] +Aggregate Attributes [2]: [sum#44, isEmpty#45] +Results [8]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, sum#46, isEmpty#47] + +(37) Exchange +Input [8]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, sum#46, isEmpty#47] +Arguments: 
hashpartitioning(i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, 5), true, [id=#48] + +(38) HashAggregate [codegen id : 26] +Input [8]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, sum#46, isEmpty#47] +Keys [6]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10] +Functions [1]: [sum(sumsales#32)] +Aggregate Attributes [1]: [sum(sumsales#32)#49] +Results [9]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, null AS d_moy#50, null AS s_store_id#51, sum(sumsales#32)#49 AS sumsales#52] + +(39) ReusedExchange [Reuses operator id: 27] +Output [10]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sum#53, isEmpty#54] + +(40) HashAggregate [codegen id : 34] +Input [10]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sum#53, isEmpty#54] +Keys [8]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#55] +Results [6]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#55 AS sumsales#32] + +(41) HashAggregate [codegen id : 34] +Input [6]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, sumsales#32] +Keys [5]: 
[i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8] +Functions [1]: [partial_sum(sumsales#32)] +Aggregate Attributes [2]: [sum#56, isEmpty#57] +Results [7]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, sum#58, isEmpty#59] + +(42) Exchange +Input [7]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, sum#58, isEmpty#59] +Arguments: hashpartitioning(i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, 5), true, [id=#60] + +(43) HashAggregate [codegen id : 35] +Input [7]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, sum#58, isEmpty#59] +Keys [5]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8] +Functions [1]: [sum(sumsales#32)] +Aggregate Attributes [1]: [sum(sumsales#32)#61] +Results [9]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, null AS d_qoy#62, null AS d_moy#63, null AS s_store_id#64, sum(sumsales#32)#61 AS sumsales#65] + +(44) ReusedExchange [Reuses operator id: 27] +Output [10]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sum#66, isEmpty#67] + +(45) HashAggregate [codegen id : 43] +Input [10]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sum#66, isEmpty#67] +Keys [8]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#68] +Results [5]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, 
sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#68 AS sumsales#32] + +(46) HashAggregate [codegen id : 43] +Input [5]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, sumsales#32] +Keys [4]: [i_category#19, i_class#18, i_brand#17, i_product_name#20] +Functions [1]: [partial_sum(sumsales#32)] +Aggregate Attributes [2]: [sum#69, isEmpty#70] +Results [6]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, sum#71, isEmpty#72] + +(47) Exchange +Input [6]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, sum#71, isEmpty#72] +Arguments: hashpartitioning(i_category#19, i_class#18, i_brand#17, i_product_name#20, 5), true, [id=#73] + +(48) HashAggregate [codegen id : 44] +Input [6]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, sum#71, isEmpty#72] +Keys [4]: [i_category#19, i_class#18, i_brand#17, i_product_name#20] +Functions [1]: [sum(sumsales#32)] +Aggregate Attributes [1]: [sum(sumsales#32)#74] +Results [9]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, null AS d_year#75, null AS d_qoy#76, null AS d_moy#77, null AS s_store_id#78, sum(sumsales#32)#74 AS sumsales#79] + +(49) ReusedExchange [Reuses operator id: 27] +Output [10]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sum#80, isEmpty#81] + +(50) HashAggregate [codegen id : 52] +Input [10]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sum#80, isEmpty#81] +Keys [8]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate 
Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#82] +Results [4]: [i_category#19, i_class#18, i_brand#17, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#82 AS sumsales#32] + +(51) HashAggregate [codegen id : 52] +Input [4]: [i_category#19, i_class#18, i_brand#17, sumsales#32] +Keys [3]: [i_category#19, i_class#18, i_brand#17] +Functions [1]: [partial_sum(sumsales#32)] +Aggregate Attributes [2]: [sum#83, isEmpty#84] +Results [5]: [i_category#19, i_class#18, i_brand#17, sum#85, isEmpty#86] + +(52) Exchange +Input [5]: [i_category#19, i_class#18, i_brand#17, sum#85, isEmpty#86] +Arguments: hashpartitioning(i_category#19, i_class#18, i_brand#17, 5), true, [id=#87] + +(53) HashAggregate [codegen id : 53] +Input [5]: [i_category#19, i_class#18, i_brand#17, sum#85, isEmpty#86] +Keys [3]: [i_category#19, i_class#18, i_brand#17] +Functions [1]: [sum(sumsales#32)] +Aggregate Attributes [1]: [sum(sumsales#32)#88] +Results [9]: [i_category#19, i_class#18, i_brand#17, null AS i_product_name#89, null AS d_year#90, null AS d_qoy#91, null AS d_moy#92, null AS s_store_id#93, sum(sumsales#32)#88 AS sumsales#94] + +(54) ReusedExchange [Reuses operator id: 27] +Output [10]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sum#95, isEmpty#96] + +(55) HashAggregate [codegen id : 61] +Input [10]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sum#95, isEmpty#96] +Keys [8]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 
as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#97] +Results [3]: [i_category#19, i_class#18, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#97 AS sumsales#32] + +(56) HashAggregate [codegen id : 61] +Input [3]: [i_category#19, i_class#18, sumsales#32] +Keys [2]: [i_category#19, i_class#18] +Functions [1]: [partial_sum(sumsales#32)] +Aggregate Attributes [2]: [sum#98, isEmpty#99] +Results [4]: [i_category#19, i_class#18, sum#100, isEmpty#101] + +(57) Exchange +Input [4]: [i_category#19, i_class#18, sum#100, isEmpty#101] +Arguments: hashpartitioning(i_category#19, i_class#18, 5), true, [id=#102] + +(58) HashAggregate [codegen id : 62] +Input [4]: [i_category#19, i_class#18, sum#100, isEmpty#101] +Keys [2]: [i_category#19, i_class#18] +Functions [1]: [sum(sumsales#32)] +Aggregate Attributes [1]: [sum(sumsales#32)#103] +Results [9]: [i_category#19, i_class#18, null AS i_brand#104, null AS i_product_name#105, null AS d_year#106, null AS d_qoy#107, null AS d_moy#108, null AS s_store_id#109, sum(sumsales#32)#103 AS sumsales#110] + +(59) ReusedExchange [Reuses operator id: 27] +Output [10]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sum#111, isEmpty#112] + +(60) HashAggregate [codegen id : 70] +Input [10]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sum#111, isEmpty#112] +Keys [8]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13] 
+Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#113] +Results [2]: [i_category#19, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#113 AS sumsales#32] + +(61) HashAggregate [codegen id : 70] +Input [2]: [i_category#19, sumsales#32] +Keys [1]: [i_category#19] +Functions [1]: [partial_sum(sumsales#32)] +Aggregate Attributes [2]: [sum#114, isEmpty#115] +Results [3]: [i_category#19, sum#116, isEmpty#117] + +(62) Exchange +Input [3]: [i_category#19, sum#116, isEmpty#117] +Arguments: hashpartitioning(i_category#19, 5), true, [id=#118] + +(63) HashAggregate [codegen id : 71] +Input [3]: [i_category#19, sum#116, isEmpty#117] +Keys [1]: [i_category#19] +Functions [1]: [sum(sumsales#32)] +Aggregate Attributes [1]: [sum(sumsales#32)#119] +Results [9]: [i_category#19, null AS i_class#120, null AS i_brand#121, null AS i_product_name#122, null AS d_year#123, null AS d_qoy#124, null AS d_moy#125, null AS s_store_id#126, sum(sumsales#32)#119 AS sumsales#127] + +(64) ReusedExchange [Reuses operator id: 27] +Output [10]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sum#128, isEmpty#129] + +(65) HashAggregate [codegen id : 79] +Input [10]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sum#128, isEmpty#129] +Keys [8]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13] 
+Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#130] +Results [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#130 AS sumsales#32] + +(66) HashAggregate [codegen id : 79] +Input [1]: [sumsales#32] +Keys: [] +Functions [1]: [partial_sum(sumsales#32)] +Aggregate Attributes [2]: [sum#131, isEmpty#132] +Results [2]: [sum#133, isEmpty#134] + +(67) Exchange +Input [2]: [sum#133, isEmpty#134] +Arguments: SinglePartition, true, [id=#135] + +(68) HashAggregate [codegen id : 80] +Input [2]: [sum#133, isEmpty#134] +Keys: [] +Functions [1]: [sum(sumsales#32)] +Aggregate Attributes [1]: [sum(sumsales#32)#136] +Results [9]: [null AS i_category#137, null AS i_class#138, null AS i_brand#139, null AS i_product_name#140, null AS d_year#141, null AS d_qoy#142, null AS d_moy#143, null AS s_store_id#144, sum(sumsales#32)#136 AS sumsales#145] + +(69) Union + +(70) Exchange +Input [9]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sumsales#28] +Arguments: hashpartitioning(i_category#19, 5), true, [id=#146] + +(71) Sort [codegen id : 81] +Input [9]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sumsales#28] +Arguments: [i_category#19 ASC NULLS FIRST, sumsales#28 DESC NULLS LAST], false, 0 + +(72) Window +Input [9]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, 
sumsales#28] +Arguments: [rank(sumsales#28) windowspecdefinition(i_category#19, sumsales#28 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#147], [i_category#19], [sumsales#28 DESC NULLS LAST] + +(73) Filter [codegen id : 82] +Input [10]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sumsales#28, rk#147] +Condition : (isnotnull(rk#147) AND (rk#147 <= 100)) + +(74) TakeOrderedAndProject +Input [10]: [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sumsales#28, rk#147] +Arguments: 100, [i_category#19 ASC NULLS FIRST, i_class#18 ASC NULLS FIRST, i_brand#17 ASC NULLS FIRST, i_product_name#20 ASC NULLS FIRST, d_year#8 ASC NULLS FIRST, d_qoy#10 ASC NULLS FIRST, d_moy#9 ASC NULLS FIRST, s_store_id#13 ASC NULLS FIRST, sumsales#28 ASC NULLS FIRST, rk#147 ASC NULLS FIRST], [i_category#19, i_class#18, i_brand#17, i_product_name#20, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sumsales#28, rk#147] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.sf100/simplified.txt new file mode 100644 index 0000000000000..cad2ffb83d0bc --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.sf100/simplified.txt @@ -0,0 +1,129 @@ +TakeOrderedAndProject [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,rk,s_store_id,sumsales] + WholeStageCodegen (82) + Filter [rk] + InputAdapter + Window [i_category,sumsales] + WholeStageCodegen (81) + Sort [i_category,sumsales] + InputAdapter + Exchange [i_category] #1 + Union + WholeStageCodegen (8) + HashAggregate [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,sum] [isEmpty,sum,sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * 
promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales] + InputAdapter + Exchange [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,s_store_id] #2 + WholeStageCodegen (7) + HashAggregate [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,s_store_id,ss_quantity,ss_sales_price] [isEmpty,isEmpty,sum,sum] + Project [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,s_store_id,ss_quantity,ss_sales_price] + SortMergeJoin [i_item_sk,ss_item_sk] + InputAdapter + WholeStageCodegen (4) + Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #3 + WholeStageCodegen (3) + Project [d_moy,d_qoy,d_year,s_store_id,ss_item_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_moy,d_qoy,d_year,ss_item_sk,ss_quantity,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk,d_moy,d_qoy,d_year] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq,d_moy,d_qoy,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_id,s_store_sk] + InputAdapter + WholeStageCodegen (6) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #6 + WholeStageCodegen (5) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_category,i_class,i_item_sk,i_product_name] + WholeStageCodegen (17) + HashAggregate [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,sum] [isEmpty,s_store_id,sum,sum(sumsales),sumsales] + InputAdapter + Exchange 
[d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name] #7 + WholeStageCodegen (16) + HashAggregate [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,sumsales] [isEmpty,isEmpty,sum,sum] + HashAggregate [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,sum] [isEmpty,sum,sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales] + InputAdapter + ReusedExchange [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,sum] #2 + WholeStageCodegen (26) + HashAggregate [d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,sum] [d_moy,isEmpty,s_store_id,sum,sum(sumsales),sumsales] + InputAdapter + Exchange [d_qoy,d_year,i_brand,i_category,i_class,i_product_name] #8 + WholeStageCodegen (25) + HashAggregate [d_qoy,d_year,i_brand,i_category,i_class,i_product_name,sumsales] [isEmpty,isEmpty,sum,sum] + HashAggregate [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,sum] [isEmpty,sum,sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales] + InputAdapter + ReusedExchange [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,sum] #2 + WholeStageCodegen (35) + HashAggregate [d_year,i_brand,i_category,i_class,i_product_name,isEmpty,sum] [d_moy,d_qoy,isEmpty,s_store_id,sum,sum(sumsales),sumsales] + InputAdapter + Exchange [d_year,i_brand,i_category,i_class,i_product_name] #9 + WholeStageCodegen (34) + HashAggregate [d_year,i_brand,i_category,i_class,i_product_name,sumsales] [isEmpty,isEmpty,sum,sum] + HashAggregate [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,sum] 
[isEmpty,sum,sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales] + InputAdapter + ReusedExchange [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,sum] #2 + WholeStageCodegen (44) + HashAggregate [i_brand,i_category,i_class,i_product_name,isEmpty,sum] [d_moy,d_qoy,d_year,isEmpty,s_store_id,sum,sum(sumsales),sumsales] + InputAdapter + Exchange [i_brand,i_category,i_class,i_product_name] #10 + WholeStageCodegen (43) + HashAggregate [i_brand,i_category,i_class,i_product_name,sumsales] [isEmpty,isEmpty,sum,sum] + HashAggregate [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,sum] [isEmpty,sum,sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales] + InputAdapter + ReusedExchange [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,sum] #2 + WholeStageCodegen (53) + HashAggregate [i_brand,i_category,i_class,isEmpty,sum] [d_moy,d_qoy,d_year,i_product_name,isEmpty,s_store_id,sum,sum(sumsales),sumsales] + InputAdapter + Exchange [i_brand,i_category,i_class] #11 + WholeStageCodegen (52) + HashAggregate [i_brand,i_category,i_class,sumsales] [isEmpty,isEmpty,sum,sum] + HashAggregate [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,sum] [isEmpty,sum,sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales] + InputAdapter + ReusedExchange [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,sum] #2 + WholeStageCodegen (62) + HashAggregate [i_category,i_class,isEmpty,sum] 
[d_moy,d_qoy,d_year,i_brand,i_product_name,isEmpty,s_store_id,sum,sum(sumsales),sumsales] + InputAdapter + Exchange [i_category,i_class] #12 + WholeStageCodegen (61) + HashAggregate [i_category,i_class,sumsales] [isEmpty,isEmpty,sum,sum] + HashAggregate [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,sum] [isEmpty,sum,sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales] + InputAdapter + ReusedExchange [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,sum] #2 + WholeStageCodegen (71) + HashAggregate [i_category,isEmpty,sum] [d_moy,d_qoy,d_year,i_brand,i_class,i_product_name,isEmpty,s_store_id,sum,sum(sumsales),sumsales] + InputAdapter + Exchange [i_category] #13 + WholeStageCodegen (70) + HashAggregate [i_category,sumsales] [isEmpty,isEmpty,sum,sum] + HashAggregate [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,sum] [isEmpty,sum,sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales] + InputAdapter + ReusedExchange [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,sum] #2 + WholeStageCodegen (80) + HashAggregate [isEmpty,sum] [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,sum,sum(sumsales),sumsales] + InputAdapter + Exchange #14 + WholeStageCodegen (79) + HashAggregate [sumsales] [isEmpty,isEmpty,sum,sum] + HashAggregate [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,sum] [isEmpty,sum,sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), 
true), 0.00)),sumsales] + InputAdapter + ReusedExchange [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,sum] #2 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a/explain.txt new file mode 100644 index 0000000000000..f1bd844bb6973 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a/explain.txt @@ -0,0 +1,437 @@ +== Physical Plan == +TakeOrderedAndProject (71) ++- * Filter (70) + +- Window (69) + +- * Sort (68) + +- Exchange (67) + +- Union (66) + :- * HashAggregate (25) + : +- Exchange (24) + : +- * HashAggregate (23) + : +- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.store (11) + : +- BroadcastExchange (20) + : +- * Filter (19) + : +- * ColumnarToRow (18) + : +- Scan parquet default.item (17) + :- * HashAggregate (30) + : +- Exchange (29) + : +- * HashAggregate (28) + : +- * HashAggregate (27) + : +- ReusedExchange (26) + :- * HashAggregate (35) + : +- Exchange (34) + : +- * HashAggregate (33) + : +- * HashAggregate (32) + : +- ReusedExchange (31) + :- * HashAggregate (40) + : +- Exchange (39) + : +- * HashAggregate (38) + : +- * HashAggregate (37) + : +- ReusedExchange (36) + :- * HashAggregate (45) + : +- Exchange (44) + : +- * HashAggregate (43) + : +- * HashAggregate (42) + : +- ReusedExchange (41) + :- * HashAggregate (50) + 
: +- Exchange (49) + : +- * HashAggregate (48) + : +- * HashAggregate (47) + : +- ReusedExchange (46) + :- * HashAggregate (55) + : +- Exchange (54) + : +- * HashAggregate (53) + : +- * HashAggregate (52) + : +- ReusedExchange (51) + :- * HashAggregate (60) + : +- Exchange (59) + : +- * HashAggregate (58) + : +- * HashAggregate (57) + : +- ReusedExchange (56) + +- * HashAggregate (65) + +- Exchange (64) + +- * HashAggregate (63) + +- * HashAggregate (62) + +- ReusedExchange (61) + + +(1) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5] + +(3) Filter [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5] +Condition : ((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_item_sk#2)) + +(4) Scan parquet default.date_dim +Output [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] + +(6) Filter [codegen id : 1] +Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] +Condition : (((isnotnull(d_month_seq#7) AND (d_month_seq#7 >= 1212)) AND (d_month_seq#7 <= 
1223)) AND isnotnull(d_date_sk#6)) + +(7) Project [codegen id : 1] +Output [4]: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] +Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] + +(8) BroadcastExchange +Input [4]: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(10) Project [codegen id : 4] +Output [7]: [ss_item_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, d_year#8, d_moy#9, d_qoy#10] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] + +(11) Scan parquet default.store +Output [2]: [s_store_sk#12, s_store_id#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#12, s_store_id#13] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#12, s_store_id#13] +Condition : isnotnull(s_store_sk#12) + +(14) BroadcastExchange +Input [2]: [s_store_sk#12, s_store_id#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#12] +Join condition: None + +(16) Project [codegen id : 4] +Output [7]: [ss_item_sk#2, ss_quantity#4, ss_sales_price#5, d_year#8, d_moy#9, d_qoy#10, s_store_id#13] +Input [9]: [ss_item_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, d_year#8, d_moy#9, d_qoy#10, s_store_sk#12, s_store_id#13] + +(17) Scan parquet default.item +Output [5]: [i_item_sk#15, i_brand#16, i_class#17, i_category#18, i_product_name#19] +Batched: true +Location: 
InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [5]: [i_item_sk#15, i_brand#16, i_class#17, i_category#18, i_product_name#19] + +(19) Filter [codegen id : 3] +Input [5]: [i_item_sk#15, i_brand#16, i_class#17, i_category#18, i_product_name#19] +Condition : isnotnull(i_item_sk#15) + +(20) BroadcastExchange +Input [5]: [i_item_sk#15, i_brand#16, i_class#17, i_category#18, i_product_name#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#20] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#15] +Join condition: None + +(22) Project [codegen id : 4] +Output [10]: [ss_quantity#4, ss_sales_price#5, d_year#8, d_moy#9, d_qoy#10, s_store_id#13, i_brand#16, i_class#17, i_category#18, i_product_name#19] +Input [12]: [ss_item_sk#2, ss_quantity#4, ss_sales_price#5, d_year#8, d_moy#9, d_qoy#10, s_store_id#13, i_item_sk#15, i_brand#16, i_class#17, i_category#18, i_product_name#19] + +(23) HashAggregate [codegen id : 4] +Input [10]: [ss_quantity#4, ss_sales_price#5, d_year#8, d_moy#9, d_qoy#10, s_store_id#13, i_brand#16, i_class#17, i_category#18, i_product_name#19] +Keys [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13] +Functions [1]: [partial_sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [2]: [sum#21, isEmpty#22] +Results [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sum#23, isEmpty#24] + +(24) Exchange +Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, 
d_moy#9, s_store_id#13, sum#23, isEmpty#24] +Arguments: hashpartitioning(i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, 5), true, [id=#25] + +(25) HashAggregate [codegen id : 5] +Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sum#23, isEmpty#24] +Keys [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26] +Results [9]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, cast(sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26 as decimal(38,2)) AS sumsales#27] + +(26) ReusedExchange [Reuses operator id: 24] +Output [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sum#28, isEmpty#29] + +(27) HashAggregate [codegen id : 10] +Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sum#28, isEmpty#29] +Keys [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] 
+Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#30] +Results [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#30 AS sumsales#31] + +(28) HashAggregate [codegen id : 10] +Input [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, sumsales#31] +Keys [7]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9] +Functions [1]: [partial_sum(sumsales#31)] +Aggregate Attributes [2]: [sum#32, isEmpty#33] +Results [9]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, sum#34, isEmpty#35] + +(29) Exchange +Input [9]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, sum#34, isEmpty#35] +Arguments: hashpartitioning(i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, 5), true, [id=#36] + +(30) HashAggregate [codegen id : 11] +Input [9]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, sum#34, isEmpty#35] +Keys [7]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9] +Functions [1]: [sum(sumsales#31)] +Aggregate Attributes [1]: [sum(sumsales#31)#37] +Results [9]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, null AS s_store_id#38, sum(sumsales#31)#37 AS sumsales#39] + +(31) ReusedExchange [Reuses operator id: 24] +Output [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sum#40, isEmpty#41] + +(32) 
HashAggregate [codegen id : 16] +Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sum#40, isEmpty#41] +Keys [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#42] +Results [7]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#42 AS sumsales#31] + +(33) HashAggregate [codegen id : 16] +Input [7]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, sumsales#31] +Keys [6]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10] +Functions [1]: [partial_sum(sumsales#31)] +Aggregate Attributes [2]: [sum#43, isEmpty#44] +Results [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, sum#45, isEmpty#46] + +(34) Exchange +Input [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, sum#45, isEmpty#46] +Arguments: hashpartitioning(i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, 5), true, [id=#47] + +(35) HashAggregate [codegen id : 17] +Input [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, sum#45, isEmpty#46] +Keys [6]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10] +Functions 
[1]: [sum(sumsales#31)] +Aggregate Attributes [1]: [sum(sumsales#31)#48] +Results [9]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, null AS d_moy#49, null AS s_store_id#50, sum(sumsales#31)#48 AS sumsales#51] + +(36) ReusedExchange [Reuses operator id: 24] +Output [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sum#52, isEmpty#53] + +(37) HashAggregate [codegen id : 22] +Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sum#52, isEmpty#53] +Keys [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#54] +Results [6]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#54 AS sumsales#31] + +(38) HashAggregate [codegen id : 22] +Input [6]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, sumsales#31] +Keys [5]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8] +Functions [1]: [partial_sum(sumsales#31)] +Aggregate Attributes [2]: [sum#55, isEmpty#56] +Results [7]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, sum#57, isEmpty#58] + +(39) Exchange +Input [7]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, sum#57, 
isEmpty#58] +Arguments: hashpartitioning(i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, 5), true, [id=#59] + +(40) HashAggregate [codegen id : 23] +Input [7]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, sum#57, isEmpty#58] +Keys [5]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8] +Functions [1]: [sum(sumsales#31)] +Aggregate Attributes [1]: [sum(sumsales#31)#60] +Results [9]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, null AS d_qoy#61, null AS d_moy#62, null AS s_store_id#63, sum(sumsales#31)#60 AS sumsales#64] + +(41) ReusedExchange [Reuses operator id: 24] +Output [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sum#65, isEmpty#66] + +(42) HashAggregate [codegen id : 28] +Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sum#65, isEmpty#66] +Keys [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#67] +Results [5]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#67 AS sumsales#31] + +(43) HashAggregate [codegen id : 28] +Input [5]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, sumsales#31] +Keys [4]: [i_category#18, i_class#17, 
i_brand#16, i_product_name#19] +Functions [1]: [partial_sum(sumsales#31)] +Aggregate Attributes [2]: [sum#68, isEmpty#69] +Results [6]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, sum#70, isEmpty#71] + +(44) Exchange +Input [6]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, sum#70, isEmpty#71] +Arguments: hashpartitioning(i_category#18, i_class#17, i_brand#16, i_product_name#19, 5), true, [id=#72] + +(45) HashAggregate [codegen id : 29] +Input [6]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, sum#70, isEmpty#71] +Keys [4]: [i_category#18, i_class#17, i_brand#16, i_product_name#19] +Functions [1]: [sum(sumsales#31)] +Aggregate Attributes [1]: [sum(sumsales#31)#73] +Results [9]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, null AS d_year#74, null AS d_qoy#75, null AS d_moy#76, null AS s_store_id#77, sum(sumsales#31)#73 AS sumsales#78] + +(46) ReusedExchange [Reuses operator id: 24] +Output [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sum#79, isEmpty#80] + +(47) HashAggregate [codegen id : 34] +Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sum#79, isEmpty#80] +Keys [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#81] +Results [4]: [i_category#18, i_class#17, i_brand#16, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * 
promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#81 AS sumsales#31] + +(48) HashAggregate [codegen id : 34] +Input [4]: [i_category#18, i_class#17, i_brand#16, sumsales#31] +Keys [3]: [i_category#18, i_class#17, i_brand#16] +Functions [1]: [partial_sum(sumsales#31)] +Aggregate Attributes [2]: [sum#82, isEmpty#83] +Results [5]: [i_category#18, i_class#17, i_brand#16, sum#84, isEmpty#85] + +(49) Exchange +Input [5]: [i_category#18, i_class#17, i_brand#16, sum#84, isEmpty#85] +Arguments: hashpartitioning(i_category#18, i_class#17, i_brand#16, 5), true, [id=#86] + +(50) HashAggregate [codegen id : 35] +Input [5]: [i_category#18, i_class#17, i_brand#16, sum#84, isEmpty#85] +Keys [3]: [i_category#18, i_class#17, i_brand#16] +Functions [1]: [sum(sumsales#31)] +Aggregate Attributes [1]: [sum(sumsales#31)#87] +Results [9]: [i_category#18, i_class#17, i_brand#16, null AS i_product_name#88, null AS d_year#89, null AS d_qoy#90, null AS d_moy#91, null AS s_store_id#92, sum(sumsales#31)#87 AS sumsales#93] + +(51) ReusedExchange [Reuses operator id: 24] +Output [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sum#94, isEmpty#95] + +(52) HashAggregate [codegen id : 40] +Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sum#94, isEmpty#95] +Keys [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 
0.00))#96] +Results [3]: [i_category#18, i_class#17, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#96 AS sumsales#31] + +(53) HashAggregate [codegen id : 40] +Input [3]: [i_category#18, i_class#17, sumsales#31] +Keys [2]: [i_category#18, i_class#17] +Functions [1]: [partial_sum(sumsales#31)] +Aggregate Attributes [2]: [sum#97, isEmpty#98] +Results [4]: [i_category#18, i_class#17, sum#99, isEmpty#100] + +(54) Exchange +Input [4]: [i_category#18, i_class#17, sum#99, isEmpty#100] +Arguments: hashpartitioning(i_category#18, i_class#17, 5), true, [id=#101] + +(55) HashAggregate [codegen id : 41] +Input [4]: [i_category#18, i_class#17, sum#99, isEmpty#100] +Keys [2]: [i_category#18, i_class#17] +Functions [1]: [sum(sumsales#31)] +Aggregate Attributes [1]: [sum(sumsales#31)#102] +Results [9]: [i_category#18, i_class#17, null AS i_brand#103, null AS i_product_name#104, null AS d_year#105, null AS d_qoy#106, null AS d_moy#107, null AS s_store_id#108, sum(sumsales#31)#102 AS sumsales#109] + +(56) ReusedExchange [Reuses operator id: 24] +Output [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sum#110, isEmpty#111] + +(57) HashAggregate [codegen id : 46] +Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sum#110, isEmpty#111] +Keys [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * 
promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#112] +Results [2]: [i_category#18, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#112 AS sumsales#31] + +(58) HashAggregate [codegen id : 46] +Input [2]: [i_category#18, sumsales#31] +Keys [1]: [i_category#18] +Functions [1]: [partial_sum(sumsales#31)] +Aggregate Attributes [2]: [sum#113, isEmpty#114] +Results [3]: [i_category#18, sum#115, isEmpty#116] + +(59) Exchange +Input [3]: [i_category#18, sum#115, isEmpty#116] +Arguments: hashpartitioning(i_category#18, 5), true, [id=#117] + +(60) HashAggregate [codegen id : 47] +Input [3]: [i_category#18, sum#115, isEmpty#116] +Keys [1]: [i_category#18] +Functions [1]: [sum(sumsales#31)] +Aggregate Attributes [1]: [sum(sumsales#31)#118] +Results [9]: [i_category#18, null AS i_class#119, null AS i_brand#120, null AS i_product_name#121, null AS d_year#122, null AS d_qoy#123, null AS d_moy#124, null AS s_store_id#125, sum(sumsales#31)#118 AS sumsales#126] + +(61) ReusedExchange [Reuses operator id: 24] +Output [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sum#127, isEmpty#128] + +(62) HashAggregate [codegen id : 52] +Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sum#127, isEmpty#128] +Keys [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * 
promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#129] +Results [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#129 AS sumsales#31] + +(63) HashAggregate [codegen id : 52] +Input [1]: [sumsales#31] +Keys: [] +Functions [1]: [partial_sum(sumsales#31)] +Aggregate Attributes [2]: [sum#130, isEmpty#131] +Results [2]: [sum#132, isEmpty#133] + +(64) Exchange +Input [2]: [sum#132, isEmpty#133] +Arguments: SinglePartition, true, [id=#134] + +(65) HashAggregate [codegen id : 53] +Input [2]: [sum#132, isEmpty#133] +Keys: [] +Functions [1]: [sum(sumsales#31)] +Aggregate Attributes [1]: [sum(sumsales#31)#135] +Results [9]: [null AS i_category#136, null AS i_class#137, null AS i_brand#138, null AS i_product_name#139, null AS d_year#140, null AS d_qoy#141, null AS d_moy#142, null AS s_store_id#143, sum(sumsales#31)#135 AS sumsales#144] + +(66) Union + +(67) Exchange +Input [9]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sumsales#27] +Arguments: hashpartitioning(i_category#18, 5), true, [id=#145] + +(68) Sort [codegen id : 54] +Input [9]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sumsales#27] +Arguments: [i_category#18 ASC NULLS FIRST, sumsales#27 DESC NULLS LAST], false, 0 + +(69) Window +Input [9]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sumsales#27] +Arguments: [rank(sumsales#27) windowspecdefinition(i_category#18, sumsales#27 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#146], [i_category#18], [sumsales#27 DESC NULLS LAST] + +(70) Filter [codegen id : 55] +Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, 
d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sumsales#27, rk#146] +Condition : (isnotnull(rk#146) AND (rk#146 <= 100)) + +(71) TakeOrderedAndProject +Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sumsales#27, rk#146] +Arguments: 100, [i_category#18 ASC NULLS FIRST, i_class#17 ASC NULLS FIRST, i_brand#16 ASC NULLS FIRST, i_product_name#19 ASC NULLS FIRST, d_year#8 ASC NULLS FIRST, d_qoy#10 ASC NULLS FIRST, d_moy#9 ASC NULLS FIRST, s_store_id#13 ASC NULLS FIRST, sumsales#27 ASC NULLS FIRST, rk#146 ASC NULLS FIRST], [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, sumsales#27, rk#146] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a/simplified.txt new file mode 100644 index 0000000000000..9fea5d8e164b3 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a/simplified.txt @@ -0,0 +1,120 @@ +TakeOrderedAndProject [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,rk,s_store_id,sumsales] + WholeStageCodegen (55) + Filter [rk] + InputAdapter + Window [i_category,sumsales] + WholeStageCodegen (54) + Sort [i_category,sumsales] + InputAdapter + Exchange [i_category] #1 + Union + WholeStageCodegen (5) + HashAggregate [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,sum] [isEmpty,sum,sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales] + InputAdapter + Exchange [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,s_store_id] #2 + WholeStageCodegen (4) + HashAggregate [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,s_store_id,ss_quantity,ss_sales_price] 
[isEmpty,isEmpty,sum,sum] + Project [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,s_store_id,ss_quantity,ss_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [d_moy,d_qoy,d_year,s_store_id,ss_item_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_moy,d_qoy,d_year,ss_item_sk,ss_quantity,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk,d_moy,d_qoy,d_year] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq,d_moy,d_qoy,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_id,s_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand,i_category,i_class,i_item_sk,i_product_name] + WholeStageCodegen (11) + HashAggregate [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,sum] [isEmpty,s_store_id,sum,sum(sumsales),sumsales] + InputAdapter + Exchange [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name] #6 + WholeStageCodegen (10) + HashAggregate [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,sumsales] [isEmpty,isEmpty,sum,sum] + HashAggregate [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,sum] [isEmpty,sum,sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales] + InputAdapter + ReusedExchange 
[d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,sum] #2 + WholeStageCodegen (17) + HashAggregate [d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,sum] [d_moy,isEmpty,s_store_id,sum,sum(sumsales),sumsales] + InputAdapter + Exchange [d_qoy,d_year,i_brand,i_category,i_class,i_product_name] #7 + WholeStageCodegen (16) + HashAggregate [d_qoy,d_year,i_brand,i_category,i_class,i_product_name,sumsales] [isEmpty,isEmpty,sum,sum] + HashAggregate [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,sum] [isEmpty,sum,sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales] + InputAdapter + ReusedExchange [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,sum] #2 + WholeStageCodegen (23) + HashAggregate [d_year,i_brand,i_category,i_class,i_product_name,isEmpty,sum] [d_moy,d_qoy,isEmpty,s_store_id,sum,sum(sumsales),sumsales] + InputAdapter + Exchange [d_year,i_brand,i_category,i_class,i_product_name] #8 + WholeStageCodegen (22) + HashAggregate [d_year,i_brand,i_category,i_class,i_product_name,sumsales] [isEmpty,isEmpty,sum,sum] + HashAggregate [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,sum] [isEmpty,sum,sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales] + InputAdapter + ReusedExchange [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,sum] #2 + WholeStageCodegen (29) + HashAggregate [i_brand,i_category,i_class,i_product_name,isEmpty,sum] [d_moy,d_qoy,d_year,isEmpty,s_store_id,sum,sum(sumsales),sumsales] + InputAdapter + Exchange [i_brand,i_category,i_class,i_product_name] #9 + WholeStageCodegen (28) + 
HashAggregate [i_brand,i_category,i_class,i_product_name,sumsales] [isEmpty,isEmpty,sum,sum] + HashAggregate [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,sum] [isEmpty,sum,sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales] + InputAdapter + ReusedExchange [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,sum] #2 + WholeStageCodegen (35) + HashAggregate [i_brand,i_category,i_class,isEmpty,sum] [d_moy,d_qoy,d_year,i_product_name,isEmpty,s_store_id,sum,sum(sumsales),sumsales] + InputAdapter + Exchange [i_brand,i_category,i_class] #10 + WholeStageCodegen (34) + HashAggregate [i_brand,i_category,i_class,sumsales] [isEmpty,isEmpty,sum,sum] + HashAggregate [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,sum] [isEmpty,sum,sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales] + InputAdapter + ReusedExchange [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,sum] #2 + WholeStageCodegen (41) + HashAggregate [i_category,i_class,isEmpty,sum] [d_moy,d_qoy,d_year,i_brand,i_product_name,isEmpty,s_store_id,sum,sum(sumsales),sumsales] + InputAdapter + Exchange [i_category,i_class] #11 + WholeStageCodegen (40) + HashAggregate [i_category,i_class,sumsales] [isEmpty,isEmpty,sum,sum] + HashAggregate [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,sum] [isEmpty,sum,sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales] + InputAdapter + ReusedExchange 
[d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,sum] #2 + WholeStageCodegen (47) + HashAggregate [i_category,isEmpty,sum] [d_moy,d_qoy,d_year,i_brand,i_class,i_product_name,isEmpty,s_store_id,sum,sum(sumsales),sumsales] + InputAdapter + Exchange [i_category] #12 + WholeStageCodegen (46) + HashAggregate [i_category,sumsales] [isEmpty,isEmpty,sum,sum] + HashAggregate [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,sum] [isEmpty,sum,sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales] + InputAdapter + ReusedExchange [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,sum] #2 + WholeStageCodegen (53) + HashAggregate [isEmpty,sum] [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,sum,sum(sumsales),sumsales] + InputAdapter + Exchange #13 + WholeStageCodegen (52) + HashAggregate [sumsales] [isEmpty,isEmpty,sum,sum] + HashAggregate [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,sum] [isEmpty,sum,sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales] + InputAdapter + ReusedExchange [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,isEmpty,s_store_id,sum] #2 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.sf100/explain.txt new file mode 100644 index 0000000000000..8ffe0713c70ae --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.sf100/explain.txt @@ -0,0 +1,373 @@ +== Physical Plan == +TakeOrderedAndProject (64) ++- * Project (63) 
+ +- Window (62) + +- * Sort (61) + +- Exchange (60) + +- * HashAggregate (59) + +- Exchange (58) + +- * HashAggregate (57) + +- Union (56) + :- * HashAggregate (50) + : +- Exchange (49) + : +- * HashAggregate (48) + : +- Union (47) + : :- * HashAggregate (41) + : : +- Exchange (40) + : : +- * HashAggregate (39) + : : +- * Project (38) + : : +- * BroadcastHashJoin Inner BuildRight (37) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (36) + : : +- * BroadcastHashJoin LeftSemi BuildRight (35) + : : :- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet default.store (11) + : : +- BroadcastExchange (34) + : : +- * Project (33) + : : +- * Filter (32) + : : +- Window (31) + : : +- * Sort (30) + : : +- Exchange (29) + : : +- * HashAggregate (28) + : : +- Exchange (27) + : : +- * HashAggregate (26) + : : +- * Project (25) + : : +- * BroadcastHashJoin Inner BuildRight (24) + : : :- * Project (19) + : : : +- * BroadcastHashJoin Inner BuildRight (18) + : : : :- * Filter (16) + : : : : +- * ColumnarToRow (15) + : : : : +- Scan parquet default.store_sales (14) + : : : +- ReusedExchange (17) + : : +- BroadcastExchange (23) + : : +- * Filter (22) + : : +- * ColumnarToRow (21) + : : +- Scan parquet default.store (20) + : +- * HashAggregate (46) + : +- Exchange (45) + : +- * HashAggregate (44) + : +- * HashAggregate (43) + : +- ReusedExchange (42) + +- * HashAggregate (55) + +- Exchange (54) + +- * HashAggregate (53) + +- * HashAggregate (52) + +- ReusedExchange (51) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 9] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] + +(3) Filter [codegen id : 9] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#2)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#4, d_month_seq#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1212)) AND (d_month_seq#5 <= 1223)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(9) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(10) Project [codegen id : 9] +Output [2]: [ss_store_sk#2, ss_net_profit#3] +Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3, d_date_sk#4] + +(11) Scan parquet default.store +Output [3]: [s_store_sk#7, s_county#8, s_state#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store] +PushedFilters: 
[IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 8] +Input [3]: [s_store_sk#7, s_county#8, s_state#9] + +(13) Filter [codegen id : 8] +Input [3]: [s_store_sk#7, s_county#8, s_state#9] +Condition : isnotnull(s_store_sk#7) + +(14) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 4] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] + +(16) Filter [codegen id : 4] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] +Condition : (isnotnull(ss_store_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(17) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#4] + +(18) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(19) Project [codegen id : 4] +Output [2]: [ss_store_sk#2, ss_net_profit#3] +Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3, d_date_sk#4] + +(20) Scan parquet default.store +Output [2]: [s_store_sk#7, s_state#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 3] +Input [2]: [s_store_sk#7, s_state#9] + +(22) Filter [codegen id : 3] +Input [2]: [s_store_sk#7, s_state#9] +Condition : isnotnull(s_store_sk#7) + +(23) BroadcastExchange +Input [2]: [s_store_sk#7, s_state#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] + +(24) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#2] +Right 
keys [1]: [s_store_sk#7] +Join condition: None + +(25) Project [codegen id : 4] +Output [2]: [ss_net_profit#3, s_state#9] +Input [4]: [ss_store_sk#2, ss_net_profit#3, s_store_sk#7, s_state#9] + +(26) HashAggregate [codegen id : 4] +Input [2]: [ss_net_profit#3, s_state#9] +Keys [1]: [s_state#9] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [sum#11] +Results [2]: [s_state#9, sum#12] + +(27) Exchange +Input [2]: [s_state#9, sum#12] +Arguments: hashpartitioning(s_state#9, 5), true, [id=#13] + +(28) HashAggregate [codegen id : 5] +Input [2]: [s_state#9, sum#12] +Keys [1]: [s_state#9] +Functions [1]: [sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#3))#14] +Results [3]: [s_state#9 AS s_state#15, s_state#9, MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#14,17,2) AS _w2#16] + +(29) Exchange +Input [3]: [s_state#15, s_state#9, _w2#16] +Arguments: hashpartitioning(s_state#9, 5), true, [id=#17] + +(30) Sort [codegen id : 6] +Input [3]: [s_state#15, s_state#9, _w2#16] +Arguments: [s_state#9 ASC NULLS FIRST, _w2#16 DESC NULLS LAST], false, 0 + +(31) Window +Input [3]: [s_state#15, s_state#9, _w2#16] +Arguments: [rank(_w2#16) windowspecdefinition(s_state#9, _w2#16 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS ranking#18], [s_state#9], [_w2#16 DESC NULLS LAST] + +(32) Filter [codegen id : 7] +Input [4]: [s_state#15, s_state#9, _w2#16, ranking#18] +Condition : (isnotnull(ranking#18) AND (ranking#18 <= 5)) + +(33) Project [codegen id : 7] +Output [1]: [s_state#15] +Input [4]: [s_state#15, s_state#9, _w2#16, ranking#18] + +(34) BroadcastExchange +Input [1]: [s_state#15] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#19] + +(35) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [s_state#9] +Right keys [1]: [s_state#15] +Join condition: None + +(36) BroadcastExchange +Input [3]: [s_store_sk#7, s_county#8, 
s_state#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#20] + +(37) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#7] +Join condition: None + +(38) Project [codegen id : 9] +Output [3]: [ss_net_profit#3, s_county#8, s_state#9] +Input [5]: [ss_store_sk#2, ss_net_profit#3, s_store_sk#7, s_county#8, s_state#9] + +(39) HashAggregate [codegen id : 9] +Input [3]: [ss_net_profit#3, s_county#8, s_state#9] +Keys [2]: [s_state#9, s_county#8] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [sum#21] +Results [3]: [s_state#9, s_county#8, sum#22] + +(40) Exchange +Input [3]: [s_state#9, s_county#8, sum#22] +Arguments: hashpartitioning(s_state#9, s_county#8, 5), true, [id=#23] + +(41) HashAggregate [codegen id : 10] +Input [3]: [s_state#9, s_county#8, sum#22] +Keys [2]: [s_state#9, s_county#8] +Functions [1]: [sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#3))#24] +Results [6]: [cast(MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#24,17,2) as decimal(27,2)) AS total_sum#25, s_state#9, s_county#8, 0 AS g_state#26, 0 AS g_county#27, 0 AS lochierarchy#28] + +(42) ReusedExchange [Reuses operator id: 40] +Output [3]: [s_state#9, s_county#8, sum#29] + +(43) HashAggregate [codegen id : 20] +Input [3]: [s_state#9, s_county#8, sum#29] +Keys [2]: [s_state#9, s_county#8] +Functions [1]: [sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#3))#30] +Results [2]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#30,17,2) AS total_sum#31, s_state#9] + +(44) HashAggregate [codegen id : 20] +Input [2]: [total_sum#31, s_state#9] +Keys [1]: [s_state#9] +Functions [1]: [partial_sum(total_sum#31)] +Aggregate Attributes [2]: [sum#32, isEmpty#33] +Results [3]: [s_state#9, sum#34, isEmpty#35] + +(45) Exchange +Input [3]: [s_state#9, sum#34, isEmpty#35] +Arguments: 
hashpartitioning(s_state#9, 5), true, [id=#36] + +(46) HashAggregate [codegen id : 21] +Input [3]: [s_state#9, sum#34, isEmpty#35] +Keys [1]: [s_state#9] +Functions [1]: [sum(total_sum#31)] +Aggregate Attributes [1]: [sum(total_sum#31)#37] +Results [6]: [sum(total_sum#31)#37 AS total_sum#38, s_state#9, null AS s_county#39, 0 AS g_state#40, 1 AS g_county#41, 1 AS lochierarchy#42] + +(47) Union + +(48) HashAggregate [codegen id : 22] +Input [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] +Keys [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] +Functions: [] +Aggregate Attributes: [] +Results [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] + +(49) Exchange +Input [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] +Arguments: hashpartitioning(total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28, 5), true, [id=#43] + +(50) HashAggregate [codegen id : 23] +Input [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] +Keys [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] +Functions: [] +Aggregate Attributes: [] +Results [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] + +(51) ReusedExchange [Reuses operator id: 40] +Output [3]: [s_state#9, s_county#8, sum#44] + +(52) HashAggregate [codegen id : 33] +Input [3]: [s_state#9, s_county#8, sum#44] +Keys [2]: [s_state#9, s_county#8] +Functions [1]: [sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#3))#45] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#45,17,2) AS total_sum#31] + +(53) HashAggregate [codegen id : 33] +Input [1]: [total_sum#31] +Keys: [] +Functions [1]: [partial_sum(total_sum#31)] +Aggregate Attributes [2]: [sum#46, isEmpty#47] +Results [2]: [sum#48, isEmpty#49] + +(54) Exchange +Input 
[2]: [sum#48, isEmpty#49] +Arguments: SinglePartition, true, [id=#50] + +(55) HashAggregate [codegen id : 34] +Input [2]: [sum#48, isEmpty#49] +Keys: [] +Functions [1]: [sum(total_sum#31)] +Aggregate Attributes [1]: [sum(total_sum#31)#51] +Results [6]: [sum(total_sum#31)#51 AS total_sum#52, null AS s_state#53, null AS s_county#54, 1 AS g_state#55, 1 AS g_county#56, 2 AS lochierarchy#57] + +(56) Union + +(57) HashAggregate [codegen id : 35] +Input [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] +Keys [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] +Functions: [] +Aggregate Attributes: [] +Results [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] + +(58) Exchange +Input [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] +Arguments: hashpartitioning(total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28, 5), true, [id=#58] + +(59) HashAggregate [codegen id : 36] +Input [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] +Keys [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] +Functions: [] +Aggregate Attributes: [] +Results [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, CASE WHEN (g_county#27 = 0) THEN s_state#9 END AS _w0#59] + +(60) Exchange +Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#59] +Arguments: hashpartitioning(lochierarchy#28, _w0#59, 5), true, [id=#60] + +(61) Sort [codegen id : 37] +Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#59] +Arguments: [lochierarchy#28 ASC NULLS FIRST, _w0#59 ASC NULLS FIRST, total_sum#25 DESC NULLS LAST], false, 0 + +(62) Window +Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#59] +Arguments: [rank(total_sum#25) windowspecdefinition(lochierarchy#28, _w0#59, total_sum#25 DESC NULLS LAST, specifiedwindowframe(RowFrame, 
unboundedpreceding$(), currentrow$())) AS rank_within_parent#61], [lochierarchy#28, _w0#59], [total_sum#25 DESC NULLS LAST] + +(63) Project [codegen id : 38] +Output [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, rank_within_parent#61] +Input [6]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#59, rank_within_parent#61] + +(64) TakeOrderedAndProject +Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, rank_within_parent#61] +Arguments: 100, [lochierarchy#28 DESC NULLS LAST, CASE WHEN (lochierarchy#28 = 0) THEN s_state#9 END ASC NULLS FIRST, rank_within_parent#61 ASC NULLS FIRST], [total_sum#25, s_state#9, s_county#8, lochierarchy#28, rank_within_parent#61] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.sf100/simplified.txt new file mode 100644 index 0000000000000..f99d2b02f8429 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.sf100/simplified.txt @@ -0,0 +1,107 @@ +TakeOrderedAndProject [lochierarchy,rank_within_parent,s_county,s_state,total_sum] + WholeStageCodegen (38) + Project [lochierarchy,rank_within_parent,s_county,s_state,total_sum] + InputAdapter + Window [_w0,lochierarchy,total_sum] + WholeStageCodegen (37) + Sort [_w0,lochierarchy,total_sum] + InputAdapter + Exchange [_w0,lochierarchy] #1 + WholeStageCodegen (36) + HashAggregate [g_county,g_state,lochierarchy,s_county,s_state,total_sum] [_w0] + InputAdapter + Exchange [g_county,g_state,lochierarchy,s_county,s_state,total_sum] #2 + WholeStageCodegen (35) + HashAggregate [g_county,g_state,lochierarchy,s_county,s_state,total_sum] + InputAdapter + Union + WholeStageCodegen (23) + HashAggregate [g_county,g_state,lochierarchy,s_county,s_state,total_sum] + InputAdapter + Exchange [g_county,g_state,lochierarchy,s_county,s_state,total_sum] #3 + WholeStageCodegen (22) + HashAggregate 
[g_county,g_state,lochierarchy,s_county,s_state,total_sum] + InputAdapter + Union + WholeStageCodegen (10) + HashAggregate [s_county,s_state,sum] [g_county,g_state,lochierarchy,sum,sum(UnscaledValue(ss_net_profit)),total_sum] + InputAdapter + Exchange [s_county,s_state] #4 + WholeStageCodegen (9) + HashAggregate [s_county,s_state,ss_net_profit] [sum,sum] + Project [s_county,s_state,ss_net_profit] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_net_profit,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_net_profit,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (8) + BroadcastHashJoin [s_state,s_state] + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_county,s_state,s_store_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (7) + Project [s_state] + Filter [ranking] + InputAdapter + Window [_w2,s_state] + WholeStageCodegen (6) + Sort [_w2,s_state] + InputAdapter + Exchange [s_state] #8 + WholeStageCodegen (5) + HashAggregate [s_state,sum] [_w2,s_state,sum,sum(UnscaledValue(ss_net_profit))] + InputAdapter + Exchange [s_state] #9 + WholeStageCodegen (4) + HashAggregate [s_state,ss_net_profit] [sum,sum] + Project [s_state,ss_net_profit] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_net_profit,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_net_profit,ss_sold_date_sk,ss_store_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (3) + Filter [s_store_sk] + ColumnarToRow 
+ InputAdapter + Scan parquet default.store [s_state,s_store_sk] + WholeStageCodegen (21) + HashAggregate [isEmpty,s_state,sum] [g_county,g_state,isEmpty,lochierarchy,s_county,sum,sum(total_sum),total_sum] + InputAdapter + Exchange [s_state] #11 + WholeStageCodegen (20) + HashAggregate [s_state,total_sum] [isEmpty,isEmpty,sum,sum] + HashAggregate [s_county,s_state,sum] [sum,sum(UnscaledValue(ss_net_profit)),total_sum] + InputAdapter + ReusedExchange [s_county,s_state,sum] #4 + WholeStageCodegen (34) + HashAggregate [isEmpty,sum] [g_county,g_state,isEmpty,lochierarchy,s_county,s_state,sum,sum(total_sum),total_sum] + InputAdapter + Exchange #12 + WholeStageCodegen (33) + HashAggregate [total_sum] [isEmpty,isEmpty,sum,sum] + HashAggregate [s_county,s_state,sum] [sum,sum(UnscaledValue(ss_net_profit)),total_sum] + InputAdapter + ReusedExchange [s_county,s_state,sum] #4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a/explain.txt new file mode 100644 index 0000000000000..6fc2c3e2b48a1 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a/explain.txt @@ -0,0 +1,373 @@ +== Physical Plan == +TakeOrderedAndProject (64) ++- * Project (63) + +- Window (62) + +- * Sort (61) + +- Exchange (60) + +- * HashAggregate (59) + +- Exchange (58) + +- * HashAggregate (57) + +- Union (56) + :- * HashAggregate (50) + : +- Exchange (49) + : +- * HashAggregate (48) + : +- Union (47) + : :- * HashAggregate (41) + : : +- Exchange (40) + : : +- * HashAggregate (39) + : : +- * Project (38) + : : +- * BroadcastHashJoin Inner BuildRight (37) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + 
: : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (36) + : : +- * BroadcastHashJoin LeftSemi BuildRight (35) + : : :- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet default.store (11) + : : +- BroadcastExchange (34) + : : +- * Project (33) + : : +- * Filter (32) + : : +- Window (31) + : : +- * Sort (30) + : : +- Exchange (29) + : : +- * HashAggregate (28) + : : +- Exchange (27) + : : +- * HashAggregate (26) + : : +- * Project (25) + : : +- * BroadcastHashJoin Inner BuildRight (24) + : : :- * Project (22) + : : : +- * BroadcastHashJoin Inner BuildRight (21) + : : : :- * Filter (16) + : : : : +- * ColumnarToRow (15) + : : : : +- Scan parquet default.store_sales (14) + : : : +- BroadcastExchange (20) + : : : +- * Filter (19) + : : : +- * ColumnarToRow (18) + : : : +- Scan parquet default.store (17) + : : +- ReusedExchange (23) + : +- * HashAggregate (46) + : +- Exchange (45) + : +- * HashAggregate (44) + : +- * HashAggregate (43) + : +- ReusedExchange (42) + +- * HashAggregate (55) + +- Exchange (54) + +- * HashAggregate (53) + +- * HashAggregate (52) + +- ReusedExchange (51) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 9] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] + +(3) Filter [codegen id : 9] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#2)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#4, d_month_seq#5] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1212)) AND (d_month_seq#5 <= 1223)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(9) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(10) Project [codegen id : 9] +Output [2]: [ss_store_sk#2, ss_net_profit#3] +Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3, d_date_sk#4] + +(11) Scan parquet default.store +Output [3]: [s_store_sk#7, s_county#8, s_state#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 8] +Input [3]: [s_store_sk#7, s_county#8, s_state#9] + +(13) Filter [codegen id : 8] +Input [3]: [s_store_sk#7, s_county#8, s_state#9] +Condition : isnotnull(s_store_sk#7) + +(14) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(15) ColumnarToRow 
[codegen id : 4] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] + +(16) Filter [codegen id : 4] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] +Condition : (isnotnull(ss_store_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(17) Scan parquet default.store +Output [2]: [s_store_sk#7, s_state#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#7, s_state#9] + +(19) Filter [codegen id : 2] +Input [2]: [s_store_sk#7, s_state#9] +Condition : isnotnull(s_store_sk#7) + +(20) BroadcastExchange +Input [2]: [s_store_sk#7, s_state#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#7] +Join condition: None + +(22) Project [codegen id : 4] +Output [3]: [ss_sold_date_sk#1, ss_net_profit#3, s_state#9] +Input [5]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3, s_store_sk#7, s_state#9] + +(23) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#4] + +(24) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(25) Project [codegen id : 4] +Output [2]: [ss_net_profit#3, s_state#9] +Input [4]: [ss_sold_date_sk#1, ss_net_profit#3, s_state#9, d_date_sk#4] + +(26) HashAggregate [codegen id : 4] +Input [2]: [ss_net_profit#3, s_state#9] +Keys [1]: [s_state#9] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [sum#11] +Results [2]: [s_state#9, sum#12] + +(27) Exchange +Input [2]: [s_state#9, sum#12] +Arguments: hashpartitioning(s_state#9, 5), true, [id=#13] + +(28) HashAggregate [codegen id : 5] +Input [2]: [s_state#9, sum#12] +Keys [1]: [s_state#9] 
+Functions [1]: [sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#3))#14] +Results [3]: [s_state#9 AS s_state#15, s_state#9, MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#14,17,2) AS _w2#16] + +(29) Exchange +Input [3]: [s_state#15, s_state#9, _w2#16] +Arguments: hashpartitioning(s_state#9, 5), true, [id=#17] + +(30) Sort [codegen id : 6] +Input [3]: [s_state#15, s_state#9, _w2#16] +Arguments: [s_state#9 ASC NULLS FIRST, _w2#16 DESC NULLS LAST], false, 0 + +(31) Window +Input [3]: [s_state#15, s_state#9, _w2#16] +Arguments: [rank(_w2#16) windowspecdefinition(s_state#9, _w2#16 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS ranking#18], [s_state#9], [_w2#16 DESC NULLS LAST] + +(32) Filter [codegen id : 7] +Input [4]: [s_state#15, s_state#9, _w2#16, ranking#18] +Condition : (isnotnull(ranking#18) AND (ranking#18 <= 5)) + +(33) Project [codegen id : 7] +Output [1]: [s_state#15] +Input [4]: [s_state#15, s_state#9, _w2#16, ranking#18] + +(34) BroadcastExchange +Input [1]: [s_state#15] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#19] + +(35) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [s_state#9] +Right keys [1]: [s_state#15] +Join condition: None + +(36) BroadcastExchange +Input [3]: [s_store_sk#7, s_county#8, s_state#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#20] + +(37) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#7] +Join condition: None + +(38) Project [codegen id : 9] +Output [3]: [ss_net_profit#3, s_county#8, s_state#9] +Input [5]: [ss_store_sk#2, ss_net_profit#3, s_store_sk#7, s_county#8, s_state#9] + +(39) HashAggregate [codegen id : 9] +Input [3]: [ss_net_profit#3, s_county#8, s_state#9] +Keys [2]: [s_state#9, s_county#8] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [sum#21] 
+Results [3]: [s_state#9, s_county#8, sum#22] + +(40) Exchange +Input [3]: [s_state#9, s_county#8, sum#22] +Arguments: hashpartitioning(s_state#9, s_county#8, 5), true, [id=#23] + +(41) HashAggregate [codegen id : 10] +Input [3]: [s_state#9, s_county#8, sum#22] +Keys [2]: [s_state#9, s_county#8] +Functions [1]: [sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#3))#24] +Results [6]: [cast(MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#24,17,2) as decimal(27,2)) AS total_sum#25, s_state#9, s_county#8, 0 AS g_state#26, 0 AS g_county#27, 0 AS lochierarchy#28] + +(42) ReusedExchange [Reuses operator id: 40] +Output [3]: [s_state#9, s_county#8, sum#29] + +(43) HashAggregate [codegen id : 20] +Input [3]: [s_state#9, s_county#8, sum#29] +Keys [2]: [s_state#9, s_county#8] +Functions [1]: [sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#3))#30] +Results [2]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#30,17,2) AS total_sum#31, s_state#9] + +(44) HashAggregate [codegen id : 20] +Input [2]: [total_sum#31, s_state#9] +Keys [1]: [s_state#9] +Functions [1]: [partial_sum(total_sum#31)] +Aggregate Attributes [2]: [sum#32, isEmpty#33] +Results [3]: [s_state#9, sum#34, isEmpty#35] + +(45) Exchange +Input [3]: [s_state#9, sum#34, isEmpty#35] +Arguments: hashpartitioning(s_state#9, 5), true, [id=#36] + +(46) HashAggregate [codegen id : 21] +Input [3]: [s_state#9, sum#34, isEmpty#35] +Keys [1]: [s_state#9] +Functions [1]: [sum(total_sum#31)] +Aggregate Attributes [1]: [sum(total_sum#31)#37] +Results [6]: [sum(total_sum#31)#37 AS total_sum#38, s_state#9, null AS s_county#39, 0 AS g_state#40, 1 AS g_county#41, 1 AS lochierarchy#42] + +(47) Union + +(48) HashAggregate [codegen id : 22] +Input [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] +Keys [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] +Functions: [] 
+Aggregate Attributes: [] +Results [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] + +(49) Exchange +Input [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] +Arguments: hashpartitioning(total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28, 5), true, [id=#43] + +(50) HashAggregate [codegen id : 23] +Input [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] +Keys [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] +Functions: [] +Aggregate Attributes: [] +Results [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] + +(51) ReusedExchange [Reuses operator id: 40] +Output [3]: [s_state#9, s_county#8, sum#44] + +(52) HashAggregate [codegen id : 33] +Input [3]: [s_state#9, s_county#8, sum#44] +Keys [2]: [s_state#9, s_county#8] +Functions [1]: [sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#3))#45] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#45,17,2) AS total_sum#31] + +(53) HashAggregate [codegen id : 33] +Input [1]: [total_sum#31] +Keys: [] +Functions [1]: [partial_sum(total_sum#31)] +Aggregate Attributes [2]: [sum#46, isEmpty#47] +Results [2]: [sum#48, isEmpty#49] + +(54) Exchange +Input [2]: [sum#48, isEmpty#49] +Arguments: SinglePartition, true, [id=#50] + +(55) HashAggregate [codegen id : 34] +Input [2]: [sum#48, isEmpty#49] +Keys: [] +Functions [1]: [sum(total_sum#31)] +Aggregate Attributes [1]: [sum(total_sum#31)#51] +Results [6]: [sum(total_sum#31)#51 AS total_sum#52, null AS s_state#53, null AS s_county#54, 1 AS g_state#55, 1 AS g_county#56, 2 AS lochierarchy#57] + +(56) Union + +(57) HashAggregate [codegen id : 35] +Input [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] +Keys [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] 
+Functions: [] +Aggregate Attributes: [] +Results [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] + +(58) Exchange +Input [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] +Arguments: hashpartitioning(total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28, 5), true, [id=#58] + +(59) HashAggregate [codegen id : 36] +Input [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] +Keys [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] +Functions: [] +Aggregate Attributes: [] +Results [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, CASE WHEN (g_county#27 = 0) THEN s_state#9 END AS _w0#59] + +(60) Exchange +Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#59] +Arguments: hashpartitioning(lochierarchy#28, _w0#59, 5), true, [id=#60] + +(61) Sort [codegen id : 37] +Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#59] +Arguments: [lochierarchy#28 ASC NULLS FIRST, _w0#59 ASC NULLS FIRST, total_sum#25 DESC NULLS LAST], false, 0 + +(62) Window +Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#59] +Arguments: [rank(total_sum#25) windowspecdefinition(lochierarchy#28, _w0#59, total_sum#25 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#61], [lochierarchy#28, _w0#59], [total_sum#25 DESC NULLS LAST] + +(63) Project [codegen id : 38] +Output [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, rank_within_parent#61] +Input [6]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#59, rank_within_parent#61] + +(64) TakeOrderedAndProject +Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, rank_within_parent#61] +Arguments: 100, [lochierarchy#28 DESC NULLS LAST, CASE WHEN (lochierarchy#28 = 0) THEN s_state#9 END ASC NULLS FIRST, rank_within_parent#61 ASC NULLS FIRST], 
[total_sum#25, s_state#9, s_county#8, lochierarchy#28, rank_within_parent#61] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a/simplified.txt new file mode 100644 index 0000000000000..1cf0fbbea7ac6 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a/simplified.txt @@ -0,0 +1,107 @@ +TakeOrderedAndProject [lochierarchy,rank_within_parent,s_county,s_state,total_sum] + WholeStageCodegen (38) + Project [lochierarchy,rank_within_parent,s_county,s_state,total_sum] + InputAdapter + Window [_w0,lochierarchy,total_sum] + WholeStageCodegen (37) + Sort [_w0,lochierarchy,total_sum] + InputAdapter + Exchange [_w0,lochierarchy] #1 + WholeStageCodegen (36) + HashAggregate [g_county,g_state,lochierarchy,s_county,s_state,total_sum] [_w0] + InputAdapter + Exchange [g_county,g_state,lochierarchy,s_county,s_state,total_sum] #2 + WholeStageCodegen (35) + HashAggregate [g_county,g_state,lochierarchy,s_county,s_state,total_sum] + InputAdapter + Union + WholeStageCodegen (23) + HashAggregate [g_county,g_state,lochierarchy,s_county,s_state,total_sum] + InputAdapter + Exchange [g_county,g_state,lochierarchy,s_county,s_state,total_sum] #3 + WholeStageCodegen (22) + HashAggregate [g_county,g_state,lochierarchy,s_county,s_state,total_sum] + InputAdapter + Union + WholeStageCodegen (10) + HashAggregate [s_county,s_state,sum] [g_county,g_state,lochierarchy,sum,sum(UnscaledValue(ss_net_profit)),total_sum] + InputAdapter + Exchange [s_county,s_state] #4 + WholeStageCodegen (9) + HashAggregate [s_county,s_state,ss_net_profit] [sum,sum] + Project [s_county,s_state,ss_net_profit] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_net_profit,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales 
[ss_net_profit,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (8) + BroadcastHashJoin [s_state,s_state] + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_county,s_state,s_store_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (7) + Project [s_state] + Filter [ranking] + InputAdapter + Window [_w2,s_state] + WholeStageCodegen (6) + Sort [_w2,s_state] + InputAdapter + Exchange [s_state] #8 + WholeStageCodegen (5) + HashAggregate [s_state,sum] [_w2,s_state,sum,sum(UnscaledValue(ss_net_profit))] + InputAdapter + Exchange [s_state] #9 + WholeStageCodegen (4) + HashAggregate [s_state,ss_net_profit] [sum,sum] + Project [s_state,ss_net_profit] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [s_state,ss_net_profit,ss_sold_date_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_net_profit,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_state,s_store_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 + WholeStageCodegen (21) + HashAggregate [isEmpty,s_state,sum] [g_county,g_state,isEmpty,lochierarchy,s_county,sum,sum(total_sum),total_sum] + InputAdapter + Exchange [s_state] #11 + WholeStageCodegen (20) + HashAggregate [s_state,total_sum] [isEmpty,isEmpty,sum,sum] + HashAggregate [s_county,s_state,sum] [sum,sum(UnscaledValue(ss_net_profit)),total_sum] + InputAdapter + ReusedExchange [s_county,s_state,sum] #4 + WholeStageCodegen (34) + HashAggregate [isEmpty,sum] [g_county,g_state,isEmpty,lochierarchy,s_county,s_state,sum,sum(total_sum),total_sum] + 
InputAdapter + Exchange #12 + WholeStageCodegen (33) + HashAggregate [total_sum] [isEmpty,isEmpty,sum,sum] + HashAggregate [s_county,s_state,sum] [sum,sum(UnscaledValue(ss_net_profit)),total_sum] + InputAdapter + ReusedExchange [s_county,s_state,sum] #4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/explain.txt new file mode 100644 index 0000000000000..da6ded3e2e6c9 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/explain.txt @@ -0,0 +1,436 @@ +== Physical Plan == +TakeOrderedAndProject (79) ++- * HashAggregate (78) + +- Exchange (77) + +- * HashAggregate (76) + +- * Project (75) + +- SortMergeJoin LeftOuter (74) + :- * Sort (68) + : +- Exchange (67) + : +- * Project (66) + : +- * BroadcastHashJoin LeftOuter BuildRight (65) + : :- * Project (60) + : : +- * SortMergeJoin Inner (59) + : : :- * Sort (47) + : : : +- Exchange (46) + : : : +- * Project (45) + : : : +- * BroadcastHashJoin Inner BuildRight (44) + : : : :- * Project (32) + : : : : +- * SortMergeJoin Inner (31) + : : : : :- * Sort (25) + : : : : : +- Exchange (24) + : : : : : +- * Project (23) + : : : : : +- * BroadcastHashJoin Inner BuildRight (22) + : : : : : :- * Project (17) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : : : : :- * Project (10) + : : : : : : : +- * BroadcastHashJoin Inner BuildLeft (9) + : : : : : : : :- BroadcastExchange (5) + : : : : : : : : +- * Project (4) + : : : : : : : : +- * Filter (3) + : : : : : : : : +- * ColumnarToRow (2) + : : : : : : : : +- Scan parquet default.household_demographics (1) + : : : : : : : +- * Filter (8) + : : : : : : : +- * ColumnarToRow (7) + : : : : : : : +- Scan parquet default.catalog_sales (6) + : : : : : : +- BroadcastExchange (15) + : : : : : : +- * Project (14) + : : : : : : +- * Filter (13) + : : : : : : +- * ColumnarToRow (12) + : : : : : : +- 
Scan parquet default.customer_demographics (11) + : : : : : +- BroadcastExchange (21) + : : : : : +- * Filter (20) + : : : : : +- * ColumnarToRow (19) + : : : : : +- Scan parquet default.date_dim (18) + : : : : +- * Sort (30) + : : : : +- Exchange (29) + : : : : +- * Filter (28) + : : : : +- * ColumnarToRow (27) + : : : : +- Scan parquet default.item (26) + : : : +- BroadcastExchange (43) + : : : +- * Project (42) + : : : +- * BroadcastHashJoin Inner BuildRight (41) + : : : :- * Filter (35) + : : : : +- * ColumnarToRow (34) + : : : : +- Scan parquet default.date_dim (33) + : : : +- BroadcastExchange (40) + : : : +- * Project (39) + : : : +- * Filter (38) + : : : +- * ColumnarToRow (37) + : : : +- Scan parquet default.date_dim (36) + : : +- * Sort (58) + : : +- Exchange (57) + : : +- * Project (56) + : : +- * BroadcastHashJoin Inner BuildLeft (55) + : : :- BroadcastExchange (51) + : : : +- * Filter (50) + : : : +- * ColumnarToRow (49) + : : : +- Scan parquet default.warehouse (48) + : : +- * Filter (54) + : : +- * ColumnarToRow (53) + : : +- Scan parquet default.inventory (52) + : +- BroadcastExchange (64) + : +- * Filter (63) + : +- * ColumnarToRow (62) + : +- Scan parquet default.promotion (61) + +- * Sort (73) + +- Exchange (72) + +- * Filter (71) + +- * ColumnarToRow (70) + +- Scan parquet default.catalog_returns (69) + + +(1) Scan parquet default.household_demographics +Output [2]: [hd_demo_sk#1, hd_buy_potential#2] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/household_demographics] +PushedFilters: [IsNotNull(hd_buy_potential), EqualTo(hd_buy_potential,1001-5000), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [2]: [hd_demo_sk#1, hd_buy_potential#2] + +(3) Filter [codegen id : 1] +Input [2]: [hd_demo_sk#1, hd_buy_potential#2] +Condition : ((isnotnull(hd_buy_potential#2) AND (hd_buy_potential#2 = 
1001-5000)) AND isnotnull(hd_demo_sk#1)) + +(4) Project [codegen id : 1] +Output [1]: [hd_demo_sk#1] +Input [2]: [hd_demo_sk#1, hd_buy_potential#2] + +(5) BroadcastExchange +Input [1]: [hd_demo_sk#1] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#3] + +(6) Scan parquet default.catalog_sales +Output [8]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_bill_cdemo_sk#6, cs_bill_hdemo_sk#7, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_quantity), IsNotNull(cs_item_sk), IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_hdemo_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_ship_date_sk)] +ReadSchema: struct + +(7) ColumnarToRow +Input [8]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_bill_cdemo_sk#6, cs_bill_hdemo_sk#7, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11] + +(8) Filter +Input [8]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_bill_cdemo_sk#6, cs_bill_hdemo_sk#7, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11] +Condition : (((((isnotnull(cs_quantity#11) AND isnotnull(cs_item_sk#8)) AND isnotnull(cs_bill_cdemo_sk#6)) AND isnotnull(cs_bill_hdemo_sk#7)) AND isnotnull(cs_sold_date_sk#4)) AND isnotnull(cs_ship_date_sk#5)) + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [hd_demo_sk#1] +Right keys [1]: [cs_bill_hdemo_sk#7] +Join condition: None + +(10) Project [codegen id : 4] +Output [7]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_bill_cdemo_sk#6, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11] +Input [9]: [hd_demo_sk#1, cs_sold_date_sk#4, cs_ship_date_sk#5, cs_bill_cdemo_sk#6, cs_bill_hdemo_sk#7, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11] + +(11) Scan parquet default.customer_demographics +Output [2]: [cd_demo_sk#12, 
cd_marital_status#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/customer_demographics] +PushedFilters: [IsNotNull(cd_marital_status), EqualTo(cd_marital_status,M), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [cd_demo_sk#12, cd_marital_status#13] + +(13) Filter [codegen id : 2] +Input [2]: [cd_demo_sk#12, cd_marital_status#13] +Condition : ((isnotnull(cd_marital_status#13) AND (cd_marital_status#13 = M)) AND isnotnull(cd_demo_sk#12)) + +(14) Project [codegen id : 2] +Output [1]: [cd_demo_sk#12] +Input [2]: [cd_demo_sk#12, cd_marital_status#13] + +(15) BroadcastExchange +Input [1]: [cd_demo_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_bill_cdemo_sk#6] +Right keys [1]: [cd_demo_sk#12] +Join condition: None + +(17) Project [codegen id : 4] +Output [6]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11] +Input [8]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_bill_cdemo_sk#6, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, cd_demo_sk#12] + +(18) Scan parquet default.date_dim +Output [2]: [d_date_sk#15, d_date#16] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_date)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#15, d_date#16] + +(20) Filter [codegen id : 3] +Input [2]: [d_date_sk#15, d_date#16] +Condition : (isnotnull(d_date_sk#15) AND isnotnull(d_date#16)) + +(21) BroadcastExchange +Input [2]: [d_date_sk#15, d_date#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as 
bigint)),false), [id=#17] + +(22) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_ship_date_sk#5] +Right keys [1]: [d_date_sk#15] +Join condition: None + +(23) Project [codegen id : 4] +Output [6]: [cs_sold_date_sk#4, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date#16] +Input [8]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date_sk#15, d_date#16] + +(24) Exchange +Input [6]: [cs_sold_date_sk#4, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date#16] +Arguments: hashpartitioning(cs_item_sk#8, 5), true, [id=#18] + +(25) Sort [codegen id : 5] +Input [6]: [cs_sold_date_sk#4, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date#16] +Arguments: [cs_item_sk#8 ASC NULLS FIRST], false, 0 + +(26) Scan parquet default.item +Output [2]: [i_item_sk#19, i_item_desc#20] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 6] +Input [2]: [i_item_sk#19, i_item_desc#20] + +(28) Filter [codegen id : 6] +Input [2]: [i_item_sk#19, i_item_desc#20] +Condition : isnotnull(i_item_sk#19) + +(29) Exchange +Input [2]: [i_item_sk#19, i_item_desc#20] +Arguments: hashpartitioning(i_item_sk#19, 5), true, [id=#21] + +(30) Sort [codegen id : 7] +Input [2]: [i_item_sk#19, i_item_desc#20] +Arguments: [i_item_sk#19 ASC NULLS FIRST], false, 0 + +(31) SortMergeJoin [codegen id : 10] +Left keys [1]: [cs_item_sk#8] +Right keys [1]: [i_item_sk#19] +Join condition: None + +(32) Project [codegen id : 10] +Output [7]: [cs_sold_date_sk#4, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date#16, i_item_desc#20] +Input [8]: [cs_sold_date_sk#4, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date#16, i_item_sk#19, i_item_desc#20] + 
+(33) Scan parquet default.date_dim +Output [2]: [d_date_sk#22, d_week_seq#23] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 9] +Input [2]: [d_date_sk#22, d_week_seq#23] + +(35) Filter [codegen id : 9] +Input [2]: [d_date_sk#22, d_week_seq#23] +Condition : (isnotnull(d_week_seq#23) AND isnotnull(d_date_sk#22)) + +(36) Scan parquet default.date_dim +Output [4]: [d_date_sk#24, d_date#25, d_week_seq#26, d_year#27] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk), IsNotNull(d_week_seq), IsNotNull(d_date)] +ReadSchema: struct + +(37) ColumnarToRow [codegen id : 8] +Input [4]: [d_date_sk#24, d_date#25, d_week_seq#26, d_year#27] + +(38) Filter [codegen id : 8] +Input [4]: [d_date_sk#24, d_date#25, d_week_seq#26, d_year#27] +Condition : ((((isnotnull(d_year#27) AND (d_year#27 = 2001)) AND isnotnull(d_date_sk#24)) AND isnotnull(d_week_seq#26)) AND isnotnull(d_date#25)) + +(39) Project [codegen id : 8] +Output [3]: [d_date_sk#24, d_date#25, d_week_seq#26] +Input [4]: [d_date_sk#24, d_date#25, d_week_seq#26, d_year#27] + +(40) BroadcastExchange +Input [3]: [d_date_sk#24, d_date#25, d_week_seq#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [id=#28] + +(41) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [d_week_seq#23] +Right keys [1]: [d_week_seq#26] +Join condition: None + +(42) Project [codegen id : 9] +Output [4]: [d_date_sk#22, d_date_sk#24, d_date#25, d_week_seq#26] +Input [5]: [d_date_sk#22, d_week_seq#23, d_date_sk#24, d_date#25, d_week_seq#26] + +(43) BroadcastExchange 
+Input [4]: [d_date_sk#22, d_date_sk#24, d_date#25, d_week_seq#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#29] + +(44) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_sold_date_sk#4] +Right keys [1]: [d_date_sk#24] +Join condition: (d_date#16 > d_date#25 + 5 days) + +(45) Project [codegen id : 10] +Output [7]: [cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, i_item_desc#20, d_date_sk#22, d_week_seq#26] +Input [11]: [cs_sold_date_sk#4, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date#16, i_item_desc#20, d_date_sk#22, d_date_sk#24, d_date#25, d_week_seq#26] + +(46) Exchange +Input [7]: [cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, i_item_desc#20, d_date_sk#22, d_week_seq#26] +Arguments: hashpartitioning(cs_item_sk#8, d_date_sk#22, 5), true, [id=#30] + +(47) Sort [codegen id : 11] +Input [7]: [cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, i_item_desc#20, d_date_sk#22, d_week_seq#26] +Arguments: [cs_item_sk#8 ASC NULLS FIRST, d_date_sk#22 ASC NULLS FIRST], false, 0 + +(48) Scan parquet default.warehouse +Output [2]: [w_warehouse_sk#31, w_warehouse_name#32] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(49) ColumnarToRow [codegen id : 12] +Input [2]: [w_warehouse_sk#31, w_warehouse_name#32] + +(50) Filter [codegen id : 12] +Input [2]: [w_warehouse_sk#31, w_warehouse_name#32] +Condition : isnotnull(w_warehouse_sk#31) + +(51) BroadcastExchange +Input [2]: [w_warehouse_sk#31, w_warehouse_name#32] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#33] + +(52) Scan parquet default.inventory +Output [4]: [inv_date_sk#34, inv_item_sk#35, inv_warehouse_sk#36, inv_quantity_on_hand#37] +Batched: true 
+Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/inventory] +PushedFilters: [IsNotNull(inv_quantity_on_hand), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)] +ReadSchema: struct + +(53) ColumnarToRow +Input [4]: [inv_date_sk#34, inv_item_sk#35, inv_warehouse_sk#36, inv_quantity_on_hand#37] + +(54) Filter +Input [4]: [inv_date_sk#34, inv_item_sk#35, inv_warehouse_sk#36, inv_quantity_on_hand#37] +Condition : (((isnotnull(inv_quantity_on_hand#37) AND isnotnull(inv_item_sk#35)) AND isnotnull(inv_warehouse_sk#36)) AND isnotnull(inv_date_sk#34)) + +(55) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [w_warehouse_sk#31] +Right keys [1]: [inv_warehouse_sk#36] +Join condition: None + +(56) Project [codegen id : 13] +Output [4]: [w_warehouse_name#32, inv_date_sk#34, inv_item_sk#35, inv_quantity_on_hand#37] +Input [6]: [w_warehouse_sk#31, w_warehouse_name#32, inv_date_sk#34, inv_item_sk#35, inv_warehouse_sk#36, inv_quantity_on_hand#37] + +(57) Exchange +Input [4]: [w_warehouse_name#32, inv_date_sk#34, inv_item_sk#35, inv_quantity_on_hand#37] +Arguments: hashpartitioning(inv_item_sk#35, inv_date_sk#34, 5), true, [id=#38] + +(58) Sort [codegen id : 14] +Input [4]: [w_warehouse_name#32, inv_date_sk#34, inv_item_sk#35, inv_quantity_on_hand#37] +Arguments: [inv_item_sk#35 ASC NULLS FIRST, inv_date_sk#34 ASC NULLS FIRST], false, 0 + +(59) SortMergeJoin [codegen id : 16] +Left keys [2]: [cs_item_sk#8, d_date_sk#22] +Right keys [2]: [inv_item_sk#35, inv_date_sk#34] +Join condition: (inv_quantity_on_hand#37 < cs_quantity#11) + +(60) Project [codegen id : 16] +Output [6]: [cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, w_warehouse_name#32, i_item_desc#20, d_week_seq#26] +Input [11]: [cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, i_item_desc#20, d_date_sk#22, d_week_seq#26, w_warehouse_name#32, inv_date_sk#34, inv_item_sk#35, 
inv_quantity_on_hand#37] + +(61) Scan parquet default.promotion +Output [1]: [p_promo_sk#39] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/promotion] +PushedFilters: [IsNotNull(p_promo_sk)] +ReadSchema: struct + +(62) ColumnarToRow [codegen id : 15] +Input [1]: [p_promo_sk#39] + +(63) Filter [codegen id : 15] +Input [1]: [p_promo_sk#39] +Condition : isnotnull(p_promo_sk#39) + +(64) BroadcastExchange +Input [1]: [p_promo_sk#39] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#40] + +(65) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [cs_promo_sk#9] +Right keys [1]: [p_promo_sk#39] +Join condition: None + +(66) Project [codegen id : 16] +Output [5]: [cs_item_sk#8, cs_order_number#10, w_warehouse_name#32, i_item_desc#20, d_week_seq#26] +Input [7]: [cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, w_warehouse_name#32, i_item_desc#20, d_week_seq#26, p_promo_sk#39] + +(67) Exchange +Input [5]: [cs_item_sk#8, cs_order_number#10, w_warehouse_name#32, i_item_desc#20, d_week_seq#26] +Arguments: hashpartitioning(cs_item_sk#8, cs_order_number#10, 5), true, [id=#41] + +(68) Sort [codegen id : 17] +Input [5]: [cs_item_sk#8, cs_order_number#10, w_warehouse_name#32, i_item_desc#20, d_week_seq#26] +Arguments: [cs_item_sk#8 ASC NULLS FIRST, cs_order_number#10 ASC NULLS FIRST], false, 0 + +(69) Scan parquet default.catalog_returns +Output [2]: [cr_item_sk#42, cr_order_number#43] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/catalog_returns] +PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] +ReadSchema: struct + +(70) ColumnarToRow [codegen id : 18] +Input [2]: [cr_item_sk#42, cr_order_number#43] + +(71) Filter [codegen id : 18] +Input [2]: [cr_item_sk#42, cr_order_number#43] 
+Condition : (isnotnull(cr_item_sk#42) AND isnotnull(cr_order_number#43)) + +(72) Exchange +Input [2]: [cr_item_sk#42, cr_order_number#43] +Arguments: hashpartitioning(cr_item_sk#42, cr_order_number#43, 5), true, [id=#44] + +(73) Sort [codegen id : 19] +Input [2]: [cr_item_sk#42, cr_order_number#43] +Arguments: [cr_item_sk#42 ASC NULLS FIRST, cr_order_number#43 ASC NULLS FIRST], false, 0 + +(74) SortMergeJoin +Left keys [2]: [cs_item_sk#8, cs_order_number#10] +Right keys [2]: [cr_item_sk#42, cr_order_number#43] +Join condition: None + +(75) Project [codegen id : 20] +Output [3]: [w_warehouse_name#32, i_item_desc#20, d_week_seq#26] +Input [7]: [cs_item_sk#8, cs_order_number#10, w_warehouse_name#32, i_item_desc#20, d_week_seq#26, cr_item_sk#42, cr_order_number#43] + +(76) HashAggregate [codegen id : 20] +Input [3]: [w_warehouse_name#32, i_item_desc#20, d_week_seq#26] +Keys [3]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#45] +Results [4]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26, count#46] + +(77) Exchange +Input [4]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26, count#46] +Arguments: hashpartitioning(i_item_desc#20, w_warehouse_name#32, d_week_seq#26, 5), true, [id=#47] + +(78) HashAggregate [codegen id : 21] +Input [4]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26, count#46] +Keys [3]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#48] +Results [6]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26, count(1)#48 AS no_promo#49, count(1)#48 AS promo#50, count(1)#48 AS total_cnt#51] + +(79) TakeOrderedAndProject +Input [6]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26, no_promo#49, promo#50, total_cnt#51] +Arguments: 100, [total_cnt#51 DESC NULLS LAST, i_item_desc#20 ASC NULLS FIRST, w_warehouse_name#32 ASC NULLS FIRST, d_week_seq#26 ASC NULLS FIRST], [i_item_desc#20, w_warehouse_name#32, 
d_week_seq#26, no_promo#49, promo#50, total_cnt#51] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/simplified.txt new file mode 100644 index 0000000000000..d0b53caa76851 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/simplified.txt @@ -0,0 +1,130 @@ +TakeOrderedAndProject [d_week_seq,i_item_desc,no_promo,promo,total_cnt,w_warehouse_name] + WholeStageCodegen (21) + HashAggregate [count,d_week_seq,i_item_desc,w_warehouse_name] [count,count(1),no_promo,promo,total_cnt] + InputAdapter + Exchange [d_week_seq,i_item_desc,w_warehouse_name] #1 + WholeStageCodegen (20) + HashAggregate [d_week_seq,i_item_desc,w_warehouse_name] [count,count] + Project [d_week_seq,i_item_desc,w_warehouse_name] + InputAdapter + SortMergeJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + WholeStageCodegen (17) + Sort [cs_item_sk,cs_order_number] + InputAdapter + Exchange [cs_item_sk,cs_order_number] #2 + WholeStageCodegen (16) + Project [cs_item_sk,cs_order_number,d_week_seq,i_item_desc,w_warehouse_name] + BroadcastHashJoin [cs_promo_sk,p_promo_sk] + Project [cs_item_sk,cs_order_number,cs_promo_sk,d_week_seq,i_item_desc,w_warehouse_name] + SortMergeJoin [cs_item_sk,cs_quantity,d_date_sk,inv_date_sk,inv_item_sk,inv_quantity_on_hand] + InputAdapter + WholeStageCodegen (11) + Sort [cs_item_sk,d_date_sk] + InputAdapter + Exchange [cs_item_sk,d_date_sk] #3 + WholeStageCodegen (10) + Project [cs_item_sk,cs_order_number,cs_promo_sk,cs_quantity,d_date_sk,d_week_seq,i_item_desc] + BroadcastHashJoin [cs_sold_date_sk,d_date,d_date,d_date_sk] + Project [cs_item_sk,cs_order_number,cs_promo_sk,cs_quantity,cs_sold_date_sk,d_date,i_item_desc] + SortMergeJoin [cs_item_sk,i_item_sk] + InputAdapter + WholeStageCodegen (5) + Sort [cs_item_sk] + InputAdapter + Exchange [cs_item_sk] #4 + WholeStageCodegen (4) + 
Project [cs_item_sk,cs_order_number,cs_promo_sk,cs_quantity,cs_sold_date_sk,d_date] + BroadcastHashJoin [cs_ship_date_sk,d_date_sk] + Project [cs_item_sk,cs_order_number,cs_promo_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk] + BroadcastHashJoin [cd_demo_sk,cs_bill_cdemo_sk] + Project [cs_bill_cdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk] + BroadcastHashJoin [cs_bill_hdemo_sk,hd_demo_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [hd_demo_sk] + Filter [hd_buy_potential,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_buy_potential,hd_demo_sk] + Filter [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + Project [cd_demo_sk] + Filter [cd_demo_sk,cd_marital_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (3) + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + WholeStageCodegen (7) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #8 + WholeStageCodegen (6) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_desc,i_item_sk] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (9) + Project [d_date,d_date_sk,d_date_sk,d_week_seq] + BroadcastHashJoin [d_week_seq,d_week_seq] + Filter [d_date_sk,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_week_seq] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (8) + Project [d_date,d_date_sk,d_week_seq] + Filter 
[d_date,d_date_sk,d_week_seq,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk,d_week_seq,d_year] + InputAdapter + WholeStageCodegen (14) + Sort [inv_date_sk,inv_item_sk] + InputAdapter + Exchange [inv_date_sk,inv_item_sk] #11 + WholeStageCodegen (13) + Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,w_warehouse_name] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (12) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.warehouse [w_warehouse_name,w_warehouse_sk] + Filter [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (15) + Filter [p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.promotion [p_promo_sk] + WholeStageCodegen (19) + Sort [cr_item_sk,cr_order_number] + InputAdapter + Exchange [cr_item_sk,cr_order_number] #14 + WholeStageCodegen (18) + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72/explain.txt new file mode 100644 index 0000000000000..d5b836aeb8004 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72/explain.txt @@ -0,0 +1,391 @@ +== Physical Plan == +TakeOrderedAndProject (70) ++- * HashAggregate (69) + +- Exchange (68) + +- * HashAggregate (67) + +- * Project (66) + +- * BroadcastHashJoin LeftOuter BuildRight (65) + :- * Project (60) + : +- * BroadcastHashJoin LeftOuter BuildRight (59) + : :- * Project (54) + : : +- * BroadcastHashJoin Inner BuildRight (53) + : : :- * Project (48) + : : : +- * 
BroadcastHashJoin Inner BuildRight (47) + : : : :- * Project (42) + : : : : +- * BroadcastHashJoin Inner BuildRight (41) + : : : : :- * Project (35) + : : : : : +- * BroadcastHashJoin Inner BuildRight (34) + : : : : : :- * Project (28) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (27) + : : : : : : :- * Project (21) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (20) + : : : : : : : :- * Project (15) + : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : : : : :- * Project (9) + : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : : : : : : :- * Filter (3) + : : : : : : : : : : +- * ColumnarToRow (2) + : : : : : : : : : : +- Scan parquet default.catalog_sales (1) + : : : : : : : : : +- BroadcastExchange (7) + : : : : : : : : : +- * Filter (6) + : : : : : : : : : +- * ColumnarToRow (5) + : : : : : : : : : +- Scan parquet default.inventory (4) + : : : : : : : : +- BroadcastExchange (13) + : : : : : : : : +- * Filter (12) + : : : : : : : : +- * ColumnarToRow (11) + : : : : : : : : +- Scan parquet default.warehouse (10) + : : : : : : : +- BroadcastExchange (19) + : : : : : : : +- * Filter (18) + : : : : : : : +- * ColumnarToRow (17) + : : : : : : : +- Scan parquet default.item (16) + : : : : : : +- BroadcastExchange (26) + : : : : : : +- * Project (25) + : : : : : : +- * Filter (24) + : : : : : : +- * ColumnarToRow (23) + : : : : : : +- Scan parquet default.customer_demographics (22) + : : : : : +- BroadcastExchange (33) + : : : : : +- * Project (32) + : : : : : +- * Filter (31) + : : : : : +- * ColumnarToRow (30) + : : : : : +- Scan parquet default.household_demographics (29) + : : : : +- BroadcastExchange (40) + : : : : +- * Project (39) + : : : : +- * Filter (38) + : : : : +- * ColumnarToRow (37) + : : : : +- Scan parquet default.date_dim (36) + : : : +- BroadcastExchange (46) + : : : +- * Filter (45) + : : : +- * ColumnarToRow (44) + : : : +- Scan parquet default.date_dim (43) + : : +- BroadcastExchange (52) 
+ : : +- * Filter (51) + : : +- * ColumnarToRow (50) + : : +- Scan parquet default.date_dim (49) + : +- BroadcastExchange (58) + : +- * Filter (57) + : +- * ColumnarToRow (56) + : +- Scan parquet default.promotion (55) + +- BroadcastExchange (64) + +- * Filter (63) + +- * ColumnarToRow (62) + +- Scan parquet default.catalog_returns (61) + + +(1) Scan parquet default.catalog_sales +Output [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_quantity), IsNotNull(cs_item_sk), IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_hdemo_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_ship_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 11] +Input [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8] + +(3) Filter [codegen id : 11] +Input [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8] +Condition : (((((isnotnull(cs_quantity#8) AND isnotnull(cs_item_sk#5)) AND isnotnull(cs_bill_cdemo_sk#3)) AND isnotnull(cs_bill_hdemo_sk#4)) AND isnotnull(cs_sold_date_sk#1)) AND isnotnull(cs_ship_date_sk#2)) + +(4) Scan parquet default.inventory +Output [4]: [inv_date_sk#9, inv_item_sk#10, inv_warehouse_sk#11, inv_quantity_on_hand#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/inventory] +PushedFilters: [IsNotNull(inv_quantity_on_hand), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: 
[inv_date_sk#9, inv_item_sk#10, inv_warehouse_sk#11, inv_quantity_on_hand#12] + +(6) Filter [codegen id : 1] +Input [4]: [inv_date_sk#9, inv_item_sk#10, inv_warehouse_sk#11, inv_quantity_on_hand#12] +Condition : (((isnotnull(inv_quantity_on_hand#12) AND isnotnull(inv_item_sk#10)) AND isnotnull(inv_warehouse_sk#11)) AND isnotnull(inv_date_sk#9)) + +(7) BroadcastExchange +Input [4]: [inv_date_sk#9, inv_item_sk#10, inv_warehouse_sk#11, inv_quantity_on_hand#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#13] + +(8) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_item_sk#5] +Right keys [1]: [inv_item_sk#10] +Join condition: (inv_quantity_on_hand#12 < cs_quantity#8) + +(9) Project [codegen id : 11] +Output [9]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, inv_warehouse_sk#11] +Input [12]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, inv_date_sk#9, inv_item_sk#10, inv_warehouse_sk#11, inv_quantity_on_hand#12] + +(10) Scan parquet default.warehouse +Output [2]: [w_warehouse_sk#14, w_warehouse_name#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [w_warehouse_sk#14, w_warehouse_name#15] + +(12) Filter [codegen id : 2] +Input [2]: [w_warehouse_sk#14, w_warehouse_name#15] +Condition : isnotnull(w_warehouse_sk#14) + +(13) BroadcastExchange +Input [2]: [w_warehouse_sk#14, w_warehouse_name#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#16] + +(14) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [inv_warehouse_sk#11] +Right keys [1]: 
[w_warehouse_sk#14] +Join condition: None + +(15) Project [codegen id : 11] +Output [9]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15] +Input [11]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, inv_warehouse_sk#11, w_warehouse_sk#14, w_warehouse_name#15] + +(16) Scan parquet default.item +Output [2]: [i_item_sk#17, i_item_desc#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [2]: [i_item_sk#17, i_item_desc#18] + +(18) Filter [codegen id : 3] +Input [2]: [i_item_sk#17, i_item_desc#18] +Condition : isnotnull(i_item_sk#17) + +(19) BroadcastExchange +Input [2]: [i_item_sk#17, i_item_desc#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] + +(20) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_item_sk#5] +Right keys [1]: [i_item_sk#17] +Join condition: None + +(21) Project [codegen id : 11] +Output [10]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15, i_item_desc#18] +Input [11]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15, i_item_sk#17, i_item_desc#18] + +(22) Scan parquet default.customer_demographics +Output [2]: [cd_demo_sk#20, cd_marital_status#21] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/customer_demographics] +PushedFilters: 
[IsNotNull(cd_marital_status), EqualTo(cd_marital_status,M), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 4] +Input [2]: [cd_demo_sk#20, cd_marital_status#21] + +(24) Filter [codegen id : 4] +Input [2]: [cd_demo_sk#20, cd_marital_status#21] +Condition : ((isnotnull(cd_marital_status#21) AND (cd_marital_status#21 = M)) AND isnotnull(cd_demo_sk#20)) + +(25) Project [codegen id : 4] +Output [1]: [cd_demo_sk#20] +Input [2]: [cd_demo_sk#20, cd_marital_status#21] + +(26) BroadcastExchange +Input [1]: [cd_demo_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] + +(27) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_bill_cdemo_sk#3] +Right keys [1]: [cd_demo_sk#20] +Join condition: None + +(28) Project [codegen id : 11] +Output [9]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15, i_item_desc#18] +Input [11]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15, i_item_desc#18, cd_demo_sk#20] + +(29) Scan parquet default.household_demographics +Output [2]: [hd_demo_sk#23, hd_buy_potential#24] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/household_demographics] +PushedFilters: [IsNotNull(hd_buy_potential), EqualTo(hd_buy_potential,1001-5000), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 5] +Input [2]: [hd_demo_sk#23, hd_buy_potential#24] + +(31) Filter [codegen id : 5] +Input [2]: [hd_demo_sk#23, hd_buy_potential#24] +Condition : ((isnotnull(hd_buy_potential#24) AND (hd_buy_potential#24 = 1001-5000)) AND isnotnull(hd_demo_sk#23)) + +(32) Project [codegen id : 5] +Output [1]: [hd_demo_sk#23] +Input [2]: [hd_demo_sk#23, 
hd_buy_potential#24] + +(33) BroadcastExchange +Input [1]: [hd_demo_sk#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] + +(34) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_bill_hdemo_sk#4] +Right keys [1]: [hd_demo_sk#23] +Join condition: None + +(35) Project [codegen id : 11] +Output [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15, i_item_desc#18] +Input [10]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15, i_item_desc#18, hd_demo_sk#23] + +(36) Scan parquet default.date_dim +Output [4]: [d_date_sk#26, d_date#27, d_week_seq#28, d_year#29] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk), IsNotNull(d_week_seq), IsNotNull(d_date)] +ReadSchema: struct + +(37) ColumnarToRow [codegen id : 6] +Input [4]: [d_date_sk#26, d_date#27, d_week_seq#28, d_year#29] + +(38) Filter [codegen id : 6] +Input [4]: [d_date_sk#26, d_date#27, d_week_seq#28, d_year#29] +Condition : ((((isnotnull(d_year#29) AND (d_year#29 = 2001)) AND isnotnull(d_date_sk#26)) AND isnotnull(d_week_seq#28)) AND isnotnull(d_date#27)) + +(39) Project [codegen id : 6] +Output [3]: [d_date_sk#26, d_date#27, d_week_seq#28] +Input [4]: [d_date_sk#26, d_date#27, d_week_seq#28, d_year#29] + +(40) BroadcastExchange +Input [3]: [d_date_sk#26, d_date#27, d_week_seq#28] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#30] + +(41) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#26] +Join condition: None + +(42) Project [codegen id : 11] +Output [9]: [cs_ship_date_sk#2, cs_item_sk#5, cs_promo_sk#6, 
cs_order_number#7, inv_date_sk#9, w_warehouse_name#15, i_item_desc#18, d_date#27, d_week_seq#28] +Input [11]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15, i_item_desc#18, d_date_sk#26, d_date#27, d_week_seq#28] + +(43) Scan parquet default.date_dim +Output [2]: [d_date_sk#31, d_week_seq#32] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(44) ColumnarToRow [codegen id : 7] +Input [2]: [d_date_sk#31, d_week_seq#32] + +(45) Filter [codegen id : 7] +Input [2]: [d_date_sk#31, d_week_seq#32] +Condition : (isnotnull(d_week_seq#32) AND isnotnull(d_date_sk#31)) + +(46) BroadcastExchange +Input [2]: [d_date_sk#31, d_week_seq#32] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, false] as bigint), 32) | (cast(input[0, int, false] as bigint) & 4294967295))),false), [id=#33] + +(47) BroadcastHashJoin [codegen id : 11] +Left keys [2]: [d_week_seq#28, inv_date_sk#9] +Right keys [2]: [d_week_seq#32, d_date_sk#31] +Join condition: None + +(48) Project [codegen id : 11] +Output [8]: [cs_ship_date_sk#2, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#15, i_item_desc#18, d_date#27, d_week_seq#28] +Input [11]: [cs_ship_date_sk#2, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15, i_item_desc#18, d_date#27, d_week_seq#28, d_date_sk#31, d_week_seq#32] + +(49) Scan parquet default.date_dim +Output [2]: [d_date_sk#34, d_date#35] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_date)] +ReadSchema: struct + +(50) ColumnarToRow [codegen id : 8] +Input [2]: [d_date_sk#34, 
d_date#35] + +(51) Filter [codegen id : 8] +Input [2]: [d_date_sk#34, d_date#35] +Condition : (isnotnull(d_date_sk#34) AND isnotnull(d_date#35)) + +(52) BroadcastExchange +Input [2]: [d_date_sk#34, d_date#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#36] + +(53) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_ship_date_sk#2] +Right keys [1]: [d_date_sk#34] +Join condition: (d_date#35 > d_date#27 + 5 days) + +(54) Project [codegen id : 11] +Output [6]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#15, i_item_desc#18, d_week_seq#28] +Input [10]: [cs_ship_date_sk#2, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#15, i_item_desc#18, d_date#27, d_week_seq#28, d_date_sk#34, d_date#35] + +(55) Scan parquet default.promotion +Output [1]: [p_promo_sk#37] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/promotion] +PushedFilters: [IsNotNull(p_promo_sk)] +ReadSchema: struct + +(56) ColumnarToRow [codegen id : 9] +Input [1]: [p_promo_sk#37] + +(57) Filter [codegen id : 9] +Input [1]: [p_promo_sk#37] +Condition : isnotnull(p_promo_sk#37) + +(58) BroadcastExchange +Input [1]: [p_promo_sk#37] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#38] + +(59) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_promo_sk#6] +Right keys [1]: [p_promo_sk#37] +Join condition: None + +(60) Project [codegen id : 11] +Output [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#15, i_item_desc#18, d_week_seq#28] +Input [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#15, i_item_desc#18, d_week_seq#28, p_promo_sk#37] + +(61) Scan parquet default.catalog_returns +Output [2]: [cr_item_sk#39, cr_order_number#40] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/catalog_returns] +PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(62) ColumnarToRow [codegen id : 10] +Input [2]: [cr_item_sk#39, cr_order_number#40] + +(63) Filter [codegen id : 10] +Input [2]: [cr_item_sk#39, cr_order_number#40] +Condition : (isnotnull(cr_order_number#40) AND isnotnull(cr_item_sk#39)) + +(64) BroadcastExchange +Input [2]: [cr_item_sk#39, cr_order_number#40] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[1, int, false] as bigint) & 4294967295))),false), [id=#41] + +(65) BroadcastHashJoin [codegen id : 11] +Left keys [2]: [cs_item_sk#5, cs_order_number#7] +Right keys [2]: [cr_item_sk#39, cr_order_number#40] +Join condition: None + +(66) Project [codegen id : 11] +Output [3]: [w_warehouse_name#15, i_item_desc#18, d_week_seq#28] +Input [7]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#15, i_item_desc#18, d_week_seq#28, cr_item_sk#39, cr_order_number#40] + +(67) HashAggregate [codegen id : 11] +Input [3]: [w_warehouse_name#15, i_item_desc#18, d_week_seq#28] +Keys [3]: [i_item_desc#18, w_warehouse_name#15, d_week_seq#28] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#42] +Results [4]: [i_item_desc#18, w_warehouse_name#15, d_week_seq#28, count#43] + +(68) Exchange +Input [4]: [i_item_desc#18, w_warehouse_name#15, d_week_seq#28, count#43] +Arguments: hashpartitioning(i_item_desc#18, w_warehouse_name#15, d_week_seq#28, 5), true, [id=#44] + +(69) HashAggregate [codegen id : 12] +Input [4]: [i_item_desc#18, w_warehouse_name#15, d_week_seq#28, count#43] +Keys [3]: [i_item_desc#18, w_warehouse_name#15, d_week_seq#28] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#45] +Results [6]: [i_item_desc#18, w_warehouse_name#15, d_week_seq#28, count(1)#45 AS no_promo#46, count(1)#45 AS promo#47, count(1)#45 
AS total_cnt#48] + +(70) TakeOrderedAndProject +Input [6]: [i_item_desc#18, w_warehouse_name#15, d_week_seq#28, no_promo#46, promo#47, total_cnt#48] +Arguments: 100, [total_cnt#48 DESC NULLS LAST, i_item_desc#18 ASC NULLS FIRST, w_warehouse_name#15 ASC NULLS FIRST, d_week_seq#28 ASC NULLS FIRST], [i_item_desc#18, w_warehouse_name#15, d_week_seq#28, no_promo#46, promo#47, total_cnt#48] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72/simplified.txt new file mode 100644 index 0000000000000..1488d52118b82 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72/simplified.txt @@ -0,0 +1,104 @@ +TakeOrderedAndProject [d_week_seq,i_item_desc,no_promo,promo,total_cnt,w_warehouse_name] + WholeStageCodegen (12) + HashAggregate [count,d_week_seq,i_item_desc,w_warehouse_name] [count,count(1),no_promo,promo,total_cnt] + InputAdapter + Exchange [d_week_seq,i_item_desc,w_warehouse_name] #1 + WholeStageCodegen (11) + HashAggregate [d_week_seq,i_item_desc,w_warehouse_name] [count,count] + Project [d_week_seq,i_item_desc,w_warehouse_name] + BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + Project [cs_item_sk,cs_order_number,d_week_seq,i_item_desc,w_warehouse_name] + BroadcastHashJoin [cs_promo_sk,p_promo_sk] + Project [cs_item_sk,cs_order_number,cs_promo_sk,d_week_seq,i_item_desc,w_warehouse_name] + BroadcastHashJoin [cs_ship_date_sk,d_date,d_date,d_date_sk] + Project [cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,d_date,d_week_seq,i_item_desc,w_warehouse_name] + BroadcastHashJoin [d_date_sk,d_week_seq,d_week_seq,inv_date_sk] + Project [cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,d_date,d_week_seq,i_item_desc,inv_date_sk,w_warehouse_name] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project 
[cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,i_item_desc,inv_date_sk,w_warehouse_name] + BroadcastHashJoin [cs_bill_hdemo_sk,hd_demo_sk] + Project [cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,i_item_desc,inv_date_sk,w_warehouse_name] + BroadcastHashJoin [cd_demo_sk,cs_bill_cdemo_sk] + Project [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,i_item_desc,inv_date_sk,w_warehouse_name] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,inv_date_sk,w_warehouse_name] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,inv_date_sk,inv_warehouse_sk] + BroadcastHashJoin [cs_item_sk,cs_quantity,inv_item_sk,inv_quantity_on_hand] + Filter [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.warehouse [w_warehouse_name,w_warehouse_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_desc,i_item_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [cd_demo_sk] + Filter 
[cd_demo_sk,cd_marital_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Project [hd_demo_sk] + Filter [hd_buy_potential,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_buy_potential,hd_demo_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (6) + Project [d_date,d_date_sk,d_week_seq] + Filter [d_date,d_date_sk,d_week_seq,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk,d_week_seq,d_year] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (7) + Filter [d_date_sk,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_week_seq] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (8) + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (9) + Filter [p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.promotion [p_promo_sk] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (10) + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74.sf100/explain.txt new file mode 100644 index 0000000000000..c295082463335 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74.sf100/explain.txt @@ -0,0 +1,477 @@ +== Physical Plan == +TakeOrderedAndProject (86) ++- * Project (85) + +- * SortMergeJoin Inner (84) + :- * Project (66) + : +- * SortMergeJoin Inner (65) + : :- * SortMergeJoin Inner (45) + : : :- * Sort (24) + : : : +- Exchange (23) + : : : +- * Filter (22) + : : : +- * HashAggregate 
(21) + : : : +- Exchange (20) + : : : +- * HashAggregate (19) + : : : +- * Project (18) + : : : +- * SortMergeJoin Inner (17) + : : : :- * Sort (11) + : : : : +- Exchange (10) + : : : : +- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.date_dim (4) + : : : +- * Sort (16) + : : : +- Exchange (15) + : : : +- * Filter (14) + : : : +- * ColumnarToRow (13) + : : : +- Scan parquet default.customer (12) + : : +- * Sort (44) + : : +- Exchange (43) + : : +- * HashAggregate (42) + : : +- Exchange (41) + : : +- * HashAggregate (40) + : : +- * Project (39) + : : +- * SortMergeJoin Inner (38) + : : :- * Sort (35) + : : : +- Exchange (34) + : : : +- * Project (33) + : : : +- * BroadcastHashJoin Inner BuildRight (32) + : : : :- * Filter (27) + : : : : +- * ColumnarToRow (26) + : : : : +- Scan parquet default.store_sales (25) + : : : +- BroadcastExchange (31) + : : : +- * Filter (30) + : : : +- * ColumnarToRow (29) + : : : +- Scan parquet default.date_dim (28) + : : +- * Sort (37) + : : +- ReusedExchange (36) + : +- * Sort (64) + : +- Exchange (63) + : +- * Project (62) + : +- * Filter (61) + : +- * HashAggregate (60) + : +- Exchange (59) + : +- * HashAggregate (58) + : +- * Project (57) + : +- * SortMergeJoin Inner (56) + : :- * Sort (53) + : : +- Exchange (52) + : : +- * Project (51) + : : +- * BroadcastHashJoin Inner BuildRight (50) + : : :- * Filter (48) + : : : +- * ColumnarToRow (47) + : : : +- Scan parquet default.web_sales (46) + : : +- ReusedExchange (49) + : +- * Sort (55) + : +- ReusedExchange (54) + +- * Sort (83) + +- Exchange (82) + +- * HashAggregate (81) + +- Exchange (80) + +- * HashAggregate (79) + +- * Project (78) + +- * SortMergeJoin Inner (77) + :- * Sort (74) + : +- Exchange (73) + : +- * Project (72) 
+ : +- * BroadcastHashJoin Inner BuildRight (71) + : :- * Filter (69) + : : +- * ColumnarToRow (68) + : : +- Scan parquet default.web_sales (67) + : +- ReusedExchange (70) + +- * Sort (76) + +- ReusedExchange (75) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_net_paid#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_net_paid#3] + +(3) Filter [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_net_paid#3] +Condition : (isnotnull(ss_customer_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#4, d_year#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), In(d_year, [2001,2002]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#4, d_year#5] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#4, d_year#5] +Condition : (((isnotnull(d_year#5) AND (d_year#5 = 2001)) AND d_year#5 IN (2001,2002)) AND isnotnull(d_date_sk#4)) + +(7) BroadcastExchange +Input [2]: [d_date_sk#4, d_year#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#6] + +(8) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(9) Project [codegen id : 2] +Output [3]: [ss_customer_sk#2, ss_net_paid#3, d_year#5] +Input [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_net_paid#3, d_date_sk#4, d_year#5] + +(10) Exchange +Input [3]: 
[ss_customer_sk#2, ss_net_paid#3, d_year#5] +Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#7] + +(11) Sort [codegen id : 3] +Input [3]: [ss_customer_sk#2, ss_net_paid#3, d_year#5] +Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 + +(12) Scan parquet default.customer +Output [4]: [c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(13) ColumnarToRow [codegen id : 4] +Input [4]: [c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11] + +(14) Filter [codegen id : 4] +Input [4]: [c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11] +Condition : (isnotnull(c_customer_sk#8) AND isnotnull(c_customer_id#9)) + +(15) Exchange +Input [4]: [c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11] +Arguments: hashpartitioning(c_customer_sk#8, 5), true, [id=#12] + +(16) Sort [codegen id : 5] +Input [4]: [c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11] +Arguments: [c_customer_sk#8 ASC NULLS FIRST], false, 0 + +(17) SortMergeJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#8] +Join condition: None + +(18) Project [codegen id : 6] +Output [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, ss_net_paid#3, d_year#5] +Input [7]: [ss_customer_sk#2, ss_net_paid#3, d_year#5, c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11] + +(19) HashAggregate [codegen id : 6] +Input [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, ss_net_paid#3, d_year#5] +Keys [4]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#3))] +Aggregate Attributes [1]: [sum#13] +Results [5]: [c_customer_id#9, c_first_name#10, 
c_last_name#11, d_year#5, sum#14] + +(20) Exchange +Input [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5, sum#14] +Arguments: hashpartitioning(c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5, 5), true, [id=#15] + +(21) HashAggregate [codegen id : 7] +Input [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5, sum#14] +Keys [4]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5] +Functions [1]: [sum(UnscaledValue(ss_net_paid#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#3))#16] +Results [2]: [c_customer_id#9 AS customer_id#17, MakeDecimal(sum(UnscaledValue(ss_net_paid#3))#16,17,2) AS year_total#18] + +(22) Filter [codegen id : 7] +Input [2]: [customer_id#17, year_total#18] +Condition : (isnotnull(year_total#18) AND (year_total#18 > 0.00)) + +(23) Exchange +Input [2]: [customer_id#17, year_total#18] +Arguments: hashpartitioning(customer_id#17, 5), true, [id=#19] + +(24) Sort [codegen id : 8] +Input [2]: [customer_id#17, year_total#18] +Arguments: [customer_id#17 ASC NULLS FIRST], false, 0 + +(25) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_net_paid#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(26) ColumnarToRow [codegen id : 10] +Input [3]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_net_paid#3] + +(27) Filter [codegen id : 10] +Input [3]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_net_paid#3] +Condition : (isnotnull(ss_customer_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(28) Scan parquet default.date_dim +Output [2]: [d_date_sk#4, d_year#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: 
[IsNotNull(d_year), EqualTo(d_year,2002), In(d_year, [2001,2002]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 9] +Input [2]: [d_date_sk#4, d_year#5] + +(30) Filter [codegen id : 9] +Input [2]: [d_date_sk#4, d_year#5] +Condition : (((isnotnull(d_year#5) AND (d_year#5 = 2002)) AND d_year#5 IN (2001,2002)) AND isnotnull(d_date_sk#4)) + +(31) BroadcastExchange +Input [2]: [d_date_sk#4, d_year#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#20] + +(32) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(33) Project [codegen id : 10] +Output [3]: [ss_customer_sk#2, ss_net_paid#3, d_year#5] +Input [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_net_paid#3, d_date_sk#4, d_year#5] + +(34) Exchange +Input [3]: [ss_customer_sk#2, ss_net_paid#3, d_year#5] +Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#21] + +(35) Sort [codegen id : 11] +Input [3]: [ss_customer_sk#2, ss_net_paid#3, d_year#5] +Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 + +(36) ReusedExchange [Reuses operator id: 15] +Output [4]: [c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11] + +(37) Sort [codegen id : 13] +Input [4]: [c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11] +Arguments: [c_customer_sk#8 ASC NULLS FIRST], false, 0 + +(38) SortMergeJoin [codegen id : 14] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#8] +Join condition: None + +(39) Project [codegen id : 14] +Output [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, ss_net_paid#3, d_year#5] +Input [7]: [ss_customer_sk#2, ss_net_paid#3, d_year#5, c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11] + +(40) HashAggregate [codegen id : 14] +Input [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, ss_net_paid#3, d_year#5] +Keys [4]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5] 
+Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#3))] +Aggregate Attributes [1]: [sum#22] +Results [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5, sum#23] + +(41) Exchange +Input [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5, sum#23] +Arguments: hashpartitioning(c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5, 5), true, [id=#24] + +(42) HashAggregate [codegen id : 15] +Input [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5, sum#23] +Keys [4]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5] +Functions [1]: [sum(UnscaledValue(ss_net_paid#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#3))#25] +Results [4]: [c_customer_id#9 AS customer_id#26, c_first_name#10 AS customer_first_name#27, c_last_name#11 AS customer_last_name#28, MakeDecimal(sum(UnscaledValue(ss_net_paid#3))#25,17,2) AS year_total#29] + +(43) Exchange +Input [4]: [customer_id#26, customer_first_name#27, customer_last_name#28, year_total#29] +Arguments: hashpartitioning(customer_id#26, 5), true, [id=#30] + +(44) Sort [codegen id : 16] +Input [4]: [customer_id#26, customer_first_name#27, customer_last_name#28, year_total#29] +Arguments: [customer_id#26 ASC NULLS FIRST], false, 0 + +(45) SortMergeJoin [codegen id : 17] +Left keys [1]: [customer_id#17] +Right keys [1]: [customer_id#26] +Join condition: None + +(46) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#31, ws_bill_customer_sk#32, ws_net_paid#33] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(47) ColumnarToRow [codegen id : 19] +Input [3]: [ws_sold_date_sk#31, ws_bill_customer_sk#32, ws_net_paid#33] + +(48) Filter [codegen id : 19] +Input [3]: [ws_sold_date_sk#31, ws_bill_customer_sk#32, ws_net_paid#33] +Condition : 
(isnotnull(ws_bill_customer_sk#32) AND isnotnull(ws_sold_date_sk#31)) + +(49) ReusedExchange [Reuses operator id: 7] +Output [2]: [d_date_sk#4, d_year#5] + +(50) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [ws_sold_date_sk#31] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(51) Project [codegen id : 19] +Output [3]: [ws_bill_customer_sk#32, ws_net_paid#33, d_year#5] +Input [5]: [ws_sold_date_sk#31, ws_bill_customer_sk#32, ws_net_paid#33, d_date_sk#4, d_year#5] + +(52) Exchange +Input [3]: [ws_bill_customer_sk#32, ws_net_paid#33, d_year#5] +Arguments: hashpartitioning(ws_bill_customer_sk#32, 5), true, [id=#34] + +(53) Sort [codegen id : 20] +Input [3]: [ws_bill_customer_sk#32, ws_net_paid#33, d_year#5] +Arguments: [ws_bill_customer_sk#32 ASC NULLS FIRST], false, 0 + +(54) ReusedExchange [Reuses operator id: 15] +Output [4]: [c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11] + +(55) Sort [codegen id : 22] +Input [4]: [c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11] +Arguments: [c_customer_sk#8 ASC NULLS FIRST], false, 0 + +(56) SortMergeJoin [codegen id : 23] +Left keys [1]: [ws_bill_customer_sk#32] +Right keys [1]: [c_customer_sk#8] +Join condition: None + +(57) Project [codegen id : 23] +Output [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, ws_net_paid#33, d_year#5] +Input [7]: [ws_bill_customer_sk#32, ws_net_paid#33, d_year#5, c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11] + +(58) HashAggregate [codegen id : 23] +Input [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, ws_net_paid#33, d_year#5] +Keys [4]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#33))] +Aggregate Attributes [1]: [sum#35] +Results [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5, sum#36] + +(59) Exchange +Input [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5, sum#36] +Arguments: 
hashpartitioning(c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5, 5), true, [id=#37] + +(60) HashAggregate [codegen id : 24] +Input [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5, sum#36] +Keys [4]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5] +Functions [1]: [sum(UnscaledValue(ws_net_paid#33))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#33))#38] +Results [2]: [c_customer_id#9 AS customer_id#39, MakeDecimal(sum(UnscaledValue(ws_net_paid#33))#38,17,2) AS year_total#40] + +(61) Filter [codegen id : 24] +Input [2]: [customer_id#39, year_total#40] +Condition : (isnotnull(year_total#40) AND (year_total#40 > 0.00)) + +(62) Project [codegen id : 24] +Output [2]: [customer_id#39 AS customer_id#41, year_total#40 AS year_total#42] +Input [2]: [customer_id#39, year_total#40] + +(63) Exchange +Input [2]: [customer_id#41, year_total#42] +Arguments: hashpartitioning(customer_id#41, 5), true, [id=#43] + +(64) Sort [codegen id : 25] +Input [2]: [customer_id#41, year_total#42] +Arguments: [customer_id#41 ASC NULLS FIRST], false, 0 + +(65) SortMergeJoin [codegen id : 26] +Left keys [1]: [customer_id#17] +Right keys [1]: [customer_id#41] +Join condition: None + +(66) Project [codegen id : 26] +Output [7]: [customer_id#17, year_total#18, customer_id#26, customer_first_name#27, customer_last_name#28, year_total#29, year_total#42] +Input [8]: [customer_id#17, year_total#18, customer_id#26, customer_first_name#27, customer_last_name#28, year_total#29, customer_id#41, year_total#42] + +(67) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#31, ws_bill_customer_sk#32, ws_net_paid#33] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(68) ColumnarToRow [codegen id : 28] +Input [3]: 
[ws_sold_date_sk#31, ws_bill_customer_sk#32, ws_net_paid#33] + +(69) Filter [codegen id : 28] +Input [3]: [ws_sold_date_sk#31, ws_bill_customer_sk#32, ws_net_paid#33] +Condition : (isnotnull(ws_bill_customer_sk#32) AND isnotnull(ws_sold_date_sk#31)) + +(70) ReusedExchange [Reuses operator id: 31] +Output [2]: [d_date_sk#4, d_year#5] + +(71) BroadcastHashJoin [codegen id : 28] +Left keys [1]: [ws_sold_date_sk#31] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(72) Project [codegen id : 28] +Output [3]: [ws_bill_customer_sk#32, ws_net_paid#33, d_year#5] +Input [5]: [ws_sold_date_sk#31, ws_bill_customer_sk#32, ws_net_paid#33, d_date_sk#4, d_year#5] + +(73) Exchange +Input [3]: [ws_bill_customer_sk#32, ws_net_paid#33, d_year#5] +Arguments: hashpartitioning(ws_bill_customer_sk#32, 5), true, [id=#44] + +(74) Sort [codegen id : 29] +Input [3]: [ws_bill_customer_sk#32, ws_net_paid#33, d_year#5] +Arguments: [ws_bill_customer_sk#32 ASC NULLS FIRST], false, 0 + +(75) ReusedExchange [Reuses operator id: 15] +Output [4]: [c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11] + +(76) Sort [codegen id : 31] +Input [4]: [c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11] +Arguments: [c_customer_sk#8 ASC NULLS FIRST], false, 0 + +(77) SortMergeJoin [codegen id : 32] +Left keys [1]: [ws_bill_customer_sk#32] +Right keys [1]: [c_customer_sk#8] +Join condition: None + +(78) Project [codegen id : 32] +Output [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, ws_net_paid#33, d_year#5] +Input [7]: [ws_bill_customer_sk#32, ws_net_paid#33, d_year#5, c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11] + +(79) HashAggregate [codegen id : 32] +Input [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, ws_net_paid#33, d_year#5] +Keys [4]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#33))] +Aggregate Attributes [1]: [sum#45] +Results [5]: [c_customer_id#9, 
c_first_name#10, c_last_name#11, d_year#5, sum#46] + +(80) Exchange +Input [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5, sum#46] +Arguments: hashpartitioning(c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5, 5), true, [id=#47] + +(81) HashAggregate [codegen id : 33] +Input [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5, sum#46] +Keys [4]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#5] +Functions [1]: [sum(UnscaledValue(ws_net_paid#33))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#33))#48] +Results [2]: [c_customer_id#9 AS customer_id#49, MakeDecimal(sum(UnscaledValue(ws_net_paid#33))#48,17,2) AS year_total#50] + +(82) Exchange +Input [2]: [customer_id#49, year_total#50] +Arguments: hashpartitioning(customer_id#49, 5), true, [id=#51] + +(83) Sort [codegen id : 34] +Input [2]: [customer_id#49, year_total#50] +Arguments: [customer_id#49 ASC NULLS FIRST], false, 0 + +(84) SortMergeJoin [codegen id : 35] +Left keys [1]: [customer_id#17] +Right keys [1]: [customer_id#49] +Join condition: (CASE WHEN (year_total#42 > 0.00) THEN CheckOverflow((promote_precision(year_total#50) / promote_precision(year_total#42)), DecimalType(37,20), true) ELSE null END > CASE WHEN (year_total#18 > 0.00) THEN CheckOverflow((promote_precision(year_total#29) / promote_precision(year_total#18)), DecimalType(37,20), true) ELSE null END) + +(85) Project [codegen id : 35] +Output [3]: [customer_id#26, customer_first_name#27, customer_last_name#28] +Input [9]: [customer_id#17, year_total#18, customer_id#26, customer_first_name#27, customer_last_name#28, year_total#29, year_total#42, customer_id#49, year_total#50] + +(86) TakeOrderedAndProject +Input [3]: [customer_id#26, customer_first_name#27, customer_last_name#28] +Arguments: 100, [customer_first_name#27 ASC NULLS FIRST, customer_id#26 ASC NULLS FIRST, customer_last_name#28 ASC NULLS FIRST], [customer_id#26, customer_first_name#27, customer_last_name#28] + diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74.sf100/simplified.txt new file mode 100644 index 0000000000000..c35e70d72eb36 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74.sf100/simplified.txt @@ -0,0 +1,157 @@ +TakeOrderedAndProject [customer_first_name,customer_id,customer_last_name] + WholeStageCodegen (35) + Project [customer_first_name,customer_id,customer_last_name] + SortMergeJoin [customer_id,customer_id,year_total,year_total,year_total,year_total] + InputAdapter + WholeStageCodegen (26) + Project [customer_first_name,customer_id,customer_id,customer_last_name,year_total,year_total,year_total] + SortMergeJoin [customer_id,customer_id] + InputAdapter + WholeStageCodegen (17) + SortMergeJoin [customer_id,customer_id] + InputAdapter + WholeStageCodegen (8) + Sort [customer_id] + InputAdapter + Exchange [customer_id] #1 + WholeStageCodegen (7) + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [customer_id,sum,sum(UnscaledValue(ss_net_paid)),year_total] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #2 + WholeStageCodegen (6) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (3) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #3 + WholeStageCodegen (2) + Project [d_year,ss_customer_sk,ss_net_paid] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_net_paid,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet 
default.date_dim [d_date_sk,d_year] + InputAdapter + WholeStageCodegen (5) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #5 + WholeStageCodegen (4) + Filter [c_customer_id,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_id,c_customer_sk,c_first_name,c_last_name] + InputAdapter + WholeStageCodegen (16) + Sort [customer_id] + InputAdapter + Exchange [customer_id] #6 + WholeStageCodegen (15) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [customer_first_name,customer_id,customer_last_name,sum,sum(UnscaledValue(ss_net_paid)),year_total] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #7 + WholeStageCodegen (14) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (11) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #8 + WholeStageCodegen (10) + Project [d_year,ss_customer_sk,ss_net_paid] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_net_paid,ss_sold_date_sk] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (9) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + WholeStageCodegen (13) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_customer_id,c_customer_sk,c_first_name,c_last_name] #5 + InputAdapter + WholeStageCodegen (25) + Sort [customer_id] + InputAdapter + Exchange [customer_id] #10 + WholeStageCodegen (24) + Project [customer_id,year_total] + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [customer_id,sum,sum(UnscaledValue(ws_net_paid)),year_total] + InputAdapter + Exchange 
[c_customer_id,c_first_name,c_last_name,d_year] #11 + WholeStageCodegen (23) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ws_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,d_year,ws_net_paid] + SortMergeJoin [c_customer_sk,ws_bill_customer_sk] + InputAdapter + WholeStageCodegen (20) + Sort [ws_bill_customer_sk] + InputAdapter + Exchange [ws_bill_customer_sk] #12 + WholeStageCodegen (19) + Project [d_year,ws_bill_customer_sk,ws_net_paid] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_bill_customer_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #4 + InputAdapter + WholeStageCodegen (22) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_customer_id,c_customer_sk,c_first_name,c_last_name] #5 + InputAdapter + WholeStageCodegen (34) + Sort [customer_id] + InputAdapter + Exchange [customer_id] #13 + WholeStageCodegen (33) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [customer_id,sum,sum(UnscaledValue(ws_net_paid)),year_total] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #14 + WholeStageCodegen (32) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ws_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,d_year,ws_net_paid] + SortMergeJoin [c_customer_sk,ws_bill_customer_sk] + InputAdapter + WholeStageCodegen (29) + Sort [ws_bill_customer_sk] + InputAdapter + Exchange [ws_bill_customer_sk] #15 + WholeStageCodegen (28) + Project [d_year,ws_bill_customer_sk,ws_net_paid] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_bill_customer_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #9 + InputAdapter + WholeStageCodegen (31) + Sort [c_customer_sk] + 
InputAdapter + ReusedExchange [c_customer_id,c_customer_sk,c_first_name,c_last_name] #5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74/explain.txt new file mode 100644 index 0000000000000..53d52dd20bfbe --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74/explain.txt @@ -0,0 +1,410 @@ +== Physical Plan == +TakeOrderedAndProject (72) ++- * Project (71) + +- * BroadcastHashJoin Inner BuildRight (70) + :- * Project (56) + : +- * BroadcastHashJoin Inner BuildRight (55) + : :- * BroadcastHashJoin Inner BuildRight (36) + : : :- * Filter (19) + : : : +- * HashAggregate (18) + : : : +- Exchange (17) + : : : +- * HashAggregate (16) + : : : +- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.customer (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.store_sales (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.date_dim (10) + : : +- BroadcastExchange (35) + : : +- * HashAggregate (34) + : : +- Exchange (33) + : : +- * HashAggregate (32) + : : +- * Project (31) + : : +- * BroadcastHashJoin Inner BuildRight (30) + : : :- * Project (25) + : : : +- * BroadcastHashJoin Inner BuildRight (24) + : : : :- * Filter (22) + : : : : +- * ColumnarToRow (21) + : : : : +- Scan parquet default.customer (20) + : : : +- ReusedExchange (23) + : : +- BroadcastExchange (29) + : : +- * Filter (28) + : : +- * ColumnarToRow (27) + : : +- Scan parquet default.date_dim (26) + : +- BroadcastExchange (54) + : +- * Project (53) + : +- * Filter (52) + : +- * HashAggregate (51) + : +- Exchange 
(50) + : +- * HashAggregate (49) + : +- * Project (48) + : +- * BroadcastHashJoin Inner BuildRight (47) + : :- * Project (45) + : : +- * BroadcastHashJoin Inner BuildRight (44) + : : :- * Filter (39) + : : : +- * ColumnarToRow (38) + : : : +- Scan parquet default.customer (37) + : : +- BroadcastExchange (43) + : : +- * Filter (42) + : : +- * ColumnarToRow (41) + : : +- Scan parquet default.web_sales (40) + : +- ReusedExchange (46) + +- BroadcastExchange (69) + +- * HashAggregate (68) + +- Exchange (67) + +- * HashAggregate (66) + +- * Project (65) + +- * BroadcastHashJoin Inner BuildRight (64) + :- * Project (62) + : +- * BroadcastHashJoin Inner BuildRight (61) + : :- * Filter (59) + : : +- * ColumnarToRow (58) + : : +- Scan parquet default.customer (57) + : +- ReusedExchange (60) + +- ReusedExchange (63) + + +(1) Scan parquet default.customer +Output [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] + +(3) Filter [codegen id : 3] +Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(4) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#5, ss_customer_sk#6, ss_net_paid#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [ss_sold_date_sk#5, ss_customer_sk#6, ss_net_paid#7] + +(6) Filter [codegen id : 1] +Input 
[3]: [ss_sold_date_sk#5, ss_customer_sk#6, ss_net_paid#7] +Condition : (isnotnull(ss_customer_sk#6) AND isnotnull(ss_sold_date_sk#5)) + +(7) BroadcastExchange +Input [3]: [ss_sold_date_sk#5, ss_customer_sk#6, ss_net_paid#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#8] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#6] +Join condition: None + +(9) Project [codegen id : 3] +Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_sold_date_sk#5, ss_net_paid#7] +Input [7]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, ss_sold_date_sk#5, ss_customer_sk#6, ss_net_paid#7] + +(10) Scan parquet default.date_dim +Output [2]: [d_date_sk#9, d_year#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), In(d_year, [2001,2002]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#9, d_year#10] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#9, d_year#10] +Condition : (((isnotnull(d_year#10) AND (d_year#10 = 2001)) AND d_year#10 IN (2001,2002)) AND isnotnull(d_date_sk#9)) + +(13) BroadcastExchange +Input [2]: [d_date_sk#9, d_year#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(15) Project [codegen id : 3] +Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#7, d_year#10] +Input [7]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_sold_date_sk#5, ss_net_paid#7, d_date_sk#9, d_year#10] + +(16) HashAggregate [codegen id : 3] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#7, 
d_year#10] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#7))] +Aggregate Attributes [1]: [sum#12] +Results [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#13] + +(17) Exchange +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#13] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, 5), true, [id=#14] + +(18) HashAggregate [codegen id : 16] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#13] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10] +Functions [1]: [sum(UnscaledValue(ss_net_paid#7))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#7))#15] +Results [2]: [c_customer_id#2 AS customer_id#16, MakeDecimal(sum(UnscaledValue(ss_net_paid#7))#15,17,2) AS year_total#17] + +(19) Filter [codegen id : 16] +Input [2]: [customer_id#16, year_total#17] +Condition : (isnotnull(year_total#17) AND (year_total#17 > 0.00)) + +(20) Scan parquet default.customer +Output [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] + +(22) Filter [codegen id : 6] +Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(23) ReusedExchange [Reuses operator id: 7] +Output [3]: [ss_sold_date_sk#5, ss_customer_sk#6, ss_net_paid#7] + +(24) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#6] +Join condition: None + +(25) Project [codegen id : 6] 
+Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_sold_date_sk#5, ss_net_paid#7] +Input [7]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, ss_sold_date_sk#5, ss_customer_sk#6, ss_net_paid#7] + +(26) Scan parquet default.date_dim +Output [2]: [d_date_sk#9, d_year#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), In(d_year, [2001,2002]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#9, d_year#10] + +(28) Filter [codegen id : 5] +Input [2]: [d_date_sk#9, d_year#10] +Condition : (((isnotnull(d_year#10) AND (d_year#10 = 2002)) AND d_year#10 IN (2001,2002)) AND isnotnull(d_date_sk#9)) + +(29) BroadcastExchange +Input [2]: [d_date_sk#9, d_year#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#18] + +(30) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(31) Project [codegen id : 6] +Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#7, d_year#10] +Input [7]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_sold_date_sk#5, ss_net_paid#7, d_date_sk#9, d_year#10] + +(32) HashAggregate [codegen id : 6] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#7, d_year#10] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#7))] +Aggregate Attributes [1]: [sum#19] +Results [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#20] + +(33) Exchange +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#20] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, 5), true, [id=#21] + +(34) HashAggregate 
[codegen id : 7] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#20] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10] +Functions [1]: [sum(UnscaledValue(ss_net_paid#7))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#7))#22] +Results [4]: [c_customer_id#2 AS customer_id#23, c_first_name#3 AS customer_first_name#24, c_last_name#4 AS customer_last_name#25, MakeDecimal(sum(UnscaledValue(ss_net_paid#7))#22,17,2) AS year_total#26] + +(35) BroadcastExchange +Input [4]: [customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#27] + +(36) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#16] +Right keys [1]: [customer_id#23] +Join condition: None + +(37) Scan parquet default.customer +Output [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 10] +Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] + +(39) Filter [codegen id : 10] +Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(40) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#28, ws_bill_customer_sk#29, ws_net_paid#30] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 8] +Input [3]: [ws_sold_date_sk#28, ws_bill_customer_sk#29, ws_net_paid#30] + +(42) 
Filter [codegen id : 8] +Input [3]: [ws_sold_date_sk#28, ws_bill_customer_sk#29, ws_net_paid#30] +Condition : (isnotnull(ws_bill_customer_sk#29) AND isnotnull(ws_sold_date_sk#28)) + +(43) BroadcastExchange +Input [3]: [ws_sold_date_sk#28, ws_bill_customer_sk#29, ws_net_paid#30] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#31] + +(44) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ws_bill_customer_sk#29] +Join condition: None + +(45) Project [codegen id : 10] +Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ws_sold_date_sk#28, ws_net_paid#30] +Input [7]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, ws_sold_date_sk#28, ws_bill_customer_sk#29, ws_net_paid#30] + +(46) ReusedExchange [Reuses operator id: 13] +Output [2]: [d_date_sk#9, d_year#10] + +(47) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ws_sold_date_sk#28] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(48) Project [codegen id : 10] +Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ws_net_paid#30, d_year#10] +Input [7]: [c_customer_id#2, c_first_name#3, c_last_name#4, ws_sold_date_sk#28, ws_net_paid#30, d_date_sk#9, d_year#10] + +(49) HashAggregate [codegen id : 10] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ws_net_paid#30, d_year#10] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#30))] +Aggregate Attributes [1]: [sum#32] +Results [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#33] + +(50) Exchange +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#33] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, 5), true, [id=#34] + +(51) HashAggregate [codegen id : 11] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#33] +Keys [4]: [c_customer_id#2, 
c_first_name#3, c_last_name#4, d_year#10] +Functions [1]: [sum(UnscaledValue(ws_net_paid#30))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#30))#35] +Results [2]: [c_customer_id#2 AS customer_id#36, MakeDecimal(sum(UnscaledValue(ws_net_paid#30))#35,17,2) AS year_total#37] + +(52) Filter [codegen id : 11] +Input [2]: [customer_id#36, year_total#37] +Condition : (isnotnull(year_total#37) AND (year_total#37 > 0.00)) + +(53) Project [codegen id : 11] +Output [2]: [customer_id#36 AS customer_id#38, year_total#37 AS year_total#39] +Input [2]: [customer_id#36, year_total#37] + +(54) BroadcastExchange +Input [2]: [customer_id#38, year_total#39] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#40] + +(55) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#16] +Right keys [1]: [customer_id#38] +Join condition: None + +(56) Project [codegen id : 16] +Output [7]: [customer_id#16, year_total#17, customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26, year_total#39] +Input [8]: [customer_id#16, year_total#17, customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26, customer_id#38, year_total#39] + +(57) Scan parquet default.customer +Output [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(58) ColumnarToRow [codegen id : 14] +Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] + +(59) Filter [codegen id : 14] +Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(60) ReusedExchange [Reuses operator id: 43] +Output [3]: [ws_sold_date_sk#28, ws_bill_customer_sk#29, ws_net_paid#30] 
+ +(61) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ws_bill_customer_sk#29] +Join condition: None + +(62) Project [codegen id : 14] +Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ws_sold_date_sk#28, ws_net_paid#30] +Input [7]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, ws_sold_date_sk#28, ws_bill_customer_sk#29, ws_net_paid#30] + +(63) ReusedExchange [Reuses operator id: 29] +Output [2]: [d_date_sk#9, d_year#10] + +(64) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ws_sold_date_sk#28] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(65) Project [codegen id : 14] +Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ws_net_paid#30, d_year#10] +Input [7]: [c_customer_id#2, c_first_name#3, c_last_name#4, ws_sold_date_sk#28, ws_net_paid#30, d_date_sk#9, d_year#10] + +(66) HashAggregate [codegen id : 14] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ws_net_paid#30, d_year#10] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#30))] +Aggregate Attributes [1]: [sum#41] +Results [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#42] + +(67) Exchange +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#42] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, 5), true, [id=#43] + +(68) HashAggregate [codegen id : 15] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#42] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10] +Functions [1]: [sum(UnscaledValue(ws_net_paid#30))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#30))#44] +Results [2]: [c_customer_id#2 AS customer_id#45, MakeDecimal(sum(UnscaledValue(ws_net_paid#30))#44,17,2) AS year_total#46] + +(69) BroadcastExchange +Input [2]: [customer_id#45, year_total#46] +Arguments: 
HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#47] + +(70) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#16] +Right keys [1]: [customer_id#45] +Join condition: (CASE WHEN (year_total#39 > 0.00) THEN CheckOverflow((promote_precision(year_total#46) / promote_precision(year_total#39)), DecimalType(37,20), true) ELSE null END > CASE WHEN (year_total#17 > 0.00) THEN CheckOverflow((promote_precision(year_total#26) / promote_precision(year_total#17)), DecimalType(37,20), true) ELSE null END) + +(71) Project [codegen id : 16] +Output [3]: [customer_id#23, customer_first_name#24, customer_last_name#25] +Input [9]: [customer_id#16, year_total#17, customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26, year_total#39, customer_id#45, year_total#46] + +(72) TakeOrderedAndProject +Input [3]: [customer_id#23, customer_first_name#24, customer_last_name#25] +Arguments: 100, [customer_first_name#24 ASC NULLS FIRST, customer_id#23 ASC NULLS FIRST, customer_last_name#25 ASC NULLS FIRST], [customer_id#23, customer_first_name#24, customer_last_name#25] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74/simplified.txt new file mode 100644 index 0000000000000..68a6e7bfd91a3 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74/simplified.txt @@ -0,0 +1,107 @@ +TakeOrderedAndProject [customer_first_name,customer_id,customer_last_name] + WholeStageCodegen (16) + Project [customer_first_name,customer_id,customer_last_name] + BroadcastHashJoin [customer_id,customer_id,year_total,year_total,year_total,year_total] + Project [customer_first_name,customer_id,customer_id,customer_last_name,year_total,year_total,year_total] + BroadcastHashJoin [customer_id,customer_id] + BroadcastHashJoin [customer_id,customer_id] + Filter [year_total] + HashAggregate 
[c_customer_id,c_first_name,c_last_name,d_year,sum] [customer_id,sum,sum(UnscaledValue(ss_net_paid)),year_total] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #1 + WholeStageCodegen (3) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_customer_id,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_id,c_customer_sk,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_net_paid,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (7) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [customer_first_name,customer_id,customer_last_name,sum,sum(UnscaledValue(ss_net_paid)),year_total] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #5 + WholeStageCodegen (6) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_customer_id,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_id,c_customer_sk,c_first_name,c_last_name] + InputAdapter + ReusedExchange [ss_customer_sk,ss_net_paid,ss_sold_date_sk] #2 + 
InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (11) + Project [customer_id,year_total] + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [customer_id,sum,sum(UnscaledValue(ws_net_paid)),year_total] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #8 + WholeStageCodegen (10) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ws_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,d_year,ws_net_paid] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [c_customer_id,c_first_name,c_last_name,ws_net_paid,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Filter [c_customer_id,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_id,c_customer_sk,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (8) + Filter [ws_bill_customer_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #3 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (15) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [customer_id,sum,sum(UnscaledValue(ws_net_paid)),year_total] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #11 + WholeStageCodegen (14) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ws_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,d_year,ws_net_paid] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [c_customer_id,c_first_name,c_last_name,ws_net_paid,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Filter [c_customer_id,c_customer_sk] + ColumnarToRow + InputAdapter 
+ Scan parquet default.customer [c_customer_id,c_customer_sk,c_first_name,c_last_name] + InputAdapter + ReusedExchange [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] #9 + InputAdapter + ReusedExchange [d_date_sk,d_year] #6 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.sf100/explain.txt new file mode 100644 index 0000000000000..414674df8a5a1 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.sf100/explain.txt @@ -0,0 +1,752 @@ +== Physical Plan == +TakeOrderedAndProject (138) ++- * Project (137) + +- * SortMergeJoin Inner (136) + :- * Sort (74) + : +- Exchange (73) + : +- * HashAggregate (72) + : +- Exchange (71) + : +- * HashAggregate (70) + : +- * HashAggregate (69) + : +- Exchange (68) + : +- * HashAggregate (67) + : +- Union (66) + : :- * HashAggregate (47) + : : +- Exchange (46) + : : +- * HashAggregate (45) + : : +- Union (44) + : : :- * Project (25) + : : : +- SortMergeJoin LeftOuter (24) + : : : :- * Sort (18) + : : : : +- Exchange (17) + : : : : +- * Project (16) + : : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : : :- * Project (10) + : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : :- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet default.catalog_sales (1) + : : : : : +- BroadcastExchange (8) + : : : : : +- * Project (7) + : : : : : +- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.item (4) + : : : : +- BroadcastExchange (14) + : : : : +- * Filter (13) + : : : : +- * ColumnarToRow (12) + : : : : +- Scan parquet default.date_dim (11) + : : : +- * Sort (23) + : : : +- Exchange (22) + : : : +- * Filter (21) + : : : +- * ColumnarToRow (20) + : : : +- Scan parquet default.catalog_returns (19) + : : +- * Project (43) + : : +- SortMergeJoin LeftOuter (42) + : : :- * Sort (36) + : : : +- 
Exchange (35) + : : : +- * Project (34) + : : : +- * BroadcastHashJoin Inner BuildRight (33) + : : : :- * Project (31) + : : : : +- * BroadcastHashJoin Inner BuildRight (30) + : : : : :- * Filter (28) + : : : : : +- * ColumnarToRow (27) + : : : : : +- Scan parquet default.store_sales (26) + : : : : +- ReusedExchange (29) + : : : +- ReusedExchange (32) + : : +- * Sort (41) + : : +- Exchange (40) + : : +- * Filter (39) + : : +- * ColumnarToRow (38) + : : +- Scan parquet default.store_returns (37) + : +- * Project (65) + : +- SortMergeJoin LeftOuter (64) + : :- * Sort (58) + : : +- Exchange (57) + : : +- * Project (56) + : : +- * BroadcastHashJoin Inner BuildRight (55) + : : :- * Project (53) + : : : +- * BroadcastHashJoin Inner BuildRight (52) + : : : :- * Filter (50) + : : : : +- * ColumnarToRow (49) + : : : : +- Scan parquet default.web_sales (48) + : : : +- ReusedExchange (51) + : : +- ReusedExchange (54) + : +- * Sort (63) + : +- Exchange (62) + : +- * Filter (61) + : +- * ColumnarToRow (60) + : +- Scan parquet default.web_returns (59) + +- * Sort (135) + +- Exchange (134) + +- * HashAggregate (133) + +- Exchange (132) + +- * HashAggregate (131) + +- * HashAggregate (130) + +- Exchange (129) + +- * HashAggregate (128) + +- Union (127) + :- * HashAggregate (111) + : +- Exchange (110) + : +- * HashAggregate (109) + : +- Union (108) + : :- * Project (92) + : : +- SortMergeJoin LeftOuter (91) + : : :- * Sort (88) + : : : +- Exchange (87) + : : : +- * Project (86) + : : : +- * BroadcastHashJoin Inner BuildRight (85) + : : : :- * Project (80) + : : : : +- * BroadcastHashJoin Inner BuildRight (79) + : : : : :- * Filter (77) + : : : : : +- * ColumnarToRow (76) + : : : : : +- Scan parquet default.catalog_sales (75) + : : : : +- ReusedExchange (78) + : : : +- BroadcastExchange (84) + : : : +- * Filter (83) + : : : +- * ColumnarToRow (82) + : : : +- Scan parquet default.date_dim (81) + : : +- * Sort (90) + : : +- ReusedExchange (89) + : +- * Project (107) + : +- 
SortMergeJoin LeftOuter (106) + : :- * Sort (103) + : : +- Exchange (102) + : : +- * Project (101) + : : +- * BroadcastHashJoin Inner BuildRight (100) + : : :- * Project (98) + : : : +- * BroadcastHashJoin Inner BuildRight (97) + : : : :- * Filter (95) + : : : : +- * ColumnarToRow (94) + : : : : +- Scan parquet default.store_sales (93) + : : : +- ReusedExchange (96) + : : +- ReusedExchange (99) + : +- * Sort (105) + : +- ReusedExchange (104) + +- * Project (126) + +- SortMergeJoin LeftOuter (125) + :- * Sort (122) + : +- Exchange (121) + : +- * Project (120) + : +- * BroadcastHashJoin Inner BuildRight (119) + : :- * Project (117) + : : +- * BroadcastHashJoin Inner BuildRight (116) + : : :- * Filter (114) + : : : +- * ColumnarToRow (113) + : : : +- Scan parquet default.web_sales (112) + : : +- ReusedExchange (115) + : +- ReusedExchange (118) + +- * Sort (124) + +- ReusedExchange (123) + + +(1) Scan parquet default.catalog_sales +Output [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] + +(3) Filter [codegen id : 3] +Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] +Condition : (isnotnull(cs_item_sk#2) AND isnotnull(cs_sold_date_sk#1)) + +(4) Scan parquet default.item +Output [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/item] +PushedFilters: 
[IsNotNull(i_category), EqualTo(i_category,Books), IsNotNull(i_item_sk), IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id), IsNotNull(i_manufact_id)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] + +(6) Filter [codegen id : 1] +Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] +Condition : ((((((isnotnull(i_category#10) AND (i_category#10 = Books)) AND isnotnull(i_item_sk#6)) AND isnotnull(i_class_id#8)) AND isnotnull(i_brand_id#7)) AND isnotnull(i_category_id#9)) AND isnotnull(i_manufact_id#11)) + +(7) Project [codegen id : 1] +Output [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] + +(8) BroadcastExchange +Input [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_item_sk#2] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(10) Project [codegen id : 3] +Output [9]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Input [10]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(11) Scan parquet default.date_dim +Output [2]: [d_date_sk#13, d_year#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) 
ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#13, d_year#14] + +(13) Filter [codegen id : 2] +Input [2]: [d_date_sk#13, d_year#14] +Condition : ((isnotnull(d_year#14) AND (d_year#14 = 2002)) AND isnotnull(d_date_sk#13)) + +(14) BroadcastExchange +Input [2]: [d_date_sk#13, d_year#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#15] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#13] +Join condition: None + +(16) Project [codegen id : 3] +Output [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Input [11]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#13, d_year#14] + +(17) Exchange +Input [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Arguments: hashpartitioning(cs_order_number#3, cs_item_sk#2, 5), true, [id=#16] + +(18) Sort [codegen id : 4] +Input [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Arguments: [cs_order_number#3 ASC NULLS FIRST, cs_item_sk#2 ASC NULLS FIRST], false, 0 + +(19) Scan parquet default.catalog_returns +Output [4]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/catalog_returns] +PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 5] +Input [4]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] + +(21) Filter 
[codegen id : 5] +Input [4]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] +Condition : (isnotnull(cr_item_sk#17) AND isnotnull(cr_order_number#18)) + +(22) Exchange +Input [4]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] +Arguments: hashpartitioning(cr_order_number#18, cr_item_sk#17, 5), true, [id=#21] + +(23) Sort [codegen id : 6] +Input [4]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] +Arguments: [cr_order_number#18 ASC NULLS FIRST, cr_item_sk#17 ASC NULLS FIRST], false, 0 + +(24) SortMergeJoin +Left keys [2]: [cs_order_number#3, cs_item_sk#2] +Right keys [2]: [cr_order_number#18, cr_item_sk#17] +Join condition: None + +(25) Project [codegen id : 7] +Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (cs_quantity#4 - coalesce(cr_return_quantity#19, 0)) AS sales_cnt#22, CheckOverflow((promote_precision(cast(cs_ext_sales_price#5 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#20, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#23] +Input [13]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] + +(26) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 10] +Input [5]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28] + +(28) Filter [codegen id : 10] +Input [5]: [ss_sold_date_sk#24, 
ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28] +Condition : (isnotnull(ss_item_sk#25) AND isnotnull(ss_sold_date_sk#24)) + +(29) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(30) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ss_item_sk#25] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(31) Project [codegen id : 10] +Output [9]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Input [10]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(32) ReusedExchange [Reuses operator id: 14] +Output [2]: [d_date_sk#13, d_year#14] + +(33) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ss_sold_date_sk#24] +Right keys [1]: [d_date_sk#13] +Join condition: None + +(34) Project [codegen id : 10] +Output [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Input [11]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#13, d_year#14] + +(35) Exchange +Input [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Arguments: hashpartitioning(cast(ss_ticket_number#26 as bigint), cast(ss_item_sk#25 as bigint), 5), true, [id=#29] + +(36) Sort [codegen id : 11] +Input [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Arguments: [cast(ss_ticket_number#26 as bigint) ASC NULLS FIRST, cast(ss_item_sk#25 as 
bigint) ASC NULLS FIRST], false, 0 + +(37) Scan parquet default.store_returns +Output [4]: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_returns] +PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 12] +Input [4]: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] + +(39) Filter [codegen id : 12] +Input [4]: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] +Condition : (isnotnull(sr_item_sk#30) AND isnotnull(sr_ticket_number#31)) + +(40) Exchange +Input [4]: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] +Arguments: hashpartitioning(sr_ticket_number#31, sr_item_sk#30, 5), true, [id=#34] + +(41) Sort [codegen id : 13] +Input [4]: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] +Arguments: [sr_ticket_number#31 ASC NULLS FIRST, sr_item_sk#30 ASC NULLS FIRST], false, 0 + +(42) SortMergeJoin +Left keys [2]: [cast(ss_ticket_number#26 as bigint), cast(ss_item_sk#25 as bigint)] +Right keys [2]: [sr_ticket_number#31, sr_item_sk#30] +Join condition: None + +(43) Project [codegen id : 14] +Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (ss_quantity#27 - coalesce(sr_return_quantity#32, 0)) AS sales_cnt#35, CheckOverflow((promote_precision(cast(ss_ext_sales_price#28 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#33, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#36] +Input [13]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] + +(44) Union + 
+(45) HashAggregate [codegen id : 15] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] +Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] + +(46) Exchange +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] +Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23, 5), true, [id=#37] + +(47) HashAggregate [codegen id : 16] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] +Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] + +(48) Scan parquet default.web_sales +Output [5]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(49) ColumnarToRow [codegen id : 19] +Input [5]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42] + +(50) Filter [codegen id : 19] +Input [5]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42] +Condition : (isnotnull(ws_item_sk#39) AND isnotnull(ws_sold_date_sk#38)) + +(51) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#6, i_brand_id#7, 
i_class_id#8, i_category_id#9, i_manufact_id#11] + +(52) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [ws_item_sk#39] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(53) Project [codegen id : 19] +Output [9]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Input [10]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(54) ReusedExchange [Reuses operator id: 14] +Output [2]: [d_date_sk#13, d_year#14] + +(55) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [ws_sold_date_sk#38] +Right keys [1]: [d_date_sk#13] +Join condition: None + +(56) Project [codegen id : 19] +Output [9]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Input [11]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#13, d_year#14] + +(57) Exchange +Input [9]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Arguments: hashpartitioning(cast(ws_order_number#40 as bigint), cast(ws_item_sk#39 as bigint), 5), true, [id=#43] + +(58) Sort [codegen id : 20] +Input [9]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Arguments: [cast(ws_order_number#40 as bigint) ASC NULLS FIRST, cast(ws_item_sk#39 as bigint) ASC NULLS FIRST], false, 0 + +(59) Scan parquet default.web_returns +Output [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/web_returns] +PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_order_number)] +ReadSchema: struct + +(60) ColumnarToRow [codegen id : 21] +Input [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] + +(61) Filter [codegen id : 21] +Input [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] +Condition : (isnotnull(wr_item_sk#44) AND isnotnull(wr_order_number#45)) + +(62) Exchange +Input [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] +Arguments: hashpartitioning(wr_order_number#45, wr_item_sk#44, 5), true, [id=#48] + +(63) Sort [codegen id : 22] +Input [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] +Arguments: [wr_order_number#45 ASC NULLS FIRST, wr_item_sk#44 ASC NULLS FIRST], false, 0 + +(64) SortMergeJoin +Left keys [2]: [cast(ws_order_number#40 as bigint), cast(ws_item_sk#39 as bigint)] +Right keys [2]: [wr_order_number#45, wr_item_sk#44] +Join condition: None + +(65) Project [codegen id : 23] +Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (ws_quantity#41 - coalesce(wr_return_quantity#46, 0)) AS sales_cnt#49, CheckOverflow((promote_precision(cast(ws_ext_sales_price#42 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#47, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#50] +Input [13]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] + +(66) Union + +(67) HashAggregate [codegen id : 24] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] +Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, 
i_manufact_id#11, sales_cnt#22, sales_amt#23] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] + +(68) Exchange +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] +Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23, 5), true, [id=#51] + +(69) HashAggregate [codegen id : 25] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] +Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] + +(70) HashAggregate [codegen id : 25] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] +Keys [5]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Functions [2]: [partial_sum(cast(sales_cnt#22 as bigint)), partial_sum(UnscaledValue(sales_amt#23))] +Aggregate Attributes [2]: [sum#52, sum#53] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#54, sum#55] + +(71) Exchange +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#54, sum#55] +Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), true, [id=#56] + +(72) HashAggregate [codegen id : 26] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#54, sum#55] +Keys [5]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Functions [2]: [sum(cast(sales_cnt#22 as bigint)), sum(UnscaledValue(sales_amt#23))] +Aggregate Attributes [2]: 
[sum(cast(sales_cnt#22 as bigint))#57, sum(UnscaledValue(sales_amt#23))#58] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum(cast(sales_cnt#22 as bigint))#57 AS sales_cnt#59, MakeDecimal(sum(UnscaledValue(sales_amt#23))#58,18,2) AS sales_amt#60] + +(73) Exchange +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#59, sales_amt#60] +Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), true, [id=#61] + +(74) Sort [codegen id : 27] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#59, sales_amt#60] +Arguments: [i_brand_id#7 ASC NULLS FIRST, i_class_id#8 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST, i_manufact_id#11 ASC NULLS FIRST], false, 0 + +(75) Scan parquet default.catalog_sales +Output [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(76) ColumnarToRow [codegen id : 30] +Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] + +(77) Filter [codegen id : 30] +Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] +Condition : (isnotnull(cs_item_sk#2) AND isnotnull(cs_sold_date_sk#1)) + +(78) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] + +(79) BroadcastHashJoin [codegen id : 30] +Left keys [1]: [cs_item_sk#2] +Right keys [1]: [i_item_sk#62] +Join condition: None + +(80) Project [codegen id : 30] +Output [9]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, 
i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +Input [10]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] + +(81) Scan parquet default.date_dim +Output [2]: [d_date_sk#67, d_year#68] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(82) ColumnarToRow [codegen id : 29] +Input [2]: [d_date_sk#67, d_year#68] + +(83) Filter [codegen id : 29] +Input [2]: [d_date_sk#67, d_year#68] +Condition : ((isnotnull(d_year#68) AND (d_year#68 = 2001)) AND isnotnull(d_date_sk#67)) + +(84) BroadcastExchange +Input [2]: [d_date_sk#67, d_year#68] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#69] + +(85) BroadcastHashJoin [codegen id : 30] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#67] +Join condition: None + +(86) Project [codegen id : 30] +Output [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] +Input [11]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_date_sk#67, d_year#68] + +(87) Exchange +Input [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] +Arguments: hashpartitioning(cs_order_number#3, cs_item_sk#2, 5), true, [id=#70] + +(88) Sort [codegen id : 31] +Input [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] +Arguments: 
[cs_order_number#3 ASC NULLS FIRST, cs_item_sk#2 ASC NULLS FIRST], false, 0 + +(89) ReusedExchange [Reuses operator id: 22] +Output [4]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] + +(90) Sort [codegen id : 33] +Input [4]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] +Arguments: [cr_order_number#18 ASC NULLS FIRST, cr_item_sk#17 ASC NULLS FIRST], false, 0 + +(91) SortMergeJoin +Left keys [2]: [cs_order_number#3, cs_item_sk#2] +Right keys [2]: [cr_order_number#18, cr_item_sk#17] +Join condition: None + +(92) Project [codegen id : 34] +Output [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, (cs_quantity#4 - coalesce(cr_return_quantity#19, 0)) AS sales_cnt#22, CheckOverflow((promote_precision(cast(cs_ext_sales_price#5 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#20, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#23] +Input [13]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68, cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] + +(93) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(94) ColumnarToRow [codegen id : 37] +Input [5]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28] + +(95) Filter [codegen id : 37] +Input [5]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28] +Condition : (isnotnull(ss_item_sk#25) AND isnotnull(ss_sold_date_sk#24)) + +(96) 
ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] + +(97) BroadcastHashJoin [codegen id : 37] +Left keys [1]: [ss_item_sk#25] +Right keys [1]: [i_item_sk#62] +Join condition: None + +(98) Project [codegen id : 37] +Output [9]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +Input [10]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] + +(99) ReusedExchange [Reuses operator id: 84] +Output [2]: [d_date_sk#67, d_year#68] + +(100) BroadcastHashJoin [codegen id : 37] +Left keys [1]: [ss_sold_date_sk#24] +Right keys [1]: [d_date_sk#67] +Join condition: None + +(101) Project [codegen id : 37] +Output [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] +Input [11]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_date_sk#67, d_year#68] + +(102) Exchange +Input [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] +Arguments: hashpartitioning(cast(ss_ticket_number#26 as bigint), cast(ss_item_sk#25 as bigint), 5), true, [id=#71] + +(103) Sort [codegen id : 38] +Input [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] +Arguments: [cast(ss_ticket_number#26 as bigint) ASC NULLS FIRST, cast(ss_item_sk#25 as bigint) ASC NULLS FIRST], false, 0 + +(104) ReusedExchange [Reuses operator id: 40] +Output [4]: [sr_item_sk#30, 
sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] + +(105) Sort [codegen id : 40] +Input [4]: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] +Arguments: [sr_ticket_number#31 ASC NULLS FIRST, sr_item_sk#30 ASC NULLS FIRST], false, 0 + +(106) SortMergeJoin +Left keys [2]: [cast(ss_ticket_number#26 as bigint), cast(ss_item_sk#25 as bigint)] +Right keys [2]: [sr_ticket_number#31, sr_item_sk#30] +Join condition: None + +(107) Project [codegen id : 41] +Output [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, (ss_quantity#27 - coalesce(sr_return_quantity#32, 0)) AS sales_cnt#72, CheckOverflow((promote_precision(cast(ss_ext_sales_price#28 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#33, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#73] +Input [13]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68, sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] + +(108) Union + +(109) HashAggregate [codegen id : 42] +Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +Keys [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] + +(110) Exchange +Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +Arguments: hashpartitioning(d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23, 5), true, [id=#74] + +(111) HashAggregate [codegen id : 43] +Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] 
+Keys [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] + +(112) Scan parquet default.web_sales +Output [5]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(113) ColumnarToRow [codegen id : 46] +Input [5]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42] + +(114) Filter [codegen id : 46] +Input [5]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42] +Condition : (isnotnull(ws_item_sk#39) AND isnotnull(ws_sold_date_sk#38)) + +(115) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] + +(116) BroadcastHashJoin [codegen id : 46] +Left keys [1]: [ws_item_sk#39] +Right keys [1]: [i_item_sk#62] +Join condition: None + +(117) Project [codegen id : 46] +Output [9]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +Input [10]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] + +(118) ReusedExchange [Reuses operator id: 84] +Output [2]: [d_date_sk#67, d_year#68] + +(119) BroadcastHashJoin [codegen id : 46] +Left keys [1]: [ws_sold_date_sk#38] +Right keys [1]: [d_date_sk#67] +Join condition: None + +(120) Project [codegen id : 46] +Output [9]: 
[ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] +Input [11]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_date_sk#67, d_year#68] + +(121) Exchange +Input [9]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] +Arguments: hashpartitioning(cast(ws_order_number#40 as bigint), cast(ws_item_sk#39 as bigint), 5), true, [id=#75] + +(122) Sort [codegen id : 47] +Input [9]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] +Arguments: [cast(ws_order_number#40 as bigint) ASC NULLS FIRST, cast(ws_item_sk#39 as bigint) ASC NULLS FIRST], false, 0 + +(123) ReusedExchange [Reuses operator id: 62] +Output [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] + +(124) Sort [codegen id : 49] +Input [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] +Arguments: [wr_order_number#45 ASC NULLS FIRST, wr_item_sk#44 ASC NULLS FIRST], false, 0 + +(125) SortMergeJoin +Left keys [2]: [cast(ws_order_number#40 as bigint), cast(ws_item_sk#39 as bigint)] +Right keys [2]: [wr_order_number#45, wr_item_sk#44] +Join condition: None + +(126) Project [codegen id : 50] +Output [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, (ws_quantity#41 - coalesce(wr_return_quantity#46, 0)) AS sales_cnt#76, CheckOverflow((promote_precision(cast(ws_ext_sales_price#42 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#47, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#77] +Input [13]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#63, 
i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68, wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] + +(127) Union + +(128) HashAggregate [codegen id : 51] +Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +Keys [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] + +(129) Exchange +Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +Arguments: hashpartitioning(d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23, 5), true, [id=#78] + +(130) HashAggregate [codegen id : 52] +Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +Keys [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] + +(131) HashAggregate [codegen id : 52] +Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +Keys [5]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +Functions [2]: [partial_sum(cast(sales_cnt#22 as bigint)), partial_sum(UnscaledValue(sales_amt#23))] +Aggregate Attributes [2]: [sum#79, sum#80] +Results [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sum#81, sum#82] + +(132) Exchange +Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sum#81, sum#82] +Arguments: hashpartitioning(d_year#68, i_brand_id#63, 
i_class_id#64, i_category_id#65, i_manufact_id#66, 5), true, [id=#83] + +(133) HashAggregate [codegen id : 53] +Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sum#81, sum#82] +Keys [5]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +Functions [2]: [sum(cast(sales_cnt#22 as bigint)), sum(UnscaledValue(sales_amt#23))] +Aggregate Attributes [2]: [sum(cast(sales_cnt#22 as bigint))#84, sum(UnscaledValue(sales_amt#23))#85] +Results [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sum(cast(sales_cnt#22 as bigint))#84 AS sales_cnt#86, MakeDecimal(sum(UnscaledValue(sales_amt#23))#85,18,2) AS sales_amt#87] + +(134) Exchange +Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#86, sales_amt#87] +Arguments: hashpartitioning(i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, 5), true, [id=#88] + +(135) Sort [codegen id : 54] +Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#86, sales_amt#87] +Arguments: [i_brand_id#63 ASC NULLS FIRST, i_class_id#64 ASC NULLS FIRST, i_category_id#65 ASC NULLS FIRST, i_manufact_id#66 ASC NULLS FIRST], false, 0 + +(136) SortMergeJoin [codegen id : 55] +Left keys [4]: [i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Right keys [4]: [i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +Join condition: (CheckOverflow((promote_precision(cast(sales_cnt#59 as decimal(17,2))) / promote_precision(cast(sales_cnt#86 as decimal(17,2)))), DecimalType(37,20), true) < 0.90000000000000000000) + +(137) Project [codegen id : 55] +Output [10]: [d_year#68 AS prev_year#89, d_year#14 AS year#90, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#86 AS prev_yr_cnt#91, sales_cnt#59 AS curr_yr_cnt#92, (sales_cnt#59 - sales_cnt#86) AS sales_cnt_diff#93, CheckOverflow((promote_precision(cast(sales_amt#60 
as decimal(19,2))) - promote_precision(cast(sales_amt#87 as decimal(19,2)))), DecimalType(19,2), true) AS sales_amt_diff#94] +Input [14]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#59, sales_amt#60, d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#86, sales_amt#87] + +(138) TakeOrderedAndProject +Input [10]: [prev_year#89, year#90, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#91, curr_yr_cnt#92, sales_cnt_diff#93, sales_amt_diff#94] +Arguments: 100, [sales_cnt_diff#93 ASC NULLS FIRST, sales_amt_diff#94 ASC NULLS FIRST], [prev_year#89, year#90, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#91, curr_yr_cnt#92, sales_cnt_diff#93, sales_amt_diff#94] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.sf100/simplified.txt new file mode 100644 index 0000000000000..40aa2931ad5b8 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.sf100/simplified.txt @@ -0,0 +1,237 @@ +TakeOrderedAndProject [curr_yr_cnt,i_brand_id,i_category_id,i_class_id,i_manufact_id,prev_year,prev_yr_cnt,sales_amt_diff,sales_cnt_diff,year] + WholeStageCodegen (55) + Project [d_year,d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_amt,sales_cnt,sales_cnt] + SortMergeJoin [i_brand_id,i_brand_id,i_category_id,i_category_id,i_class_id,i_class_id,i_manufact_id,i_manufact_id,sales_cnt,sales_cnt] + InputAdapter + WholeStageCodegen (27) + Sort [i_brand_id,i_category_id,i_class_id,i_manufact_id] + InputAdapter + Exchange [i_brand_id,i_category_id,i_class_id,i_manufact_id] #1 + WholeStageCodegen (26) + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sum,sum] [sales_amt,sales_cnt,sum,sum,sum(UnscaledValue(sales_amt)),sum(cast(sales_cnt as bigint))] + 
InputAdapter + Exchange [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id] #2 + WholeStageCodegen (25) + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] [sum,sum,sum,sum] + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + InputAdapter + Exchange [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] #3 + WholeStageCodegen (24) + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + InputAdapter + Union + WholeStageCodegen (16) + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + InputAdapter + Exchange [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] #4 + WholeStageCodegen (15) + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + InputAdapter + Union + WholeStageCodegen (7) + Project [cr_return_amount,cr_return_quantity,cs_ext_sales_price,cs_quantity,d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id] + InputAdapter + SortMergeJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + WholeStageCodegen (4) + Sort [cs_item_sk,cs_order_number] + InputAdapter + Exchange [cs_item_sk,cs_order_number] #5 + WholeStageCodegen (3) + Project [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,cs_sold_date_sk,i_brand_id,i_category_id,i_class_id,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,cs_sold_date_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (1) + Project [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] 
+ Filter [i_brand_id,i_category,i_category_id,i_class_id,i_item_sk,i_manufact_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand_id,i_category,i_category_id,i_class_id,i_item_sk,i_manufact_id] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (2) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen (6) + Sort [cr_item_sk,cr_order_number] + InputAdapter + Exchange [cr_item_sk,cr_order_number] #8 + WholeStageCodegen (5) + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] + WholeStageCodegen (14) + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sr_return_amt,sr_return_quantity,ss_ext_sales_price,ss_quantity] + InputAdapter + SortMergeJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + WholeStageCodegen (11) + Sort [ss_item_sk,ss_ticket_number] + InputAdapter + Exchange [ss_item_sk,ss_ticket_number] #9 + WholeStageCodegen (10) + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,ss_ext_sales_price,ss_item_sk,ss_quantity,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,i_manufact_id,ss_ext_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ticket_number] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ticket_number] + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] #6 + InputAdapter + ReusedExchange [d_date_sk,d_year] #7 + WholeStageCodegen (13) + Sort [sr_item_sk,sr_ticket_number] + InputAdapter + Exchange [sr_item_sk,sr_ticket_number] #10 + WholeStageCodegen (12) + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter 
+ Scan parquet default.store_returns [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] + WholeStageCodegen (23) + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,wr_return_amt,wr_return_quantity,ws_ext_sales_price,ws_quantity] + InputAdapter + SortMergeJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + WholeStageCodegen (20) + Sort [ws_item_sk,ws_order_number] + InputAdapter + Exchange [ws_item_sk,ws_order_number] #11 + WholeStageCodegen (19) + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,i_manufact_id,ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity,ws_sold_date_sk] + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] #6 + InputAdapter + ReusedExchange [d_date_sk,d_year] #7 + WholeStageCodegen (22) + Sort [wr_item_sk,wr_order_number] + InputAdapter + Exchange [wr_item_sk,wr_order_number] #12 + WholeStageCodegen (21) + Filter [wr_item_sk,wr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] + InputAdapter + WholeStageCodegen (54) + Sort [i_brand_id,i_category_id,i_class_id,i_manufact_id] + InputAdapter + Exchange [i_brand_id,i_category_id,i_class_id,i_manufact_id] #13 + WholeStageCodegen (53) + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sum,sum] [sales_amt,sales_cnt,sum,sum,sum(UnscaledValue(sales_amt)),sum(cast(sales_cnt as bigint))] + InputAdapter + Exchange [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id] #14 + WholeStageCodegen (52) + HashAggregate 
[d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] [sum,sum,sum,sum] + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + InputAdapter + Exchange [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] #15 + WholeStageCodegen (51) + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + InputAdapter + Union + WholeStageCodegen (43) + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + InputAdapter + Exchange [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] #16 + WholeStageCodegen (42) + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + InputAdapter + Union + WholeStageCodegen (34) + Project [cr_return_amount,cr_return_quantity,cs_ext_sales_price,cs_quantity,d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id] + InputAdapter + SortMergeJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + WholeStageCodegen (31) + Sort [cs_item_sk,cs_order_number] + InputAdapter + Exchange [cs_item_sk,cs_order_number] #17 + WholeStageCodegen (30) + Project [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,cs_sold_date_sk,i_brand_id,i_category_id,i_class_id,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,cs_sold_date_sk] + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] #6 + InputAdapter + BroadcastExchange #18 + WholeStageCodegen (29) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim 
[d_date_sk,d_year] + WholeStageCodegen (33) + Sort [cr_item_sk,cr_order_number] + InputAdapter + ReusedExchange [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] #8 + WholeStageCodegen (41) + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sr_return_amt,sr_return_quantity,ss_ext_sales_price,ss_quantity] + InputAdapter + SortMergeJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + WholeStageCodegen (38) + Sort [ss_item_sk,ss_ticket_number] + InputAdapter + Exchange [ss_item_sk,ss_ticket_number] #19 + WholeStageCodegen (37) + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,ss_ext_sales_price,ss_item_sk,ss_quantity,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,i_manufact_id,ss_ext_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ticket_number] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ticket_number] + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] #6 + InputAdapter + ReusedExchange [d_date_sk,d_year] #18 + WholeStageCodegen (40) + Sort [sr_item_sk,sr_ticket_number] + InputAdapter + ReusedExchange [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] #10 + WholeStageCodegen (50) + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,wr_return_amt,wr_return_quantity,ws_ext_sales_price,ws_quantity] + InputAdapter + SortMergeJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + WholeStageCodegen (47) + Sort [ws_item_sk,ws_order_number] + InputAdapter + Exchange [ws_item_sk,ws_order_number] #20 + WholeStageCodegen (46) + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project 
[i_brand_id,i_category_id,i_class_id,i_manufact_id,ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity,ws_sold_date_sk] + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] #6 + InputAdapter + ReusedExchange [d_date_sk,d_year] #18 + WholeStageCodegen (49) + Sort [wr_item_sk,wr_order_number] + InputAdapter + ReusedExchange [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] #12 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75/explain.txt new file mode 100644 index 0000000000000..7306dc2be142e --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75/explain.txt @@ -0,0 +1,647 @@ +== Physical Plan == +TakeOrderedAndProject (117) ++- * Project (116) + +- * BroadcastHashJoin Inner BuildRight (115) + :- * HashAggregate (63) + : +- Exchange (62) + : +- * HashAggregate (61) + : +- * HashAggregate (60) + : +- Exchange (59) + : +- * HashAggregate (58) + : +- Union (57) + : :- * HashAggregate (41) + : : +- Exchange (40) + : : +- * HashAggregate (39) + : : +- Union (38) + : : :- * Project (22) + : : : +- * BroadcastHashJoin LeftOuter BuildRight (21) + : : : :- * Project (16) + : : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : : :- * Project (10) + : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : :- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet default.catalog_sales (1) + : : : : : +- BroadcastExchange (8) + : : : : : +- * Project (7) + : : : : : +- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.item (4) + : : : : +- BroadcastExchange (14) + : : : : 
+- * Filter (13) + : : : : +- * ColumnarToRow (12) + : : : : +- Scan parquet default.date_dim (11) + : : : +- BroadcastExchange (20) + : : : +- * Filter (19) + : : : +- * ColumnarToRow (18) + : : : +- Scan parquet default.catalog_returns (17) + : : +- * Project (37) + : : +- * BroadcastHashJoin LeftOuter BuildRight (36) + : : :- * Project (31) + : : : +- * BroadcastHashJoin Inner BuildRight (30) + : : : :- * Project (28) + : : : : +- * BroadcastHashJoin Inner BuildRight (27) + : : : : :- * Filter (25) + : : : : : +- * ColumnarToRow (24) + : : : : : +- Scan parquet default.store_sales (23) + : : : : +- ReusedExchange (26) + : : : +- ReusedExchange (29) + : : +- BroadcastExchange (35) + : : +- * Filter (34) + : : +- * ColumnarToRow (33) + : : +- Scan parquet default.store_returns (32) + : +- * Project (56) + : +- * BroadcastHashJoin LeftOuter BuildRight (55) + : :- * Project (50) + : : +- * BroadcastHashJoin Inner BuildRight (49) + : : :- * Project (47) + : : : +- * BroadcastHashJoin Inner BuildRight (46) + : : : :- * Filter (44) + : : : : +- * ColumnarToRow (43) + : : : : +- Scan parquet default.web_sales (42) + : : : +- ReusedExchange (45) + : : +- ReusedExchange (48) + : +- BroadcastExchange (54) + : +- * Filter (53) + : +- * ColumnarToRow (52) + : +- Scan parquet default.web_returns (51) + +- BroadcastExchange (114) + +- * HashAggregate (113) + +- Exchange (112) + +- * HashAggregate (111) + +- * HashAggregate (110) + +- Exchange (109) + +- * HashAggregate (108) + +- Union (107) + :- * HashAggregate (94) + : +- Exchange (93) + : +- * HashAggregate (92) + : +- Union (91) + : :- * Project (78) + : : +- * BroadcastHashJoin LeftOuter BuildRight (77) + : : :- * Project (75) + : : : +- * BroadcastHashJoin Inner BuildRight (74) + : : : :- * Project (69) + : : : : +- * BroadcastHashJoin Inner BuildRight (68) + : : : : :- * Filter (66) + : : : : : +- * ColumnarToRow (65) + : : : : : +- Scan parquet default.catalog_sales (64) + : : : : +- ReusedExchange (67) + : : : +- 
BroadcastExchange (73) + : : : +- * Filter (72) + : : : +- * ColumnarToRow (71) + : : : +- Scan parquet default.date_dim (70) + : : +- ReusedExchange (76) + : +- * Project (90) + : +- * BroadcastHashJoin LeftOuter BuildRight (89) + : :- * Project (87) + : : +- * BroadcastHashJoin Inner BuildRight (86) + : : :- * Project (84) + : : : +- * BroadcastHashJoin Inner BuildRight (83) + : : : :- * Filter (81) + : : : : +- * ColumnarToRow (80) + : : : : +- Scan parquet default.store_sales (79) + : : : +- ReusedExchange (82) + : : +- ReusedExchange (85) + : +- ReusedExchange (88) + +- * Project (106) + +- * BroadcastHashJoin LeftOuter BuildRight (105) + :- * Project (103) + : +- * BroadcastHashJoin Inner BuildRight (102) + : :- * Project (100) + : : +- * BroadcastHashJoin Inner BuildRight (99) + : : :- * Filter (97) + : : : +- * ColumnarToRow (96) + : : : +- Scan parquet default.web_sales (95) + : : +- ReusedExchange (98) + : +- ReusedExchange (101) + +- ReusedExchange (104) + + +(1) Scan parquet default.catalog_sales +Output [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] + +(3) Filter [codegen id : 4] +Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] +Condition : (isnotnull(cs_item_sk#2) AND isnotnull(cs_sold_date_sk#1)) + +(4) Scan parquet default.item +Output [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Books), IsNotNull(i_item_sk), IsNotNull(i_category_id), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_manufact_id)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] + +(6) Filter [codegen id : 1] +Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] +Condition : ((((((isnotnull(i_category#10) AND (i_category#10 = Books)) AND isnotnull(i_item_sk#6)) AND isnotnull(i_category_id#9)) AND isnotnull(i_brand_id#7)) AND isnotnull(i_class_id#8)) AND isnotnull(i_manufact_id#11)) + +(7) Project [codegen id : 1] +Output [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] + +(8) BroadcastExchange +Input [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_item_sk#2] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(10) Project [codegen id : 4] +Output [9]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Input [10]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(11) Scan parquet default.date_dim +Output [2]: [d_date_sk#13, d_year#14] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#13, d_year#14] + +(13) Filter [codegen id : 2] +Input [2]: [d_date_sk#13, d_year#14] +Condition : ((isnotnull(d_year#14) AND (d_year#14 = 2002)) AND isnotnull(d_date_sk#13)) + +(14) BroadcastExchange +Input [2]: [d_date_sk#13, d_year#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#15] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#13] +Join condition: None + +(16) Project [codegen id : 4] +Output [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Input [11]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#13, d_year#14] + +(17) Scan parquet default.catalog_returns +Output [4]: [cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/catalog_returns] +PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [4]: [cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] + +(19) Filter [codegen id : 3] +Input [4]: [cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] +Condition : (isnotnull(cr_order_number#17) AND isnotnull(cr_item_sk#16)) + +(20) BroadcastExchange +Input [4]: [cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] +Arguments: 
HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, false] as bigint), 32) | (cast(input[0, int, false] as bigint) & 4294967295))),false), [id=#20] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [2]: [cs_order_number#3, cs_item_sk#2] +Right keys [2]: [cr_order_number#17, cr_item_sk#16] +Join condition: None + +(22) Project [codegen id : 4] +Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (cs_quantity#4 - coalesce(cr_return_quantity#18, 0)) AS sales_cnt#21, CheckOverflow((promote_precision(cast(cs_ext_sales_price#5 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#19, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#22] +Input [13]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] + +(23) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 8] +Input [5]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27] + +(25) Filter [codegen id : 8] +Input [5]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27] +Condition : (isnotnull(ss_item_sk#24) AND isnotnull(ss_sold_date_sk#23)) + +(26) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(27) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_item_sk#24] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(28) 
Project [codegen id : 8] +Output [9]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Input [10]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(29) ReusedExchange [Reuses operator id: 14] +Output [2]: [d_date_sk#13, d_year#14] + +(30) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_sold_date_sk#23] +Right keys [1]: [d_date_sk#13] +Join condition: None + +(31) Project [codegen id : 8] +Output [9]: [ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Input [11]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#13, d_year#14] + +(32) Scan parquet default.store_returns +Output [4]: [sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(33) ColumnarToRow [codegen id : 7] +Input [4]: [sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] + +(34) Filter [codegen id : 7] +Input [4]: [sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] +Condition : (isnotnull(sr_ticket_number#29) AND isnotnull(sr_item_sk#28)) + +(35) BroadcastExchange +Input [4]: [sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#32] + +(36) BroadcastHashJoin [codegen id : 8] +Left keys 
[2]: [cast(ss_ticket_number#25 as bigint), cast(ss_item_sk#24 as bigint)] +Right keys [2]: [sr_ticket_number#29, sr_item_sk#28] +Join condition: None + +(37) Project [codegen id : 8] +Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (ss_quantity#26 - coalesce(sr_return_quantity#30, 0)) AS sales_cnt#33, CheckOverflow((promote_precision(cast(ss_ext_sales_price#27 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#31, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#34] +Input [13]: [ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] + +(38) Union + +(39) HashAggregate [codegen id : 9] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] +Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] + +(40) Exchange +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] +Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22, 5), true, [id=#35] + +(41) HashAggregate [codegen id : 10] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] +Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] + +(42) Scan parquet default.web_sales +Output [5]: 
[ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(43) ColumnarToRow [codegen id : 14] +Input [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] + +(44) Filter [codegen id : 14] +Input [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] +Condition : (isnotnull(ws_item_sk#37) AND isnotnull(ws_sold_date_sk#36)) + +(45) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(46) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ws_item_sk#37] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(47) Project [codegen id : 14] +Output [9]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Input [10]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(48) ReusedExchange [Reuses operator id: 14] +Output [2]: [d_date_sk#13, d_year#14] + +(49) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ws_sold_date_sk#36] +Right keys [1]: [d_date_sk#13] +Join condition: None + +(50) Project [codegen id : 14] +Output [9]: [ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Input [11]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#13, d_year#14] + +(51) Scan 
parquet default.web_returns +Output [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/web_returns] +PushedFilters: [IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] +ReadSchema: struct + +(52) ColumnarToRow [codegen id : 13] +Input [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] + +(53) Filter [codegen id : 13] +Input [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] +Condition : (isnotnull(wr_order_number#42) AND isnotnull(wr_item_sk#41)) + +(54) BroadcastExchange +Input [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#45] + +(55) BroadcastHashJoin [codegen id : 14] +Left keys [2]: [cast(ws_order_number#38 as bigint), cast(ws_item_sk#37 as bigint)] +Right keys [2]: [wr_order_number#42, wr_item_sk#41] +Join condition: None + +(56) Project [codegen id : 14] +Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (ws_quantity#39 - coalesce(wr_return_quantity#43, 0)) AS sales_cnt#46, CheckOverflow((promote_precision(cast(ws_ext_sales_price#40 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#44, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#47] +Input [13]: [ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] + +(57) Union + +(58) HashAggregate [codegen id : 15] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] +Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, 
i_manufact_id#11, sales_cnt#21, sales_amt#22] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] + +(59) Exchange +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] +Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22, 5), true, [id=#48] + +(60) HashAggregate [codegen id : 16] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] +Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] + +(61) HashAggregate [codegen id : 16] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] +Keys [5]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Functions [2]: [partial_sum(cast(sales_cnt#21 as bigint)), partial_sum(UnscaledValue(sales_amt#22))] +Aggregate Attributes [2]: [sum#49, sum#50] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#51, sum#52] + +(62) Exchange +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#51, sum#52] +Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), true, [id=#53] + +(63) HashAggregate [codegen id : 34] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#51, sum#52] +Keys [5]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Functions [2]: [sum(cast(sales_cnt#21 as bigint)), sum(UnscaledValue(sales_amt#22))] +Aggregate Attributes [2]: 
[sum(cast(sales_cnt#21 as bigint))#54, sum(UnscaledValue(sales_amt#22))#55] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum(cast(sales_cnt#21 as bigint))#54 AS sales_cnt#56, MakeDecimal(sum(UnscaledValue(sales_amt#22))#55,18,2) AS sales_amt#57] + +(64) Scan parquet default.catalog_sales +Output [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(65) ColumnarToRow [codegen id : 20] +Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] + +(66) Filter [codegen id : 20] +Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] +Condition : (isnotnull(cs_item_sk#2) AND isnotnull(cs_sold_date_sk#1)) + +(67) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] + +(68) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [cs_item_sk#2] +Right keys [1]: [i_item_sk#58] +Join condition: None + +(69) Project [codegen id : 20] +Output [9]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +Input [10]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] + +(70) Scan parquet default.date_dim +Output [2]: [d_date_sk#63, d_year#64] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), 
EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(71) ColumnarToRow [codegen id : 18] +Input [2]: [d_date_sk#63, d_year#64] + +(72) Filter [codegen id : 18] +Input [2]: [d_date_sk#63, d_year#64] +Condition : ((isnotnull(d_year#64) AND (d_year#64 = 2001)) AND isnotnull(d_date_sk#63)) + +(73) BroadcastExchange +Input [2]: [d_date_sk#63, d_year#64] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#65] + +(74) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#63] +Join condition: None + +(75) Project [codegen id : 20] +Output [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64] +Input [11]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_date_sk#63, d_year#64] + +(76) ReusedExchange [Reuses operator id: 20] +Output [4]: [cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] + +(77) BroadcastHashJoin [codegen id : 20] +Left keys [2]: [cs_order_number#3, cs_item_sk#2] +Right keys [2]: [cr_order_number#17, cr_item_sk#16] +Join condition: None + +(78) Project [codegen id : 20] +Output [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, (cs_quantity#4 - coalesce(cr_return_quantity#18, 0)) AS sales_cnt#21, CheckOverflow((promote_precision(cast(cs_ext_sales_price#5 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#19, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#22] +Input [13]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64, cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] + +(79) Scan parquet default.store_sales +Output [5]: 
[ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(80) ColumnarToRow [codegen id : 24] +Input [5]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27] + +(81) Filter [codegen id : 24] +Input [5]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27] +Condition : (isnotnull(ss_item_sk#24) AND isnotnull(ss_sold_date_sk#23)) + +(82) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] + +(83) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [ss_item_sk#24] +Right keys [1]: [i_item_sk#58] +Join condition: None + +(84) Project [codegen id : 24] +Output [9]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +Input [10]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] + +(85) ReusedExchange [Reuses operator id: 73] +Output [2]: [d_date_sk#63, d_year#64] + +(86) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [ss_sold_date_sk#23] +Right keys [1]: [d_date_sk#63] +Join condition: None + +(87) Project [codegen id : 24] +Output [9]: [ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64] +Input [11]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_date_sk#63, 
d_year#64] + +(88) ReusedExchange [Reuses operator id: 35] +Output [4]: [sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] + +(89) BroadcastHashJoin [codegen id : 24] +Left keys [2]: [cast(ss_ticket_number#25 as bigint), cast(ss_item_sk#24 as bigint)] +Right keys [2]: [sr_ticket_number#29, sr_item_sk#28] +Join condition: None + +(90) Project [codegen id : 24] +Output [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, (ss_quantity#26 - coalesce(sr_return_quantity#30, 0)) AS sales_cnt#66, CheckOverflow((promote_precision(cast(ss_ext_sales_price#27 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#31, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#67] +Input [13]: [ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64, sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] + +(91) Union + +(92) HashAggregate [codegen id : 25] +Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Keys [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] + +(93) Exchange +Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Arguments: hashpartitioning(d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22, 5), true, [id=#68] + +(94) HashAggregate [codegen id : 26] +Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Keys [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, 
sales_amt#22] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] + +(95) Scan parquet default.web_sales +Output [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(96) ColumnarToRow [codegen id : 30] +Input [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] + +(97) Filter [codegen id : 30] +Input [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] +Condition : (isnotnull(ws_item_sk#37) AND isnotnull(ws_sold_date_sk#36)) + +(98) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] + +(99) BroadcastHashJoin [codegen id : 30] +Left keys [1]: [ws_item_sk#37] +Right keys [1]: [i_item_sk#58] +Join condition: None + +(100) Project [codegen id : 30] +Output [9]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +Input [10]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] + +(101) ReusedExchange [Reuses operator id: 73] +Output [2]: [d_date_sk#63, d_year#64] + +(102) BroadcastHashJoin [codegen id : 30] +Left keys [1]: [ws_sold_date_sk#36] +Right keys [1]: [d_date_sk#63] +Join condition: None + +(103) Project [codegen id : 30] +Output [9]: [ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#59, i_class_id#60, 
i_category_id#61, i_manufact_id#62, d_year#64] +Input [11]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_date_sk#63, d_year#64] + +(104) ReusedExchange [Reuses operator id: 54] +Output [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] + +(105) BroadcastHashJoin [codegen id : 30] +Left keys [2]: [cast(ws_order_number#38 as bigint), cast(ws_item_sk#37 as bigint)] +Right keys [2]: [wr_order_number#42, wr_item_sk#41] +Join condition: None + +(106) Project [codegen id : 30] +Output [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, (ws_quantity#39 - coalesce(wr_return_quantity#43, 0)) AS sales_cnt#69, CheckOverflow((promote_precision(cast(ws_ext_sales_price#40 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#44, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#70] +Input [13]: [ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64, wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] + +(107) Union + +(108) HashAggregate [codegen id : 31] +Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Keys [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] + +(109) Exchange +Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Arguments: hashpartitioning(d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22, 5), true, [id=#71] + +(110) HashAggregate [codegen id 
: 32] +Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Keys [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] + +(111) HashAggregate [codegen id : 32] +Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Keys [5]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +Functions [2]: [partial_sum(cast(sales_cnt#21 as bigint)), partial_sum(UnscaledValue(sales_amt#22))] +Aggregate Attributes [2]: [sum#72, sum#73] +Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sum#74, sum#75] + +(112) Exchange +Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sum#74, sum#75] +Arguments: hashpartitioning(d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, 5), true, [id=#76] + +(113) HashAggregate [codegen id : 33] +Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sum#74, sum#75] +Keys [5]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +Functions [2]: [sum(cast(sales_cnt#21 as bigint)), sum(UnscaledValue(sales_amt#22))] +Aggregate Attributes [2]: [sum(cast(sales_cnt#21 as bigint))#77, sum(UnscaledValue(sales_amt#22))#78] +Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sum(cast(sales_cnt#21 as bigint))#77 AS sales_cnt#79, MakeDecimal(sum(UnscaledValue(sales_amt#22))#78,18,2) AS sales_amt#80] + +(114) BroadcastExchange +Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#79, sales_amt#80] +Arguments: HashedRelationBroadcastMode(List(input[1, int, 
true], input[2, int, true], input[3, int, true], input[4, int, true]),false), [id=#81] + +(115) BroadcastHashJoin [codegen id : 34] +Left keys [4]: [i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Right keys [4]: [i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +Join condition: (CheckOverflow((promote_precision(cast(sales_cnt#56 as decimal(17,2))) / promote_precision(cast(sales_cnt#79 as decimal(17,2)))), DecimalType(37,20), true) < 0.90000000000000000000) + +(116) Project [codegen id : 34] +Output [10]: [d_year#64 AS prev_year#82, d_year#14 AS year#83, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#79 AS prev_yr_cnt#84, sales_cnt#56 AS curr_yr_cnt#85, (sales_cnt#56 - sales_cnt#79) AS sales_cnt_diff#86, CheckOverflow((promote_precision(cast(sales_amt#57 as decimal(19,2))) - promote_precision(cast(sales_amt#80 as decimal(19,2)))), DecimalType(19,2), true) AS sales_amt_diff#87] +Input [14]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#56, sales_amt#57, d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#79, sales_amt#80] + +(117) TakeOrderedAndProject +Input [10]: [prev_year#82, year#83, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#84, curr_yr_cnt#85, sales_cnt_diff#86, sales_amt_diff#87] +Arguments: 100, [sales_cnt_diff#86 ASC NULLS FIRST, sales_amt_diff#87 ASC NULLS FIRST], [prev_year#82, year#83, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#84, curr_yr_cnt#85, sales_cnt_diff#86, sales_amt_diff#87] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75/simplified.txt new file mode 100644 index 0000000000000..4974c17705d87 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75/simplified.txt @@ -0,0 +1,180 @@ 
+TakeOrderedAndProject [curr_yr_cnt,i_brand_id,i_category_id,i_class_id,i_manufact_id,prev_year,prev_yr_cnt,sales_amt_diff,sales_cnt_diff,year] + WholeStageCodegen (34) + Project [d_year,d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_amt,sales_cnt,sales_cnt] + BroadcastHashJoin [i_brand_id,i_brand_id,i_category_id,i_category_id,i_class_id,i_class_id,i_manufact_id,i_manufact_id,sales_cnt,sales_cnt] + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sum,sum] [sales_amt,sales_cnt,sum,sum,sum(UnscaledValue(sales_amt)),sum(cast(sales_cnt as bigint))] + InputAdapter + Exchange [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id] #1 + WholeStageCodegen (16) + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] [sum,sum,sum,sum] + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + InputAdapter + Exchange [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] #2 + WholeStageCodegen (15) + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + InputAdapter + Union + WholeStageCodegen (10) + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + InputAdapter + Exchange [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] #3 + WholeStageCodegen (9) + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + InputAdapter + Union + WholeStageCodegen (4) + Project [cr_return_amount,cr_return_quantity,cs_ext_sales_price,cs_quantity,d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id] + BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + Project [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project 
[cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,cs_sold_date_sk,i_brand_id,i_category_id,i_class_id,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,cs_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] + Filter [i_brand_id,i_category,i_category_id,i_class_id,i_item_sk,i_manufact_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_brand_id,i_category,i_category_id,i_class_id,i_item_sk,i_manufact_id] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] + WholeStageCodegen (8) + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sr_return_amt,sr_return_quantity,ss_ext_sales_price,ss_quantity] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,ss_ext_sales_price,ss_item_sk,ss_quantity,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,i_manufact_id,ss_ext_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ticket_number] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ticket_number] + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] #4 + InputAdapter + 
ReusedExchange [d_date_sk,d_year] #5 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (7) + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] + WholeStageCodegen (14) + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,wr_return_amt,wr_return_quantity,ws_ext_sales_price,ws_quantity] + BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,i_manufact_id,ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity,ws_sold_date_sk] + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] #4 + InputAdapter + ReusedExchange [d_date_sk,d_year] #5 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (13) + Filter [wr_item_sk,wr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (33) + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sum,sum] [sales_amt,sales_cnt,sum,sum,sum(UnscaledValue(sales_amt)),sum(cast(sales_cnt as bigint))] + InputAdapter + Exchange [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id] #10 + WholeStageCodegen (32) + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] [sum,sum,sum,sum] + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + InputAdapter + Exchange 
[d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] #11 + WholeStageCodegen (31) + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + InputAdapter + Union + WholeStageCodegen (26) + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + InputAdapter + Exchange [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] #12 + WholeStageCodegen (25) + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + InputAdapter + Union + WholeStageCodegen (20) + Project [cr_return_amount,cr_return_quantity,cs_ext_sales_price,cs_quantity,d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id] + BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + Project [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,cs_sold_date_sk,i_brand_id,i_category_id,i_class_id,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,cs_sold_date_sk] + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] #4 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (18) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + ReusedExchange [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] #6 + WholeStageCodegen (24) + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sr_return_amt,sr_return_quantity,ss_ext_sales_price,ss_quantity] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Project 
[d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,ss_ext_sales_price,ss_item_sk,ss_quantity,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,i_manufact_id,ss_ext_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ticket_number] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ticket_number] + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] #4 + InputAdapter + ReusedExchange [d_date_sk,d_year] #13 + InputAdapter + ReusedExchange [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] #7 + WholeStageCodegen (30) + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,wr_return_amt,wr_return_quantity,ws_ext_sales_price,ws_quantity] + BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,i_manufact_id,ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity,ws_sold_date_sk] + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] #4 + InputAdapter + ReusedExchange [d_date_sk,d_year] #13 + InputAdapter + ReusedExchange [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] #8 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/explain.txt new file 
mode 100644 index 0000000000000..dfeee524d5e06 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/explain.txt @@ -0,0 +1,629 @@ +== Physical Plan == +TakeOrderedAndProject (108) ++- * HashAggregate (107) + +- Exchange (106) + +- * HashAggregate (105) + +- Union (104) + :- * HashAggregate (98) + : +- Exchange (97) + : +- * HashAggregate (96) + : +- Union (95) + : :- * HashAggregate (89) + : : +- Exchange (88) + : : +- * HashAggregate (87) + : : +- Union (86) + : : :- * Project (34) + : : : +- * BroadcastHashJoin LeftOuter BuildRight (33) + : : : :- * HashAggregate (19) + : : : : +- Exchange (18) + : : : : +- * HashAggregate (17) + : : : : +- * Project (16) + : : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : : :- * Project (10) + : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : :- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet default.store_sales (1) + : : : : : +- BroadcastExchange (8) + : : : : : +- * Project (7) + : : : : : +- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.date_dim (4) + : : : : +- BroadcastExchange (14) + : : : : +- * Filter (13) + : : : : +- * ColumnarToRow (12) + : : : : +- Scan parquet default.store (11) + : : : +- BroadcastExchange (32) + : : : +- * HashAggregate (31) + : : : +- Exchange (30) + : : : +- * HashAggregate (29) + : : : +- * Project (28) + : : : +- * BroadcastHashJoin Inner BuildRight (27) + : : : :- * Project (25) + : : : : +- * BroadcastHashJoin Inner BuildRight (24) + : : : : :- * Filter (22) + : : : : : +- * ColumnarToRow (21) + : : : : : +- Scan parquet default.store_returns (20) + : : : : +- ReusedExchange (23) + : : : +- ReusedExchange (26) + : : :- * Project (55) + : : : +- BroadcastNestedLoopJoin Inner BuildRight (54) + : : : :- * HashAggregate (43) + : : : : +- Exchange (42) + : : : : +- * HashAggregate (41) + : : : : +- * Project (40) + : : : : +- * BroadcastHashJoin Inner 
BuildRight (39) + : : : : :- * Filter (37) + : : : : : +- * ColumnarToRow (36) + : : : : : +- Scan parquet default.catalog_sales (35) + : : : : +- ReusedExchange (38) + : : : +- BroadcastExchange (53) + : : : +- * HashAggregate (52) + : : : +- Exchange (51) + : : : +- * HashAggregate (50) + : : : +- * Project (49) + : : : +- * BroadcastHashJoin Inner BuildRight (48) + : : : :- * Filter (46) + : : : : +- * ColumnarToRow (45) + : : : : +- Scan parquet default.catalog_returns (44) + : : : +- ReusedExchange (47) + : : +- * Project (85) + : : +- * BroadcastHashJoin LeftOuter BuildRight (84) + : : :- * HashAggregate (70) + : : : +- Exchange (69) + : : : +- * HashAggregate (68) + : : : +- * Project (67) + : : : +- * BroadcastHashJoin Inner BuildRight (66) + : : : :- * Project (61) + : : : : +- * BroadcastHashJoin Inner BuildRight (60) + : : : : :- * Filter (58) + : : : : : +- * ColumnarToRow (57) + : : : : : +- Scan parquet default.web_sales (56) + : : : : +- ReusedExchange (59) + : : : +- BroadcastExchange (65) + : : : +- * Filter (64) + : : : +- * ColumnarToRow (63) + : : : +- Scan parquet default.web_page (62) + : : +- BroadcastExchange (83) + : : +- * HashAggregate (82) + : : +- Exchange (81) + : : +- * HashAggregate (80) + : : +- * Project (79) + : : +- * BroadcastHashJoin Inner BuildRight (78) + : : :- * Project (76) + : : : +- * BroadcastHashJoin Inner BuildRight (75) + : : : :- * Filter (73) + : : : : +- * ColumnarToRow (72) + : : : : +- Scan parquet default.web_returns (71) + : : : +- ReusedExchange (74) + : : +- ReusedExchange (77) + : +- * HashAggregate (94) + : +- Exchange (93) + : +- * HashAggregate (92) + : +- * HashAggregate (91) + : +- ReusedExchange (90) + +- * HashAggregate (103) + +- Exchange (102) + +- * HashAggregate (101) + +- * HashAggregate (100) + +- ReusedExchange (99) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] + +(3) Filter [codegen id : 3] +Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#2)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#5, d_date#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1998-08-04), LessThanOrEqual(d_date,1998-09-03), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#5, d_date#6] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#5, d_date#6] +Condition : (((isnotnull(d_date#6) AND (d_date#6 >= 10442)) AND (d_date#6 <= 10472)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [2]: [d_date_sk#5, d_date#6] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] +Input [5]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, d_date_sk#5] + +(11) Scan parquet default.store +Output [1]: [s_store_sk#8] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [1]: [s_store_sk#8] + +(13) Filter [codegen id : 2] +Input [1]: [s_store_sk#8] +Condition : isnotnull(s_store_sk#8) + +(14) BroadcastExchange +Input [1]: [s_store_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#8] +Join condition: None + +(16) Project [codegen id : 3] +Output [3]: [ss_ext_sales_price#3, ss_net_profit#4, s_store_sk#8] +Input [4]: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, s_store_sk#8] + +(17) HashAggregate [codegen id : 3] +Input [3]: [ss_ext_sales_price#3, ss_net_profit#4, s_store_sk#8] +Keys [1]: [s_store_sk#8] +Functions [2]: [partial_sum(UnscaledValue(ss_ext_sales_price#3)), partial_sum(UnscaledValue(ss_net_profit#4))] +Aggregate Attributes [2]: [sum#10, sum#11] +Results [3]: [s_store_sk#8, sum#12, sum#13] + +(18) Exchange +Input [3]: [s_store_sk#8, sum#12, sum#13] +Arguments: hashpartitioning(s_store_sk#8, 5), true, [id=#14] + +(19) HashAggregate [codegen id : 8] +Input [3]: [s_store_sk#8, sum#12, sum#13] +Keys [1]: [s_store_sk#8] +Functions [2]: [sum(UnscaledValue(ss_ext_sales_price#3)), sum(UnscaledValue(ss_net_profit#4))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_ext_sales_price#3))#15, sum(UnscaledValue(ss_net_profit#4))#16] +Results [3]: [s_store_sk#8, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#15,17,2) AS sales#17, MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#16,17,2) AS profit#18] + +(20) Scan parquet default.store_returns +Output [4]: [sr_returned_date_sk#19, sr_store_sk#20, sr_return_amt#21, sr_net_loss#22] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_returns] +PushedFilters: [IsNotNull(sr_returned_date_sk), IsNotNull(sr_store_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [4]: [sr_returned_date_sk#19, sr_store_sk#20, sr_return_amt#21, sr_net_loss#22] + +(22) Filter [codegen id : 6] +Input [4]: [sr_returned_date_sk#19, sr_store_sk#20, sr_return_amt#21, sr_net_loss#22] +Condition : (isnotnull(sr_returned_date_sk#19) AND isnotnull(sr_store_sk#20)) + +(23) ReusedExchange [Reuses operator id: 14] +Output [1]: [s_store_sk#23] + +(24) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [sr_store_sk#20] +Right keys [1]: [cast(s_store_sk#23 as bigint)] +Join condition: None + +(25) Project [codegen id : 6] +Output [4]: [sr_returned_date_sk#19, sr_return_amt#21, sr_net_loss#22, s_store_sk#23] +Input [5]: [sr_returned_date_sk#19, sr_store_sk#20, sr_return_amt#21, sr_net_loss#22, s_store_sk#23] + +(26) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(27) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [sr_returned_date_sk#19] +Right keys [1]: [cast(d_date_sk#5 as bigint)] +Join condition: None + +(28) Project [codegen id : 6] +Output [3]: [sr_return_amt#21, sr_net_loss#22, s_store_sk#23] +Input [5]: [sr_returned_date_sk#19, sr_return_amt#21, sr_net_loss#22, s_store_sk#23, d_date_sk#5] + +(29) HashAggregate [codegen id : 6] +Input [3]: [sr_return_amt#21, sr_net_loss#22, s_store_sk#23] +Keys [1]: [s_store_sk#23] +Functions [2]: [partial_sum(UnscaledValue(sr_return_amt#21)), partial_sum(UnscaledValue(sr_net_loss#22))] +Aggregate Attributes [2]: [sum#24, sum#25] +Results [3]: [s_store_sk#23, sum#26, sum#27] + +(30) Exchange +Input [3]: [s_store_sk#23, sum#26, sum#27] +Arguments: hashpartitioning(s_store_sk#23, 5), true, [id=#28] + +(31) HashAggregate [codegen id : 7] +Input [3]: [s_store_sk#23, sum#26, sum#27] +Keys [1]: [s_store_sk#23] +Functions [2]: 
[sum(UnscaledValue(sr_return_amt#21)), sum(UnscaledValue(sr_net_loss#22))] +Aggregate Attributes [2]: [sum(UnscaledValue(sr_return_amt#21))#29, sum(UnscaledValue(sr_net_loss#22))#30] +Results [3]: [s_store_sk#23, MakeDecimal(sum(UnscaledValue(sr_return_amt#21))#29,17,2) AS returns#31, MakeDecimal(sum(UnscaledValue(sr_net_loss#22))#30,17,2) AS profit_loss#32] + +(32) BroadcastExchange +Input [3]: [s_store_sk#23, returns#31, profit_loss#32] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#33] + +(33) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [s_store_sk#8] +Right keys [1]: [s_store_sk#23] +Join condition: None + +(34) Project [codegen id : 8] +Output [5]: [store channel AS channel#34, s_store_sk#8 AS id#35, sales#17, coalesce(returns#31, 0.00) AS returns#36, CheckOverflow((promote_precision(cast(profit#18 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#32, 0.00) as decimal(18,2)))), DecimalType(18,2), true) AS profit#37] +Input [6]: [s_store_sk#8, sales#17, profit#18, s_store_sk#23, returns#31, profit_loss#32] + +(35) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#38, cs_call_center_sk#39, cs_ext_sales_price#40, cs_net_profit#41] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(36) ColumnarToRow [codegen id : 10] +Input [4]: [cs_sold_date_sk#38, cs_call_center_sk#39, cs_ext_sales_price#40, cs_net_profit#41] + +(37) Filter [codegen id : 10] +Input [4]: [cs_sold_date_sk#38, cs_call_center_sk#39, cs_ext_sales_price#40, cs_net_profit#41] +Condition : isnotnull(cs_sold_date_sk#38) + +(38) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(39) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_sold_date_sk#38] +Right keys [1]: [d_date_sk#5] +Join condition: None + 
+(40) Project [codegen id : 10] +Output [3]: [cs_call_center_sk#39, cs_ext_sales_price#40, cs_net_profit#41] +Input [5]: [cs_sold_date_sk#38, cs_call_center_sk#39, cs_ext_sales_price#40, cs_net_profit#41, d_date_sk#5] + +(41) HashAggregate [codegen id : 10] +Input [3]: [cs_call_center_sk#39, cs_ext_sales_price#40, cs_net_profit#41] +Keys [1]: [cs_call_center_sk#39] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_sales_price#40)), partial_sum(UnscaledValue(cs_net_profit#41))] +Aggregate Attributes [2]: [sum#42, sum#43] +Results [3]: [cs_call_center_sk#39, sum#44, sum#45] + +(42) Exchange +Input [3]: [cs_call_center_sk#39, sum#44, sum#45] +Arguments: hashpartitioning(cs_call_center_sk#39, 5), true, [id=#46] + +(43) HashAggregate [codegen id : 11] +Input [3]: [cs_call_center_sk#39, sum#44, sum#45] +Keys [1]: [cs_call_center_sk#39] +Functions [2]: [sum(UnscaledValue(cs_ext_sales_price#40)), sum(UnscaledValue(cs_net_profit#41))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_sales_price#40))#47, sum(UnscaledValue(cs_net_profit#41))#48] +Results [3]: [cs_call_center_sk#39, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#40))#47,17,2) AS sales#49, MakeDecimal(sum(UnscaledValue(cs_net_profit#41))#48,17,2) AS profit#50] + +(44) Scan parquet default.catalog_returns +Output [3]: [cr_returned_date_sk#51, cr_return_amount#52, cr_net_loss#53] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/catalog_returns] +PushedFilters: [IsNotNull(cr_returned_date_sk)] +ReadSchema: struct + +(45) ColumnarToRow [codegen id : 13] +Input [3]: [cr_returned_date_sk#51, cr_return_amount#52, cr_net_loss#53] + +(46) Filter [codegen id : 13] +Input [3]: [cr_returned_date_sk#51, cr_return_amount#52, cr_net_loss#53] +Condition : isnotnull(cr_returned_date_sk#51) + +(47) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(48) BroadcastHashJoin [codegen id : 13] 
+Left keys [1]: [cr_returned_date_sk#51] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(49) Project [codegen id : 13] +Output [2]: [cr_return_amount#52, cr_net_loss#53] +Input [4]: [cr_returned_date_sk#51, cr_return_amount#52, cr_net_loss#53, d_date_sk#5] + +(50) HashAggregate [codegen id : 13] +Input [2]: [cr_return_amount#52, cr_net_loss#53] +Keys: [] +Functions [2]: [partial_sum(UnscaledValue(cr_return_amount#52)), partial_sum(UnscaledValue(cr_net_loss#53))] +Aggregate Attributes [2]: [sum#54, sum#55] +Results [2]: [sum#56, sum#57] + +(51) Exchange +Input [2]: [sum#56, sum#57] +Arguments: SinglePartition, true, [id=#58] + +(52) HashAggregate [codegen id : 14] +Input [2]: [sum#56, sum#57] +Keys: [] +Functions [2]: [sum(UnscaledValue(cr_return_amount#52)), sum(UnscaledValue(cr_net_loss#53))] +Aggregate Attributes [2]: [sum(UnscaledValue(cr_return_amount#52))#59, sum(UnscaledValue(cr_net_loss#53))#60] +Results [2]: [MakeDecimal(sum(UnscaledValue(cr_return_amount#52))#59,17,2) AS returns#61, MakeDecimal(sum(UnscaledValue(cr_net_loss#53))#60,17,2) AS profit_loss#62] + +(53) BroadcastExchange +Input [2]: [returns#61, profit_loss#62] +Arguments: IdentityBroadcastMode, [id=#63] + +(54) BroadcastNestedLoopJoin +Join condition: None + +(55) Project [codegen id : 15] +Output [5]: [catalog channel AS channel#64, cs_call_center_sk#39 AS id#65, sales#49, returns#61, CheckOverflow((promote_precision(cast(profit#50 as decimal(18,2))) - promote_precision(cast(profit_loss#62 as decimal(18,2)))), DecimalType(18,2), true) AS profit#66] +Input [5]: [cs_call_center_sk#39, sales#49, profit#50, returns#61, profit_loss#62] + +(56) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#67, ws_web_page_sk#68, ws_ext_sales_price#69, ws_net_profit#70] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), 
IsNotNull(ws_web_page_sk)] +ReadSchema: struct + +(57) ColumnarToRow [codegen id : 18] +Input [4]: [ws_sold_date_sk#67, ws_web_page_sk#68, ws_ext_sales_price#69, ws_net_profit#70] + +(58) Filter [codegen id : 18] +Input [4]: [ws_sold_date_sk#67, ws_web_page_sk#68, ws_ext_sales_price#69, ws_net_profit#70] +Condition : (isnotnull(ws_sold_date_sk#67) AND isnotnull(ws_web_page_sk#68)) + +(59) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(60) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [ws_sold_date_sk#67] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(61) Project [codegen id : 18] +Output [3]: [ws_web_page_sk#68, ws_ext_sales_price#69, ws_net_profit#70] +Input [5]: [ws_sold_date_sk#67, ws_web_page_sk#68, ws_ext_sales_price#69, ws_net_profit#70, d_date_sk#5] + +(62) Scan parquet default.web_page +Output [1]: [wp_web_page_sk#71] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/web_page] +PushedFilters: [IsNotNull(wp_web_page_sk)] +ReadSchema: struct + +(63) ColumnarToRow [codegen id : 17] +Input [1]: [wp_web_page_sk#71] + +(64) Filter [codegen id : 17] +Input [1]: [wp_web_page_sk#71] +Condition : isnotnull(wp_web_page_sk#71) + +(65) BroadcastExchange +Input [1]: [wp_web_page_sk#71] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#72] + +(66) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [ws_web_page_sk#68] +Right keys [1]: [wp_web_page_sk#71] +Join condition: None + +(67) Project [codegen id : 18] +Output [3]: [ws_ext_sales_price#69, ws_net_profit#70, wp_web_page_sk#71] +Input [4]: [ws_web_page_sk#68, ws_ext_sales_price#69, ws_net_profit#70, wp_web_page_sk#71] + +(68) HashAggregate [codegen id : 18] +Input [3]: [ws_ext_sales_price#69, ws_net_profit#70, wp_web_page_sk#71] +Keys [1]: [wp_web_page_sk#71] +Functions [2]: 
[partial_sum(UnscaledValue(ws_ext_sales_price#69)), partial_sum(UnscaledValue(ws_net_profit#70))] +Aggregate Attributes [2]: [sum#73, sum#74] +Results [3]: [wp_web_page_sk#71, sum#75, sum#76] + +(69) Exchange +Input [3]: [wp_web_page_sk#71, sum#75, sum#76] +Arguments: hashpartitioning(wp_web_page_sk#71, 5), true, [id=#77] + +(70) HashAggregate [codegen id : 23] +Input [3]: [wp_web_page_sk#71, sum#75, sum#76] +Keys [1]: [wp_web_page_sk#71] +Functions [2]: [sum(UnscaledValue(ws_ext_sales_price#69)), sum(UnscaledValue(ws_net_profit#70))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_sales_price#69))#78, sum(UnscaledValue(ws_net_profit#70))#79] +Results [3]: [wp_web_page_sk#71, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#69))#78,17,2) AS sales#80, MakeDecimal(sum(UnscaledValue(ws_net_profit#70))#79,17,2) AS profit#81] + +(71) Scan parquet default.web_returns +Output [4]: [wr_returned_date_sk#82, wr_web_page_sk#83, wr_return_amt#84, wr_net_loss#85] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/web_returns] +PushedFilters: [IsNotNull(wr_returned_date_sk), IsNotNull(wr_web_page_sk)] +ReadSchema: struct + +(72) ColumnarToRow [codegen id : 21] +Input [4]: [wr_returned_date_sk#82, wr_web_page_sk#83, wr_return_amt#84, wr_net_loss#85] + +(73) Filter [codegen id : 21] +Input [4]: [wr_returned_date_sk#82, wr_web_page_sk#83, wr_return_amt#84, wr_net_loss#85] +Condition : (isnotnull(wr_returned_date_sk#82) AND isnotnull(wr_web_page_sk#83)) + +(74) ReusedExchange [Reuses operator id: 65] +Output [1]: [wp_web_page_sk#86] + +(75) BroadcastHashJoin [codegen id : 21] +Left keys [1]: [wr_web_page_sk#83] +Right keys [1]: [cast(wp_web_page_sk#86 as bigint)] +Join condition: None + +(76) Project [codegen id : 21] +Output [4]: [wr_returned_date_sk#82, wr_return_amt#84, wr_net_loss#85, wp_web_page_sk#86] +Input [5]: [wr_returned_date_sk#82, wr_web_page_sk#83, 
wr_return_amt#84, wr_net_loss#85, wp_web_page_sk#86] + +(77) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(78) BroadcastHashJoin [codegen id : 21] +Left keys [1]: [wr_returned_date_sk#82] +Right keys [1]: [cast(d_date_sk#5 as bigint)] +Join condition: None + +(79) Project [codegen id : 21] +Output [3]: [wr_return_amt#84, wr_net_loss#85, wp_web_page_sk#86] +Input [5]: [wr_returned_date_sk#82, wr_return_amt#84, wr_net_loss#85, wp_web_page_sk#86, d_date_sk#5] + +(80) HashAggregate [codegen id : 21] +Input [3]: [wr_return_amt#84, wr_net_loss#85, wp_web_page_sk#86] +Keys [1]: [wp_web_page_sk#86] +Functions [2]: [partial_sum(UnscaledValue(wr_return_amt#84)), partial_sum(UnscaledValue(wr_net_loss#85))] +Aggregate Attributes [2]: [sum#87, sum#88] +Results [3]: [wp_web_page_sk#86, sum#89, sum#90] + +(81) Exchange +Input [3]: [wp_web_page_sk#86, sum#89, sum#90] +Arguments: hashpartitioning(wp_web_page_sk#86, 5), true, [id=#91] + +(82) HashAggregate [codegen id : 22] +Input [3]: [wp_web_page_sk#86, sum#89, sum#90] +Keys [1]: [wp_web_page_sk#86] +Functions [2]: [sum(UnscaledValue(wr_return_amt#84)), sum(UnscaledValue(wr_net_loss#85))] +Aggregate Attributes [2]: [sum(UnscaledValue(wr_return_amt#84))#92, sum(UnscaledValue(wr_net_loss#85))#93] +Results [3]: [wp_web_page_sk#86, MakeDecimal(sum(UnscaledValue(wr_return_amt#84))#92,17,2) AS returns#94, MakeDecimal(sum(UnscaledValue(wr_net_loss#85))#93,17,2) AS profit_loss#95] + +(83) BroadcastExchange +Input [3]: [wp_web_page_sk#86, returns#94, profit_loss#95] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#96] + +(84) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [wp_web_page_sk#71] +Right keys [1]: [wp_web_page_sk#86] +Join condition: None + +(85) Project [codegen id : 23] +Output [5]: [web channel AS channel#97, wp_web_page_sk#71 AS id#98, sales#80, coalesce(returns#94, 0.00) AS returns#99, CheckOverflow((promote_precision(cast(profit#81 as 
decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#95, 0.00) as decimal(18,2)))), DecimalType(18,2), true) AS profit#100] +Input [6]: [wp_web_page_sk#71, sales#80, profit#81, wp_web_page_sk#86, returns#94, profit_loss#95] + +(86) Union + +(87) HashAggregate [codegen id : 24] +Input [5]: [channel#34, id#35, sales#17, returns#36, profit#37] +Keys [2]: [channel#34, id#35] +Functions [3]: [partial_sum(sales#17), partial_sum(returns#36), partial_sum(profit#37)] +Aggregate Attributes [6]: [sum#101, isEmpty#102, sum#103, isEmpty#104, sum#105, isEmpty#106] +Results [8]: [channel#34, id#35, sum#107, isEmpty#108, sum#109, isEmpty#110, sum#111, isEmpty#112] + +(88) Exchange +Input [8]: [channel#34, id#35, sum#107, isEmpty#108, sum#109, isEmpty#110, sum#111, isEmpty#112] +Arguments: hashpartitioning(channel#34, id#35, 5), true, [id=#113] + +(89) HashAggregate [codegen id : 25] +Input [8]: [channel#34, id#35, sum#107, isEmpty#108, sum#109, isEmpty#110, sum#111, isEmpty#112] +Keys [2]: [channel#34, id#35] +Functions [3]: [sum(sales#17), sum(returns#36), sum(profit#37)] +Aggregate Attributes [3]: [sum(sales#17)#114, sum(returns#36)#115, sum(profit#37)#116] +Results [5]: [channel#34, id#35, cast(sum(sales#17)#114 as decimal(37,2)) AS sales#117, cast(sum(returns#36)#115 as decimal(37,2)) AS returns#118, cast(sum(profit#37)#116 as decimal(38,2)) AS profit#119] + +(90) ReusedExchange [Reuses operator id: 88] +Output [8]: [channel#34, id#35, sum#120, isEmpty#121, sum#122, isEmpty#123, sum#124, isEmpty#125] + +(91) HashAggregate [codegen id : 50] +Input [8]: [channel#34, id#35, sum#120, isEmpty#121, sum#122, isEmpty#123, sum#124, isEmpty#125] +Keys [2]: [channel#34, id#35] +Functions [3]: [sum(sales#17), sum(returns#36), sum(profit#126)] +Aggregate Attributes [3]: [sum(sales#17)#127, sum(returns#36)#128, sum(profit#126)#129] +Results [4]: [channel#34, sum(sales#17)#127 AS sales#130, sum(returns#36)#128 AS returns#131, sum(profit#126)#129 AS profit#132] + +(92) HashAggregate 
[codegen id : 50] +Input [4]: [channel#34, sales#130, returns#131, profit#132] +Keys [1]: [channel#34] +Functions [3]: [partial_sum(sales#130), partial_sum(returns#131), partial_sum(profit#132)] +Aggregate Attributes [6]: [sum#133, isEmpty#134, sum#135, isEmpty#136, sum#137, isEmpty#138] +Results [7]: [channel#34, sum#139, isEmpty#140, sum#141, isEmpty#142, sum#143, isEmpty#144] + +(93) Exchange +Input [7]: [channel#34, sum#139, isEmpty#140, sum#141, isEmpty#142, sum#143, isEmpty#144] +Arguments: hashpartitioning(channel#34, 5), true, [id=#145] + +(94) HashAggregate [codegen id : 51] +Input [7]: [channel#34, sum#139, isEmpty#140, sum#141, isEmpty#142, sum#143, isEmpty#144] +Keys [1]: [channel#34] +Functions [3]: [sum(sales#130), sum(returns#131), sum(profit#132)] +Aggregate Attributes [3]: [sum(sales#130)#146, sum(returns#131)#147, sum(profit#132)#148] +Results [5]: [channel#34, null AS id#149, sum(sales#130)#146 AS sales#150, sum(returns#131)#147 AS returns#151, sum(profit#132)#148 AS profit#152] + +(95) Union + +(96) HashAggregate [codegen id : 52] +Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] +Keys [5]: [channel#34, id#35, sales#117, returns#118, profit#119] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#34, id#35, sales#117, returns#118, profit#119] + +(97) Exchange +Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] +Arguments: hashpartitioning(channel#34, id#35, sales#117, returns#118, profit#119, 5), true, [id=#153] + +(98) HashAggregate [codegen id : 53] +Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] +Keys [5]: [channel#34, id#35, sales#117, returns#118, profit#119] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#34, id#35, sales#117, returns#118, profit#119] + +(99) ReusedExchange [Reuses operator id: 88] +Output [8]: [channel#34, id#35, sum#154, isEmpty#155, sum#156, isEmpty#157, sum#158, isEmpty#159] + +(100) HashAggregate [codegen id : 78] +Input [8]: 
[channel#34, id#35, sum#154, isEmpty#155, sum#156, isEmpty#157, sum#158, isEmpty#159] +Keys [2]: [channel#34, id#35] +Functions [3]: [sum(sales#17), sum(returns#36), sum(profit#160)] +Aggregate Attributes [3]: [sum(sales#17)#161, sum(returns#36)#162, sum(profit#160)#163] +Results [3]: [sum(sales#17)#161 AS sales#130, sum(returns#36)#162 AS returns#131, sum(profit#160)#163 AS profit#132] + +(101) HashAggregate [codegen id : 78] +Input [3]: [sales#130, returns#131, profit#132] +Keys: [] +Functions [3]: [partial_sum(sales#130), partial_sum(returns#131), partial_sum(profit#132)] +Aggregate Attributes [6]: [sum#164, isEmpty#165, sum#166, isEmpty#167, sum#168, isEmpty#169] +Results [6]: [sum#170, isEmpty#171, sum#172, isEmpty#173, sum#174, isEmpty#175] + +(102) Exchange +Input [6]: [sum#170, isEmpty#171, sum#172, isEmpty#173, sum#174, isEmpty#175] +Arguments: SinglePartition, true, [id=#176] + +(103) HashAggregate [codegen id : 79] +Input [6]: [sum#170, isEmpty#171, sum#172, isEmpty#173, sum#174, isEmpty#175] +Keys: [] +Functions [3]: [sum(sales#130), sum(returns#131), sum(profit#132)] +Aggregate Attributes [3]: [sum(sales#130)#177, sum(returns#131)#178, sum(profit#132)#179] +Results [5]: [null AS channel#180, null AS id#181, sum(sales#130)#177 AS sales#182, sum(returns#131)#178 AS returns#183, sum(profit#132)#179 AS profit#184] + +(104) Union + +(105) HashAggregate [codegen id : 80] +Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] +Keys [5]: [channel#34, id#35, sales#117, returns#118, profit#119] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#34, id#35, sales#117, returns#118, profit#119] + +(106) Exchange +Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] +Arguments: hashpartitioning(channel#34, id#35, sales#117, returns#118, profit#119, 5), true, [id=#185] + +(107) HashAggregate [codegen id : 81] +Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] +Keys [5]: [channel#34, id#35, sales#117, 
returns#118, profit#119] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#34, id#35, sales#117, returns#118, profit#119] + +(108) TakeOrderedAndProject +Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] +Arguments: 100, [channel#34 ASC NULLS FIRST, id#35 ASC NULLS FIRST], [channel#34, id#35, sales#117, returns#118, profit#119] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/simplified.txt new file mode 100644 index 0000000000000..2155d4546ab99 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/simplified.txt @@ -0,0 +1,172 @@ +TakeOrderedAndProject [channel,id,profit,returns,sales] + WholeStageCodegen (81) + HashAggregate [channel,id,profit,returns,sales] + InputAdapter + Exchange [channel,id,profit,returns,sales] #1 + WholeStageCodegen (80) + HashAggregate [channel,id,profit,returns,sales] + InputAdapter + Union + WholeStageCodegen (53) + HashAggregate [channel,id,profit,returns,sales] + InputAdapter + Exchange [channel,id,profit,returns,sales] #2 + WholeStageCodegen (52) + HashAggregate [channel,id,profit,returns,sales] + InputAdapter + Union + WholeStageCodegen (25) + HashAggregate [channel,id,isEmpty,isEmpty,isEmpty,sum,sum,sum] [isEmpty,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] + InputAdapter + Exchange [channel,id] #3 + WholeStageCodegen (24) + HashAggregate [channel,id,profit,returns,sales] [isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum] + InputAdapter + Union + WholeStageCodegen (8) + Project [profit,profit_loss,returns,s_store_sk,sales] + BroadcastHashJoin [s_store_sk,s_store_sk] + HashAggregate [s_store_sk,sum,sum] [profit,sales,sum,sum,sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit))] + InputAdapter + Exchange [s_store_sk] #4 + 
WholeStageCodegen (3) + HashAggregate [s_store_sk,ss_ext_sales_price,ss_net_profit] [sum,sum,sum,sum] + Project [s_store_sk,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_ext_sales_price,ss_net_profit,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (7) + HashAggregate [s_store_sk,sum,sum] [profit_loss,returns,sum,sum,sum(UnscaledValue(sr_net_loss)),sum(UnscaledValue(sr_return_amt))] + InputAdapter + Exchange [s_store_sk] #8 + WholeStageCodegen (6) + HashAggregate [s_store_sk,sr_net_loss,sr_return_amt] [sum,sum,sum,sum] + Project [s_store_sk,sr_net_loss,sr_return_amt] + BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + Project [s_store_sk,sr_net_loss,sr_return_amt,sr_returned_date_sk] + BroadcastHashJoin [s_store_sk,sr_store_sk] + Filter [sr_returned_date_sk,sr_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_net_loss,sr_return_amt,sr_returned_date_sk,sr_store_sk] + InputAdapter + ReusedExchange [s_store_sk] #6 + InputAdapter + ReusedExchange [d_date_sk] #5 + WholeStageCodegen (15) + Project [cs_call_center_sk,profit,profit_loss,returns,sales] + InputAdapter + BroadcastNestedLoopJoin + WholeStageCodegen (11) + HashAggregate [cs_call_center_sk,sum,sum] [profit,sales,sum,sum,sum(UnscaledValue(cs_ext_sales_price)),sum(UnscaledValue(cs_net_profit))] + InputAdapter + Exchange [cs_call_center_sk] #9 + 
WholeStageCodegen (10) + HashAggregate [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] [sum,sum,sum,sum] + Project [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_call_center_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 + BroadcastExchange #10 + WholeStageCodegen (14) + HashAggregate [sum,sum] [profit_loss,returns,sum,sum,sum(UnscaledValue(cr_net_loss)),sum(UnscaledValue(cr_return_amount))] + InputAdapter + Exchange #11 + WholeStageCodegen (13) + HashAggregate [cr_net_loss,cr_return_amount] [sum,sum,sum,sum] + Project [cr_net_loss,cr_return_amount] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Filter [cr_returned_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_net_loss,cr_return_amount,cr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 + WholeStageCodegen (23) + Project [profit,profit_loss,returns,sales,wp_web_page_sk] + BroadcastHashJoin [wp_web_page_sk,wp_web_page_sk] + HashAggregate [sum,sum,wp_web_page_sk] [profit,sales,sum,sum,sum(UnscaledValue(ws_ext_sales_price)),sum(UnscaledValue(ws_net_profit))] + InputAdapter + Exchange [wp_web_page_sk] #12 + WholeStageCodegen (18) + HashAggregate [wp_web_page_sk,ws_ext_sales_price,ws_net_profit] [sum,sum,sum,sum] + Project [wp_web_page_sk,ws_ext_sales_price,ws_net_profit] + BroadcastHashJoin [wp_web_page_sk,ws_web_page_sk] + Project [ws_ext_sales_price,ws_net_profit,ws_web_page_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_sold_date_sk,ws_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,ws_web_page_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (17) + Filter [wp_web_page_sk] + ColumnarToRow + 
InputAdapter + Scan parquet default.web_page [wp_web_page_sk] + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (22) + HashAggregate [sum,sum,wp_web_page_sk] [profit_loss,returns,sum,sum,sum(UnscaledValue(wr_net_loss)),sum(UnscaledValue(wr_return_amt))] + InputAdapter + Exchange [wp_web_page_sk] #15 + WholeStageCodegen (21) + HashAggregate [wp_web_page_sk,wr_net_loss,wr_return_amt] [sum,sum,sum,sum] + Project [wp_web_page_sk,wr_net_loss,wr_return_amt] + BroadcastHashJoin [d_date_sk,wr_returned_date_sk] + Project [wp_web_page_sk,wr_net_loss,wr_return_amt,wr_returned_date_sk] + BroadcastHashJoin [wp_web_page_sk,wr_web_page_sk] + Filter [wr_returned_date_sk,wr_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_net_loss,wr_return_amt,wr_returned_date_sk,wr_web_page_sk] + InputAdapter + ReusedExchange [wp_web_page_sk] #13 + InputAdapter + ReusedExchange [d_date_sk] #5 + WholeStageCodegen (51) + HashAggregate [channel,isEmpty,isEmpty,isEmpty,sum,sum,sum] [id,isEmpty,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] + InputAdapter + Exchange [channel] #16 + WholeStageCodegen (50) + HashAggregate [channel,profit,returns,sales] [isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum] + HashAggregate [channel,id,isEmpty,isEmpty,isEmpty,sum,sum,sum] [isEmpty,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] + InputAdapter + ReusedExchange [channel,id,isEmpty,isEmpty,isEmpty,sum,sum,sum] #3 + WholeStageCodegen (79) + HashAggregate [isEmpty,isEmpty,isEmpty,sum,sum,sum] [channel,id,isEmpty,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] + InputAdapter + Exchange #17 + WholeStageCodegen (78) + HashAggregate [profit,returns,sales] [isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum] + HashAggregate [channel,id,isEmpty,isEmpty,isEmpty,sum,sum,sum] 
[isEmpty,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] + InputAdapter + ReusedExchange [channel,id,isEmpty,isEmpty,isEmpty,sum,sum,sum] #3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/explain.txt new file mode 100644 index 0000000000000..75ed1713c2628 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/explain.txt @@ -0,0 +1,629 @@ +== Physical Plan == +TakeOrderedAndProject (108) ++- * HashAggregate (107) + +- Exchange (106) + +- * HashAggregate (105) + +- Union (104) + :- * HashAggregate (98) + : +- Exchange (97) + : +- * HashAggregate (96) + : +- Union (95) + : :- * HashAggregate (89) + : : +- Exchange (88) + : : +- * HashAggregate (87) + : : +- Union (86) + : : :- * Project (34) + : : : +- * BroadcastHashJoin LeftOuter BuildRight (33) + : : : :- * HashAggregate (19) + : : : : +- Exchange (18) + : : : : +- * HashAggregate (17) + : : : : +- * Project (16) + : : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : : :- * Project (10) + : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : :- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet default.store_sales (1) + : : : : : +- BroadcastExchange (8) + : : : : : +- * Project (7) + : : : : : +- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.date_dim (4) + : : : : +- BroadcastExchange (14) + : : : : +- * Filter (13) + : : : : +- * ColumnarToRow (12) + : : : : +- Scan parquet default.store (11) + : : : +- BroadcastExchange (32) + : : : +- * HashAggregate (31) + : : : +- Exchange (30) + : : : +- * HashAggregate (29) + : : : +- * Project (28) + : : : +- * BroadcastHashJoin Inner BuildRight (27) + : : : :- * Project (25) + : : : : +- * BroadcastHashJoin Inner BuildRight (24) + : : : : :- * Filter (22) + : : : : : +- * 
ColumnarToRow (21) + : : : : : +- Scan parquet default.store_returns (20) + : : : : +- ReusedExchange (23) + : : : +- ReusedExchange (26) + : : :- * Project (55) + : : : +- BroadcastNestedLoopJoin Inner BuildLeft (54) + : : : :- BroadcastExchange (44) + : : : : +- * HashAggregate (43) + : : : : +- Exchange (42) + : : : : +- * HashAggregate (41) + : : : : +- * Project (40) + : : : : +- * BroadcastHashJoin Inner BuildRight (39) + : : : : :- * Filter (37) + : : : : : +- * ColumnarToRow (36) + : : : : : +- Scan parquet default.catalog_sales (35) + : : : : +- ReusedExchange (38) + : : : +- * HashAggregate (53) + : : : +- Exchange (52) + : : : +- * HashAggregate (51) + : : : +- * Project (50) + : : : +- * BroadcastHashJoin Inner BuildRight (49) + : : : :- * Filter (47) + : : : : +- * ColumnarToRow (46) + : : : : +- Scan parquet default.catalog_returns (45) + : : : +- ReusedExchange (48) + : : +- * Project (85) + : : +- * BroadcastHashJoin LeftOuter BuildRight (84) + : : :- * HashAggregate (70) + : : : +- Exchange (69) + : : : +- * HashAggregate (68) + : : : +- * Project (67) + : : : +- * BroadcastHashJoin Inner BuildRight (66) + : : : :- * Project (61) + : : : : +- * BroadcastHashJoin Inner BuildRight (60) + : : : : :- * Filter (58) + : : : : : +- * ColumnarToRow (57) + : : : : : +- Scan parquet default.web_sales (56) + : : : : +- ReusedExchange (59) + : : : +- BroadcastExchange (65) + : : : +- * Filter (64) + : : : +- * ColumnarToRow (63) + : : : +- Scan parquet default.web_page (62) + : : +- BroadcastExchange (83) + : : +- * HashAggregate (82) + : : +- Exchange (81) + : : +- * HashAggregate (80) + : : +- * Project (79) + : : +- * BroadcastHashJoin Inner BuildRight (78) + : : :- * Project (76) + : : : +- * BroadcastHashJoin Inner BuildRight (75) + : : : :- * Filter (73) + : : : : +- * ColumnarToRow (72) + : : : : +- Scan parquet default.web_returns (71) + : : : +- ReusedExchange (74) + : : +- ReusedExchange (77) + : +- * HashAggregate (94) + : +- Exchange (93) + : +- * 
HashAggregate (92) + : +- * HashAggregate (91) + : +- ReusedExchange (90) + +- * HashAggregate (103) + +- Exchange (102) + +- * HashAggregate (101) + +- * HashAggregate (100) + +- ReusedExchange (99) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] + +(3) Filter [codegen id : 3] +Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#2)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#5, d_date#6] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1998-08-04), LessThanOrEqual(d_date,1998-09-03), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#5, d_date#6] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#5, d_date#6] +Condition : (((isnotnull(d_date#6) AND (d_date#6 >= 10442)) AND (d_date#6 <= 10472)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [2]: [d_date_sk#5, d_date#6] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [ss_store_sk#2, 
ss_ext_sales_price#3, ss_net_profit#4] +Input [5]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, d_date_sk#5] + +(11) Scan parquet default.store +Output [1]: [s_store_sk#8] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [1]: [s_store_sk#8] + +(13) Filter [codegen id : 2] +Input [1]: [s_store_sk#8] +Condition : isnotnull(s_store_sk#8) + +(14) BroadcastExchange +Input [1]: [s_store_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#8] +Join condition: None + +(16) Project [codegen id : 3] +Output [3]: [ss_ext_sales_price#3, ss_net_profit#4, s_store_sk#8] +Input [4]: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, s_store_sk#8] + +(17) HashAggregate [codegen id : 3] +Input [3]: [ss_ext_sales_price#3, ss_net_profit#4, s_store_sk#8] +Keys [1]: [s_store_sk#8] +Functions [2]: [partial_sum(UnscaledValue(ss_ext_sales_price#3)), partial_sum(UnscaledValue(ss_net_profit#4))] +Aggregate Attributes [2]: [sum#10, sum#11] +Results [3]: [s_store_sk#8, sum#12, sum#13] + +(18) Exchange +Input [3]: [s_store_sk#8, sum#12, sum#13] +Arguments: hashpartitioning(s_store_sk#8, 5), true, [id=#14] + +(19) HashAggregate [codegen id : 8] +Input [3]: [s_store_sk#8, sum#12, sum#13] +Keys [1]: [s_store_sk#8] +Functions [2]: [sum(UnscaledValue(ss_ext_sales_price#3)), sum(UnscaledValue(ss_net_profit#4))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_ext_sales_price#3))#15, sum(UnscaledValue(ss_net_profit#4))#16] +Results [3]: [s_store_sk#8, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#15,17,2) AS sales#17, MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#16,17,2) AS profit#18] + 
+(20) Scan parquet default.store_returns +Output [4]: [sr_returned_date_sk#19, sr_store_sk#20, sr_return_amt#21, sr_net_loss#22] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_returns] +PushedFilters: [IsNotNull(sr_returned_date_sk), IsNotNull(sr_store_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [4]: [sr_returned_date_sk#19, sr_store_sk#20, sr_return_amt#21, sr_net_loss#22] + +(22) Filter [codegen id : 6] +Input [4]: [sr_returned_date_sk#19, sr_store_sk#20, sr_return_amt#21, sr_net_loss#22] +Condition : (isnotnull(sr_returned_date_sk#19) AND isnotnull(sr_store_sk#20)) + +(23) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(24) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [sr_returned_date_sk#19] +Right keys [1]: [cast(d_date_sk#5 as bigint)] +Join condition: None + +(25) Project [codegen id : 6] +Output [3]: [sr_store_sk#20, sr_return_amt#21, sr_net_loss#22] +Input [5]: [sr_returned_date_sk#19, sr_store_sk#20, sr_return_amt#21, sr_net_loss#22, d_date_sk#5] + +(26) ReusedExchange [Reuses operator id: 14] +Output [1]: [s_store_sk#23] + +(27) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [sr_store_sk#20] +Right keys [1]: [cast(s_store_sk#23 as bigint)] +Join condition: None + +(28) Project [codegen id : 6] +Output [3]: [sr_return_amt#21, sr_net_loss#22, s_store_sk#23] +Input [4]: [sr_store_sk#20, sr_return_amt#21, sr_net_loss#22, s_store_sk#23] + +(29) HashAggregate [codegen id : 6] +Input [3]: [sr_return_amt#21, sr_net_loss#22, s_store_sk#23] +Keys [1]: [s_store_sk#23] +Functions [2]: [partial_sum(UnscaledValue(sr_return_amt#21)), partial_sum(UnscaledValue(sr_net_loss#22))] +Aggregate Attributes [2]: [sum#24, sum#25] +Results [3]: [s_store_sk#23, sum#26, sum#27] + +(30) Exchange +Input [3]: [s_store_sk#23, sum#26, sum#27] +Arguments: hashpartitioning(s_store_sk#23, 5), true, [id=#28] + +(31) 
HashAggregate [codegen id : 7] +Input [3]: [s_store_sk#23, sum#26, sum#27] +Keys [1]: [s_store_sk#23] +Functions [2]: [sum(UnscaledValue(sr_return_amt#21)), sum(UnscaledValue(sr_net_loss#22))] +Aggregate Attributes [2]: [sum(UnscaledValue(sr_return_amt#21))#29, sum(UnscaledValue(sr_net_loss#22))#30] +Results [3]: [s_store_sk#23, MakeDecimal(sum(UnscaledValue(sr_return_amt#21))#29,17,2) AS returns#31, MakeDecimal(sum(UnscaledValue(sr_net_loss#22))#30,17,2) AS profit_loss#32] + +(32) BroadcastExchange +Input [3]: [s_store_sk#23, returns#31, profit_loss#32] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#33] + +(33) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [s_store_sk#8] +Right keys [1]: [s_store_sk#23] +Join condition: None + +(34) Project [codegen id : 8] +Output [5]: [store channel AS channel#34, s_store_sk#8 AS id#35, sales#17, coalesce(returns#31, 0.00) AS returns#36, CheckOverflow((promote_precision(cast(profit#18 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#32, 0.00) as decimal(18,2)))), DecimalType(18,2), true) AS profit#37] +Input [6]: [s_store_sk#8, sales#17, profit#18, s_store_sk#23, returns#31, profit_loss#32] + +(35) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#38, cs_call_center_sk#39, cs_ext_sales_price#40, cs_net_profit#41] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(36) ColumnarToRow [codegen id : 10] +Input [4]: [cs_sold_date_sk#38, cs_call_center_sk#39, cs_ext_sales_price#40, cs_net_profit#41] + +(37) Filter [codegen id : 10] +Input [4]: [cs_sold_date_sk#38, cs_call_center_sk#39, cs_ext_sales_price#40, cs_net_profit#41] +Condition : isnotnull(cs_sold_date_sk#38) + +(38) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(39) BroadcastHashJoin 
[codegen id : 10] +Left keys [1]: [cs_sold_date_sk#38] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(40) Project [codegen id : 10] +Output [3]: [cs_call_center_sk#39, cs_ext_sales_price#40, cs_net_profit#41] +Input [5]: [cs_sold_date_sk#38, cs_call_center_sk#39, cs_ext_sales_price#40, cs_net_profit#41, d_date_sk#5] + +(41) HashAggregate [codegen id : 10] +Input [3]: [cs_call_center_sk#39, cs_ext_sales_price#40, cs_net_profit#41] +Keys [1]: [cs_call_center_sk#39] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_sales_price#40)), partial_sum(UnscaledValue(cs_net_profit#41))] +Aggregate Attributes [2]: [sum#42, sum#43] +Results [3]: [cs_call_center_sk#39, sum#44, sum#45] + +(42) Exchange +Input [3]: [cs_call_center_sk#39, sum#44, sum#45] +Arguments: hashpartitioning(cs_call_center_sk#39, 5), true, [id=#46] + +(43) HashAggregate [codegen id : 11] +Input [3]: [cs_call_center_sk#39, sum#44, sum#45] +Keys [1]: [cs_call_center_sk#39] +Functions [2]: [sum(UnscaledValue(cs_ext_sales_price#40)), sum(UnscaledValue(cs_net_profit#41))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_sales_price#40))#47, sum(UnscaledValue(cs_net_profit#41))#48] +Results [3]: [cs_call_center_sk#39, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#40))#47,17,2) AS sales#49, MakeDecimal(sum(UnscaledValue(cs_net_profit#41))#48,17,2) AS profit#50] + +(44) BroadcastExchange +Input [3]: [cs_call_center_sk#39, sales#49, profit#50] +Arguments: IdentityBroadcastMode, [id=#51] + +(45) Scan parquet default.catalog_returns +Output [3]: [cr_returned_date_sk#52, cr_return_amount#53, cr_net_loss#54] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/catalog_returns] +PushedFilters: [IsNotNull(cr_returned_date_sk)] +ReadSchema: struct + +(46) ColumnarToRow [codegen id : 13] +Input [3]: [cr_returned_date_sk#52, cr_return_amount#53, cr_net_loss#54] + +(47) Filter [codegen id : 13] +Input [3]: 
[cr_returned_date_sk#52, cr_return_amount#53, cr_net_loss#54] +Condition : isnotnull(cr_returned_date_sk#52) + +(48) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(49) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [cr_returned_date_sk#52] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(50) Project [codegen id : 13] +Output [2]: [cr_return_amount#53, cr_net_loss#54] +Input [4]: [cr_returned_date_sk#52, cr_return_amount#53, cr_net_loss#54, d_date_sk#5] + +(51) HashAggregate [codegen id : 13] +Input [2]: [cr_return_amount#53, cr_net_loss#54] +Keys: [] +Functions [2]: [partial_sum(UnscaledValue(cr_return_amount#53)), partial_sum(UnscaledValue(cr_net_loss#54))] +Aggregate Attributes [2]: [sum#55, sum#56] +Results [2]: [sum#57, sum#58] + +(52) Exchange +Input [2]: [sum#57, sum#58] +Arguments: SinglePartition, true, [id=#59] + +(53) HashAggregate [codegen id : 14] +Input [2]: [sum#57, sum#58] +Keys: [] +Functions [2]: [sum(UnscaledValue(cr_return_amount#53)), sum(UnscaledValue(cr_net_loss#54))] +Aggregate Attributes [2]: [sum(UnscaledValue(cr_return_amount#53))#60, sum(UnscaledValue(cr_net_loss#54))#61] +Results [2]: [MakeDecimal(sum(UnscaledValue(cr_return_amount#53))#60,17,2) AS returns#62, MakeDecimal(sum(UnscaledValue(cr_net_loss#54))#61,17,2) AS profit_loss#63] + +(54) BroadcastNestedLoopJoin +Join condition: None + +(55) Project [codegen id : 15] +Output [5]: [catalog channel AS channel#64, cs_call_center_sk#39 AS id#65, sales#49, returns#62, CheckOverflow((promote_precision(cast(profit#50 as decimal(18,2))) - promote_precision(cast(profit_loss#63 as decimal(18,2)))), DecimalType(18,2), true) AS profit#66] +Input [5]: [cs_call_center_sk#39, sales#49, profit#50, returns#62, profit_loss#63] + +(56) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#67, ws_web_page_sk#68, ws_ext_sales_price#69, ws_net_profit#70] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_web_page_sk)] +ReadSchema: struct + +(57) ColumnarToRow [codegen id : 18] +Input [4]: [ws_sold_date_sk#67, ws_web_page_sk#68, ws_ext_sales_price#69, ws_net_profit#70] + +(58) Filter [codegen id : 18] +Input [4]: [ws_sold_date_sk#67, ws_web_page_sk#68, ws_ext_sales_price#69, ws_net_profit#70] +Condition : (isnotnull(ws_sold_date_sk#67) AND isnotnull(ws_web_page_sk#68)) + +(59) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(60) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [ws_sold_date_sk#67] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(61) Project [codegen id : 18] +Output [3]: [ws_web_page_sk#68, ws_ext_sales_price#69, ws_net_profit#70] +Input [5]: [ws_sold_date_sk#67, ws_web_page_sk#68, ws_ext_sales_price#69, ws_net_profit#70, d_date_sk#5] + +(62) Scan parquet default.web_page +Output [1]: [wp_web_page_sk#71] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/web_page] +PushedFilters: [IsNotNull(wp_web_page_sk)] +ReadSchema: struct + +(63) ColumnarToRow [codegen id : 17] +Input [1]: [wp_web_page_sk#71] + +(64) Filter [codegen id : 17] +Input [1]: [wp_web_page_sk#71] +Condition : isnotnull(wp_web_page_sk#71) + +(65) BroadcastExchange +Input [1]: [wp_web_page_sk#71] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#72] + +(66) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [ws_web_page_sk#68] +Right keys [1]: [wp_web_page_sk#71] +Join condition: None + +(67) Project [codegen id : 18] +Output [3]: [ws_ext_sales_price#69, ws_net_profit#70, wp_web_page_sk#71] +Input [4]: [ws_web_page_sk#68, ws_ext_sales_price#69, ws_net_profit#70, wp_web_page_sk#71] + +(68) HashAggregate [codegen id : 18] +Input 
[3]: [ws_ext_sales_price#69, ws_net_profit#70, wp_web_page_sk#71] +Keys [1]: [wp_web_page_sk#71] +Functions [2]: [partial_sum(UnscaledValue(ws_ext_sales_price#69)), partial_sum(UnscaledValue(ws_net_profit#70))] +Aggregate Attributes [2]: [sum#73, sum#74] +Results [3]: [wp_web_page_sk#71, sum#75, sum#76] + +(69) Exchange +Input [3]: [wp_web_page_sk#71, sum#75, sum#76] +Arguments: hashpartitioning(wp_web_page_sk#71, 5), true, [id=#77] + +(70) HashAggregate [codegen id : 23] +Input [3]: [wp_web_page_sk#71, sum#75, sum#76] +Keys [1]: [wp_web_page_sk#71] +Functions [2]: [sum(UnscaledValue(ws_ext_sales_price#69)), sum(UnscaledValue(ws_net_profit#70))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_sales_price#69))#78, sum(UnscaledValue(ws_net_profit#70))#79] +Results [3]: [wp_web_page_sk#71, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#69))#78,17,2) AS sales#80, MakeDecimal(sum(UnscaledValue(ws_net_profit#70))#79,17,2) AS profit#81] + +(71) Scan parquet default.web_returns +Output [4]: [wr_returned_date_sk#82, wr_web_page_sk#83, wr_return_amt#84, wr_net_loss#85] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/web_returns] +PushedFilters: [IsNotNull(wr_returned_date_sk), IsNotNull(wr_web_page_sk)] +ReadSchema: struct + +(72) ColumnarToRow [codegen id : 21] +Input [4]: [wr_returned_date_sk#82, wr_web_page_sk#83, wr_return_amt#84, wr_net_loss#85] + +(73) Filter [codegen id : 21] +Input [4]: [wr_returned_date_sk#82, wr_web_page_sk#83, wr_return_amt#84, wr_net_loss#85] +Condition : (isnotnull(wr_returned_date_sk#82) AND isnotnull(wr_web_page_sk#83)) + +(74) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(75) BroadcastHashJoin [codegen id : 21] +Left keys [1]: [wr_returned_date_sk#82] +Right keys [1]: [cast(d_date_sk#5 as bigint)] +Join condition: None + +(76) Project [codegen id : 21] +Output [3]: [wr_web_page_sk#83, wr_return_amt#84, 
wr_net_loss#85] +Input [5]: [wr_returned_date_sk#82, wr_web_page_sk#83, wr_return_amt#84, wr_net_loss#85, d_date_sk#5] + +(77) ReusedExchange [Reuses operator id: 65] +Output [1]: [wp_web_page_sk#86] + +(78) BroadcastHashJoin [codegen id : 21] +Left keys [1]: [wr_web_page_sk#83] +Right keys [1]: [cast(wp_web_page_sk#86 as bigint)] +Join condition: None + +(79) Project [codegen id : 21] +Output [3]: [wr_return_amt#84, wr_net_loss#85, wp_web_page_sk#86] +Input [4]: [wr_web_page_sk#83, wr_return_amt#84, wr_net_loss#85, wp_web_page_sk#86] + +(80) HashAggregate [codegen id : 21] +Input [3]: [wr_return_amt#84, wr_net_loss#85, wp_web_page_sk#86] +Keys [1]: [wp_web_page_sk#86] +Functions [2]: [partial_sum(UnscaledValue(wr_return_amt#84)), partial_sum(UnscaledValue(wr_net_loss#85))] +Aggregate Attributes [2]: [sum#87, sum#88] +Results [3]: [wp_web_page_sk#86, sum#89, sum#90] + +(81) Exchange +Input [3]: [wp_web_page_sk#86, sum#89, sum#90] +Arguments: hashpartitioning(wp_web_page_sk#86, 5), true, [id=#91] + +(82) HashAggregate [codegen id : 22] +Input [3]: [wp_web_page_sk#86, sum#89, sum#90] +Keys [1]: [wp_web_page_sk#86] +Functions [2]: [sum(UnscaledValue(wr_return_amt#84)), sum(UnscaledValue(wr_net_loss#85))] +Aggregate Attributes [2]: [sum(UnscaledValue(wr_return_amt#84))#92, sum(UnscaledValue(wr_net_loss#85))#93] +Results [3]: [wp_web_page_sk#86, MakeDecimal(sum(UnscaledValue(wr_return_amt#84))#92,17,2) AS returns#94, MakeDecimal(sum(UnscaledValue(wr_net_loss#85))#93,17,2) AS profit_loss#95] + +(83) BroadcastExchange +Input [3]: [wp_web_page_sk#86, returns#94, profit_loss#95] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#96] + +(84) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [wp_web_page_sk#71] +Right keys [1]: [wp_web_page_sk#86] +Join condition: None + +(85) Project [codegen id : 23] +Output [5]: [web channel AS channel#97, wp_web_page_sk#71 AS id#98, sales#80, coalesce(returns#94, 0.00) AS returns#99, 
CheckOverflow((promote_precision(cast(profit#81 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#95, 0.00) as decimal(18,2)))), DecimalType(18,2), true) AS profit#100] +Input [6]: [wp_web_page_sk#71, sales#80, profit#81, wp_web_page_sk#86, returns#94, profit_loss#95] + +(86) Union + +(87) HashAggregate [codegen id : 24] +Input [5]: [channel#34, id#35, sales#17, returns#36, profit#37] +Keys [2]: [channel#34, id#35] +Functions [3]: [partial_sum(sales#17), partial_sum(returns#36), partial_sum(profit#37)] +Aggregate Attributes [6]: [sum#101, isEmpty#102, sum#103, isEmpty#104, sum#105, isEmpty#106] +Results [8]: [channel#34, id#35, sum#107, isEmpty#108, sum#109, isEmpty#110, sum#111, isEmpty#112] + +(88) Exchange +Input [8]: [channel#34, id#35, sum#107, isEmpty#108, sum#109, isEmpty#110, sum#111, isEmpty#112] +Arguments: hashpartitioning(channel#34, id#35, 5), true, [id=#113] + +(89) HashAggregate [codegen id : 25] +Input [8]: [channel#34, id#35, sum#107, isEmpty#108, sum#109, isEmpty#110, sum#111, isEmpty#112] +Keys [2]: [channel#34, id#35] +Functions [3]: [sum(sales#17), sum(returns#36), sum(profit#37)] +Aggregate Attributes [3]: [sum(sales#17)#114, sum(returns#36)#115, sum(profit#37)#116] +Results [5]: [channel#34, id#35, cast(sum(sales#17)#114 as decimal(37,2)) AS sales#117, cast(sum(returns#36)#115 as decimal(37,2)) AS returns#118, cast(sum(profit#37)#116 as decimal(38,2)) AS profit#119] + +(90) ReusedExchange [Reuses operator id: 88] +Output [8]: [channel#34, id#35, sum#120, isEmpty#121, sum#122, isEmpty#123, sum#124, isEmpty#125] + +(91) HashAggregate [codegen id : 50] +Input [8]: [channel#34, id#35, sum#120, isEmpty#121, sum#122, isEmpty#123, sum#124, isEmpty#125] +Keys [2]: [channel#34, id#35] +Functions [3]: [sum(sales#17), sum(returns#36), sum(profit#126)] +Aggregate Attributes [3]: [sum(sales#17)#127, sum(returns#36)#128, sum(profit#126)#129] +Results [4]: [channel#34, sum(sales#17)#127 AS sales#130, sum(returns#36)#128 AS returns#131, 
sum(profit#126)#129 AS profit#132] + +(92) HashAggregate [codegen id : 50] +Input [4]: [channel#34, sales#130, returns#131, profit#132] +Keys [1]: [channel#34] +Functions [3]: [partial_sum(sales#130), partial_sum(returns#131), partial_sum(profit#132)] +Aggregate Attributes [6]: [sum#133, isEmpty#134, sum#135, isEmpty#136, sum#137, isEmpty#138] +Results [7]: [channel#34, sum#139, isEmpty#140, sum#141, isEmpty#142, sum#143, isEmpty#144] + +(93) Exchange +Input [7]: [channel#34, sum#139, isEmpty#140, sum#141, isEmpty#142, sum#143, isEmpty#144] +Arguments: hashpartitioning(channel#34, 5), true, [id=#145] + +(94) HashAggregate [codegen id : 51] +Input [7]: [channel#34, sum#139, isEmpty#140, sum#141, isEmpty#142, sum#143, isEmpty#144] +Keys [1]: [channel#34] +Functions [3]: [sum(sales#130), sum(returns#131), sum(profit#132)] +Aggregate Attributes [3]: [sum(sales#130)#146, sum(returns#131)#147, sum(profit#132)#148] +Results [5]: [channel#34, null AS id#149, sum(sales#130)#146 AS sales#150, sum(returns#131)#147 AS returns#151, sum(profit#132)#148 AS profit#152] + +(95) Union + +(96) HashAggregate [codegen id : 52] +Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] +Keys [5]: [channel#34, id#35, sales#117, returns#118, profit#119] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#34, id#35, sales#117, returns#118, profit#119] + +(97) Exchange +Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] +Arguments: hashpartitioning(channel#34, id#35, sales#117, returns#118, profit#119, 5), true, [id=#153] + +(98) HashAggregate [codegen id : 53] +Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] +Keys [5]: [channel#34, id#35, sales#117, returns#118, profit#119] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#34, id#35, sales#117, returns#118, profit#119] + +(99) ReusedExchange [Reuses operator id: 88] +Output [8]: [channel#34, id#35, sum#154, isEmpty#155, sum#156, isEmpty#157, sum#158, isEmpty#159] + 
+(100) HashAggregate [codegen id : 78] +Input [8]: [channel#34, id#35, sum#154, isEmpty#155, sum#156, isEmpty#157, sum#158, isEmpty#159] +Keys [2]: [channel#34, id#35] +Functions [3]: [sum(sales#17), sum(returns#36), sum(profit#160)] +Aggregate Attributes [3]: [sum(sales#17)#161, sum(returns#36)#162, sum(profit#160)#163] +Results [3]: [sum(sales#17)#161 AS sales#130, sum(returns#36)#162 AS returns#131, sum(profit#160)#163 AS profit#132] + +(101) HashAggregate [codegen id : 78] +Input [3]: [sales#130, returns#131, profit#132] +Keys: [] +Functions [3]: [partial_sum(sales#130), partial_sum(returns#131), partial_sum(profit#132)] +Aggregate Attributes [6]: [sum#164, isEmpty#165, sum#166, isEmpty#167, sum#168, isEmpty#169] +Results [6]: [sum#170, isEmpty#171, sum#172, isEmpty#173, sum#174, isEmpty#175] + +(102) Exchange +Input [6]: [sum#170, isEmpty#171, sum#172, isEmpty#173, sum#174, isEmpty#175] +Arguments: SinglePartition, true, [id=#176] + +(103) HashAggregate [codegen id : 79] +Input [6]: [sum#170, isEmpty#171, sum#172, isEmpty#173, sum#174, isEmpty#175] +Keys: [] +Functions [3]: [sum(sales#130), sum(returns#131), sum(profit#132)] +Aggregate Attributes [3]: [sum(sales#130)#177, sum(returns#131)#178, sum(profit#132)#179] +Results [5]: [null AS channel#180, null AS id#181, sum(sales#130)#177 AS sales#182, sum(returns#131)#178 AS returns#183, sum(profit#132)#179 AS profit#184] + +(104) Union + +(105) HashAggregate [codegen id : 80] +Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] +Keys [5]: [channel#34, id#35, sales#117, returns#118, profit#119] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#34, id#35, sales#117, returns#118, profit#119] + +(106) Exchange +Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] +Arguments: hashpartitioning(channel#34, id#35, sales#117, returns#118, profit#119, 5), true, [id=#185] + +(107) HashAggregate [codegen id : 81] +Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] 
+Keys [5]: [channel#34, id#35, sales#117, returns#118, profit#119] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#34, id#35, sales#117, returns#118, profit#119] + +(108) TakeOrderedAndProject +Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] +Arguments: 100, [channel#34 ASC NULLS FIRST, id#35 ASC NULLS FIRST], [channel#34, id#35, sales#117, returns#118, profit#119] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/simplified.txt new file mode 100644 index 0000000000000..a264d4273b546 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/simplified.txt @@ -0,0 +1,172 @@ +TakeOrderedAndProject [channel,id,profit,returns,sales] + WholeStageCodegen (81) + HashAggregate [channel,id,profit,returns,sales] + InputAdapter + Exchange [channel,id,profit,returns,sales] #1 + WholeStageCodegen (80) + HashAggregate [channel,id,profit,returns,sales] + InputAdapter + Union + WholeStageCodegen (53) + HashAggregate [channel,id,profit,returns,sales] + InputAdapter + Exchange [channel,id,profit,returns,sales] #2 + WholeStageCodegen (52) + HashAggregate [channel,id,profit,returns,sales] + InputAdapter + Union + WholeStageCodegen (25) + HashAggregate [channel,id,isEmpty,isEmpty,isEmpty,sum,sum,sum] [isEmpty,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] + InputAdapter + Exchange [channel,id] #3 + WholeStageCodegen (24) + HashAggregate [channel,id,profit,returns,sales] [isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum] + InputAdapter + Union + WholeStageCodegen (8) + Project [profit,profit_loss,returns,s_store_sk,sales] + BroadcastHashJoin [s_store_sk,s_store_sk] + HashAggregate [s_store_sk,sum,sum] [profit,sales,sum,sum,sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit))] + InputAdapter + Exchange 
[s_store_sk] #4 + WholeStageCodegen (3) + HashAggregate [s_store_sk,ss_ext_sales_price,ss_net_profit] [sum,sum,sum,sum] + Project [s_store_sk,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_ext_sales_price,ss_net_profit,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (7) + HashAggregate [s_store_sk,sum,sum] [profit_loss,returns,sum,sum,sum(UnscaledValue(sr_net_loss)),sum(UnscaledValue(sr_return_amt))] + InputAdapter + Exchange [s_store_sk] #8 + WholeStageCodegen (6) + HashAggregate [s_store_sk,sr_net_loss,sr_return_amt] [sum,sum,sum,sum] + Project [s_store_sk,sr_net_loss,sr_return_amt] + BroadcastHashJoin [s_store_sk,sr_store_sk] + Project [sr_net_loss,sr_return_amt,sr_store_sk] + BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + Filter [sr_returned_date_sk,sr_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_net_loss,sr_return_amt,sr_returned_date_sk,sr_store_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 + InputAdapter + ReusedExchange [s_store_sk] #6 + WholeStageCodegen (15) + Project [cs_call_center_sk,profit,profit_loss,returns,sales] + InputAdapter + BroadcastNestedLoopJoin + BroadcastExchange #9 + WholeStageCodegen (11) + HashAggregate [cs_call_center_sk,sum,sum] [profit,sales,sum,sum,sum(UnscaledValue(cs_ext_sales_price)),sum(UnscaledValue(cs_net_profit))] + InputAdapter + Exchange 
[cs_call_center_sk] #10 + WholeStageCodegen (10) + HashAggregate [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] [sum,sum,sum,sum] + Project [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_call_center_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 + WholeStageCodegen (14) + HashAggregate [sum,sum] [profit_loss,returns,sum,sum,sum(UnscaledValue(cr_net_loss)),sum(UnscaledValue(cr_return_amount))] + InputAdapter + Exchange #11 + WholeStageCodegen (13) + HashAggregate [cr_net_loss,cr_return_amount] [sum,sum,sum,sum] + Project [cr_net_loss,cr_return_amount] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Filter [cr_returned_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_net_loss,cr_return_amount,cr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 + WholeStageCodegen (23) + Project [profit,profit_loss,returns,sales,wp_web_page_sk] + BroadcastHashJoin [wp_web_page_sk,wp_web_page_sk] + HashAggregate [sum,sum,wp_web_page_sk] [profit,sales,sum,sum,sum(UnscaledValue(ws_ext_sales_price)),sum(UnscaledValue(ws_net_profit))] + InputAdapter + Exchange [wp_web_page_sk] #12 + WholeStageCodegen (18) + HashAggregate [wp_web_page_sk,ws_ext_sales_price,ws_net_profit] [sum,sum,sum,sum] + Project [wp_web_page_sk,ws_ext_sales_price,ws_net_profit] + BroadcastHashJoin [wp_web_page_sk,ws_web_page_sk] + Project [ws_ext_sales_price,ws_net_profit,ws_web_page_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_sold_date_sk,ws_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,ws_web_page_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (17) + Filter [wp_web_page_sk] + ColumnarToRow + 
InputAdapter + Scan parquet default.web_page [wp_web_page_sk] + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (22) + HashAggregate [sum,sum,wp_web_page_sk] [profit_loss,returns,sum,sum,sum(UnscaledValue(wr_net_loss)),sum(UnscaledValue(wr_return_amt))] + InputAdapter + Exchange [wp_web_page_sk] #15 + WholeStageCodegen (21) + HashAggregate [wp_web_page_sk,wr_net_loss,wr_return_amt] [sum,sum,sum,sum] + Project [wp_web_page_sk,wr_net_loss,wr_return_amt] + BroadcastHashJoin [wp_web_page_sk,wr_web_page_sk] + Project [wr_net_loss,wr_return_amt,wr_web_page_sk] + BroadcastHashJoin [d_date_sk,wr_returned_date_sk] + Filter [wr_returned_date_sk,wr_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_net_loss,wr_return_amt,wr_returned_date_sk,wr_web_page_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 + InputAdapter + ReusedExchange [wp_web_page_sk] #13 + WholeStageCodegen (51) + HashAggregate [channel,isEmpty,isEmpty,isEmpty,sum,sum,sum] [id,isEmpty,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] + InputAdapter + Exchange [channel] #16 + WholeStageCodegen (50) + HashAggregate [channel,profit,returns,sales] [isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum] + HashAggregate [channel,id,isEmpty,isEmpty,isEmpty,sum,sum,sum] [isEmpty,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] + InputAdapter + ReusedExchange [channel,id,isEmpty,isEmpty,isEmpty,sum,sum,sum] #3 + WholeStageCodegen (79) + HashAggregate [isEmpty,isEmpty,isEmpty,sum,sum,sum] [channel,id,isEmpty,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] + InputAdapter + Exchange #17 + WholeStageCodegen (78) + HashAggregate [profit,returns,sales] [isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum] + HashAggregate [channel,id,isEmpty,isEmpty,isEmpty,sum,sum,sum] 
[isEmpty,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] + InputAdapter + ReusedExchange [channel,id,isEmpty,isEmpty,isEmpty,sum,sum,sum] #3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78.sf100/explain.txt new file mode 100644 index 0000000000000..dc2975b51f0bb --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78.sf100/explain.txt @@ -0,0 +1,391 @@ +== Physical Plan == +TakeOrderedAndProject (70) ++- * Project (69) + +- * SortMergeJoin Inner (68) + :- * Project (46) + : +- * SortMergeJoin Inner (45) + : :- * Sort (23) + : : +- * HashAggregate (22) + : : +- Exchange (21) + : : +- * HashAggregate (20) + : : +- * Project (19) + : : +- * BroadcastHashJoin Inner BuildRight (18) + : : :- * Project (13) + : : : +- * Filter (12) + : : : +- SortMergeJoin LeftOuter (11) + : : : :- * Sort (5) + : : : : +- Exchange (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- * Sort (10) + : : : +- Exchange (9) + : : : +- * Filter (8) + : : : +- * ColumnarToRow (7) + : : : +- Scan parquet default.store_returns (6) + : : +- BroadcastExchange (17) + : : +- * Filter (16) + : : +- * ColumnarToRow (15) + : : +- Scan parquet default.date_dim (14) + : +- * Sort (44) + : +- * Filter (43) + : +- * HashAggregate (42) + : +- Exchange (41) + : +- * HashAggregate (40) + : +- * Project (39) + : +- * BroadcastHashJoin Inner BuildRight (38) + : :- * Project (36) + : : +- * Filter (35) + : : +- SortMergeJoin LeftOuter (34) + : : :- * Sort (28) + : : : +- Exchange (27) + : : : +- * Filter (26) + : : : +- * ColumnarToRow (25) + : : : +- Scan parquet default.catalog_sales (24) + : : +- * Sort (33) + : : +- Exchange (32) + : : +- * Filter (31) + : : +- * ColumnarToRow (30) + : : +- Scan parquet default.catalog_returns (29) + : +- 
ReusedExchange (37) + +- * Sort (67) + +- * Filter (66) + +- * HashAggregate (65) + +- Exchange (64) + +- * HashAggregate (63) + +- * Project (62) + +- * BroadcastHashJoin Inner BuildRight (61) + :- * Project (59) + : +- * Filter (58) + : +- SortMergeJoin LeftOuter (57) + : :- * Sort (51) + : : +- Exchange (50) + : : +- * Filter (49) + : : +- * ColumnarToRow (48) + : : +- Scan parquet default.web_sales (47) + : +- * Sort (56) + : +- Exchange (55) + : +- * Filter (54) + : +- * ColumnarToRow (53) + : +- Scan parquet default.web_returns (52) + +- ReusedExchange (60) + + +(1) Scan parquet default.store_sales +Output [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#4, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#4, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7] + +(3) Filter [codegen id : 1] +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#4, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7] +Condition : ((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_customer_sk#3)) AND isnotnull(ss_item_sk#2)) + +(4) Exchange +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#4, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7] +Arguments: hashpartitioning(cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#2 as bigint), 5), true, [id=#8] + +(5) Sort [codegen id : 2] +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#4, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7] +Arguments: [cast(ss_ticket_number#4 as bigint) ASC NULLS FIRST, 
cast(ss_item_sk#2 as bigint) ASC NULLS FIRST], false, 0 + +(6) Scan parquet default.store_returns +Output [2]: [sr_item_sk#9, sr_ticket_number#10] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 3] +Input [2]: [sr_item_sk#9, sr_ticket_number#10] + +(8) Filter [codegen id : 3] +Input [2]: [sr_item_sk#9, sr_ticket_number#10] +Condition : (isnotnull(sr_ticket_number#10) AND isnotnull(sr_item_sk#9)) + +(9) Exchange +Input [2]: [sr_item_sk#9, sr_ticket_number#10] +Arguments: hashpartitioning(sr_ticket_number#10, sr_item_sk#9, 5), true, [id=#11] + +(10) Sort [codegen id : 4] +Input [2]: [sr_item_sk#9, sr_ticket_number#10] +Arguments: [sr_ticket_number#10 ASC NULLS FIRST, sr_item_sk#9 ASC NULLS FIRST], false, 0 + +(11) SortMergeJoin +Left keys [2]: [cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#2 as bigint)] +Right keys [2]: [sr_ticket_number#10, sr_item_sk#9] +Join condition: None + +(12) Filter [codegen id : 6] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#4, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7, sr_item_sk#9, sr_ticket_number#10] +Condition : isnull(sr_ticket_number#10) + +(13) Project [codegen id : 6] +Output [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#4, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7, sr_item_sk#9, sr_ticket_number#10] + +(14) Scan parquet default.date_dim +Output [2]: [d_date_sk#12, d_year#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: 
[IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#12, d_year#13] + +(16) Filter [codegen id : 5] +Input [2]: [d_date_sk#12, d_year#13] +Condition : ((isnotnull(d_year#13) AND (d_year#13 = 2000)) AND isnotnull(d_date_sk#12)) + +(17) BroadcastExchange +Input [2]: [d_date_sk#12, d_year#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] + +(18) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#12] +Join condition: None + +(19) Project [codegen id : 6] +Output [6]: [ss_item_sk#2, ss_customer_sk#3, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7, d_year#13] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7, d_date_sk#12, d_year#13] + +(20) HashAggregate [codegen id : 6] +Input [6]: [ss_item_sk#2, ss_customer_sk#3, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7, d_year#13] +Keys [3]: [d_year#13, ss_item_sk#2, ss_customer_sk#3] +Functions [3]: [partial_sum(cast(ss_quantity#5 as bigint)), partial_sum(UnscaledValue(ss_wholesale_cost#6)), partial_sum(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [3]: [sum#15, sum#16, sum#17] +Results [6]: [d_year#13, ss_item_sk#2, ss_customer_sk#3, sum#18, sum#19, sum#20] + +(21) Exchange +Input [6]: [d_year#13, ss_item_sk#2, ss_customer_sk#3, sum#18, sum#19, sum#20] +Arguments: hashpartitioning(d_year#13, ss_item_sk#2, ss_customer_sk#3, 5), true, [id=#21] + +(22) HashAggregate [codegen id : 7] +Input [6]: [d_year#13, ss_item_sk#2, ss_customer_sk#3, sum#18, sum#19, sum#20] +Keys [3]: [d_year#13, ss_item_sk#2, ss_customer_sk#3] +Functions [3]: [sum(cast(ss_quantity#5 as bigint)), sum(UnscaledValue(ss_wholesale_cost#6)), sum(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [3]: [sum(cast(ss_quantity#5 as bigint))#22, 
sum(UnscaledValue(ss_wholesale_cost#6))#23, sum(UnscaledValue(ss_sales_price#7))#24] +Results [6]: [d_year#13 AS ss_sold_year#25, ss_item_sk#2, ss_customer_sk#3, sum(cast(ss_quantity#5 as bigint))#22 AS ss_qty#26, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#6))#23,17,2) AS ss_wc#27, MakeDecimal(sum(UnscaledValue(ss_sales_price#7))#24,17,2) AS ss_sp#28] + +(23) Sort [codegen id : 7] +Input [6]: [ss_sold_year#25, ss_item_sk#2, ss_customer_sk#3, ss_qty#26, ss_wc#27, ss_sp#28] +Arguments: [ss_sold_year#25 ASC NULLS FIRST, ss_item_sk#2 ASC NULLS FIRST, ss_customer_sk#3 ASC NULLS FIRST], false, 0 + +(24) Scan parquet default.catalog_sales +Output [7]: [cs_sold_date_sk#29, cs_bill_customer_sk#30, cs_item_sk#31, cs_order_number#32, cs_quantity#33, cs_wholesale_cost#34, cs_sales_price#35] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk), IsNotNull(cs_bill_customer_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 8] +Input [7]: [cs_sold_date_sk#29, cs_bill_customer_sk#30, cs_item_sk#31, cs_order_number#32, cs_quantity#33, cs_wholesale_cost#34, cs_sales_price#35] + +(26) Filter [codegen id : 8] +Input [7]: [cs_sold_date_sk#29, cs_bill_customer_sk#30, cs_item_sk#31, cs_order_number#32, cs_quantity#33, cs_wholesale_cost#34, cs_sales_price#35] +Condition : ((isnotnull(cs_sold_date_sk#29) AND isnotnull(cs_item_sk#31)) AND isnotnull(cs_bill_customer_sk#30)) + +(27) Exchange +Input [7]: [cs_sold_date_sk#29, cs_bill_customer_sk#30, cs_item_sk#31, cs_order_number#32, cs_quantity#33, cs_wholesale_cost#34, cs_sales_price#35] +Arguments: hashpartitioning(cs_order_number#32, cs_item_sk#31, 5), true, [id=#36] + +(28) Sort [codegen id : 9] +Input [7]: [cs_sold_date_sk#29, cs_bill_customer_sk#30, cs_item_sk#31, cs_order_number#32, cs_quantity#33, cs_wholesale_cost#34, 
cs_sales_price#35] +Arguments: [cs_order_number#32 ASC NULLS FIRST, cs_item_sk#31 ASC NULLS FIRST], false, 0 + +(29) Scan parquet default.catalog_returns +Output [2]: [cr_item_sk#37, cr_order_number#38] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/catalog_returns] +PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 10] +Input [2]: [cr_item_sk#37, cr_order_number#38] + +(31) Filter [codegen id : 10] +Input [2]: [cr_item_sk#37, cr_order_number#38] +Condition : (isnotnull(cr_order_number#38) AND isnotnull(cr_item_sk#37)) + +(32) Exchange +Input [2]: [cr_item_sk#37, cr_order_number#38] +Arguments: hashpartitioning(cr_order_number#38, cr_item_sk#37, 5), true, [id=#39] + +(33) Sort [codegen id : 11] +Input [2]: [cr_item_sk#37, cr_order_number#38] +Arguments: [cr_order_number#38 ASC NULLS FIRST, cr_item_sk#37 ASC NULLS FIRST], false, 0 + +(34) SortMergeJoin +Left keys [2]: [cs_order_number#32, cs_item_sk#31] +Right keys [2]: [cr_order_number#38, cr_item_sk#37] +Join condition: None + +(35) Filter [codegen id : 13] +Input [9]: [cs_sold_date_sk#29, cs_bill_customer_sk#30, cs_item_sk#31, cs_order_number#32, cs_quantity#33, cs_wholesale_cost#34, cs_sales_price#35, cr_item_sk#37, cr_order_number#38] +Condition : isnull(cr_order_number#38) + +(36) Project [codegen id : 13] +Output [6]: [cs_sold_date_sk#29, cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#33, cs_wholesale_cost#34, cs_sales_price#35] +Input [9]: [cs_sold_date_sk#29, cs_bill_customer_sk#30, cs_item_sk#31, cs_order_number#32, cs_quantity#33, cs_wholesale_cost#34, cs_sales_price#35, cr_item_sk#37, cr_order_number#38] + +(37) ReusedExchange [Reuses operator id: 17] +Output [2]: [d_date_sk#12, d_year#13] + +(38) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [cs_sold_date_sk#29] +Right keys [1]: [d_date_sk#12] +Join 
condition: None + +(39) Project [codegen id : 13] +Output [6]: [cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#33, cs_wholesale_cost#34, cs_sales_price#35, d_year#13] +Input [8]: [cs_sold_date_sk#29, cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#33, cs_wholesale_cost#34, cs_sales_price#35, d_date_sk#12, d_year#13] + +(40) HashAggregate [codegen id : 13] +Input [6]: [cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#33, cs_wholesale_cost#34, cs_sales_price#35, d_year#13] +Keys [3]: [d_year#13, cs_item_sk#31, cs_bill_customer_sk#30] +Functions [3]: [partial_sum(cast(cs_quantity#33 as bigint)), partial_sum(UnscaledValue(cs_wholesale_cost#34)), partial_sum(UnscaledValue(cs_sales_price#35))] +Aggregate Attributes [3]: [sum#40, sum#41, sum#42] +Results [6]: [d_year#13, cs_item_sk#31, cs_bill_customer_sk#30, sum#43, sum#44, sum#45] + +(41) Exchange +Input [6]: [d_year#13, cs_item_sk#31, cs_bill_customer_sk#30, sum#43, sum#44, sum#45] +Arguments: hashpartitioning(d_year#13, cs_item_sk#31, cs_bill_customer_sk#30, 5), true, [id=#46] + +(42) HashAggregate [codegen id : 14] +Input [6]: [d_year#13, cs_item_sk#31, cs_bill_customer_sk#30, sum#43, sum#44, sum#45] +Keys [3]: [d_year#13, cs_item_sk#31, cs_bill_customer_sk#30] +Functions [3]: [sum(cast(cs_quantity#33 as bigint)), sum(UnscaledValue(cs_wholesale_cost#34)), sum(UnscaledValue(cs_sales_price#35))] +Aggregate Attributes [3]: [sum(cast(cs_quantity#33 as bigint))#47, sum(UnscaledValue(cs_wholesale_cost#34))#48, sum(UnscaledValue(cs_sales_price#35))#49] +Results [6]: [d_year#13 AS cs_sold_year#50, cs_item_sk#31, cs_bill_customer_sk#30 AS cs_customer_sk#51, sum(cast(cs_quantity#33 as bigint))#47 AS cs_qty#52, MakeDecimal(sum(UnscaledValue(cs_wholesale_cost#34))#48,17,2) AS cs_wc#53, MakeDecimal(sum(UnscaledValue(cs_sales_price#35))#49,17,2) AS cs_sp#54] + +(43) Filter [codegen id : 14] +Input [6]: [cs_sold_year#50, cs_item_sk#31, cs_customer_sk#51, cs_qty#52, cs_wc#53, cs_sp#54] +Condition : (coalesce(cs_qty#52, 0) > 
0) + +(44) Sort [codegen id : 14] +Input [6]: [cs_sold_year#50, cs_item_sk#31, cs_customer_sk#51, cs_qty#52, cs_wc#53, cs_sp#54] +Arguments: [cs_sold_year#50 ASC NULLS FIRST, cs_item_sk#31 ASC NULLS FIRST, cs_customer_sk#51 ASC NULLS FIRST], false, 0 + +(45) SortMergeJoin [codegen id : 15] +Left keys [3]: [ss_sold_year#25, ss_item_sk#2, ss_customer_sk#3] +Right keys [3]: [cs_sold_year#50, cs_item_sk#31, cs_customer_sk#51] +Join condition: None + +(46) Project [codegen id : 15] +Output [9]: [ss_sold_year#25, ss_item_sk#2, ss_customer_sk#3, ss_qty#26, ss_wc#27, ss_sp#28, cs_qty#52, cs_wc#53, cs_sp#54] +Input [12]: [ss_sold_year#25, ss_item_sk#2, ss_customer_sk#3, ss_qty#26, ss_wc#27, ss_sp#28, cs_sold_year#50, cs_item_sk#31, cs_customer_sk#51, cs_qty#52, cs_wc#53, cs_sp#54] + +(47) Scan parquet default.web_sales +Output [7]: [ws_sold_date_sk#55, ws_item_sk#56, ws_bill_customer_sk#57, ws_order_number#58, ws_quantity#59, ws_wholesale_cost#60, ws_sales_price#61] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 16] +Input [7]: [ws_sold_date_sk#55, ws_item_sk#56, ws_bill_customer_sk#57, ws_order_number#58, ws_quantity#59, ws_wholesale_cost#60, ws_sales_price#61] + +(49) Filter [codegen id : 16] +Input [7]: [ws_sold_date_sk#55, ws_item_sk#56, ws_bill_customer_sk#57, ws_order_number#58, ws_quantity#59, ws_wholesale_cost#60, ws_sales_price#61] +Condition : ((isnotnull(ws_sold_date_sk#55) AND isnotnull(ws_bill_customer_sk#57)) AND isnotnull(ws_item_sk#56)) + +(50) Exchange +Input [7]: [ws_sold_date_sk#55, ws_item_sk#56, ws_bill_customer_sk#57, ws_order_number#58, ws_quantity#59, ws_wholesale_cost#60, ws_sales_price#61] +Arguments: hashpartitioning(cast(ws_order_number#58 as bigint), 
cast(ws_item_sk#56 as bigint), 5), true, [id=#62] + +(51) Sort [codegen id : 17] +Input [7]: [ws_sold_date_sk#55, ws_item_sk#56, ws_bill_customer_sk#57, ws_order_number#58, ws_quantity#59, ws_wholesale_cost#60, ws_sales_price#61] +Arguments: [cast(ws_order_number#58 as bigint) ASC NULLS FIRST, cast(ws_item_sk#56 as bigint) ASC NULLS FIRST], false, 0 + +(52) Scan parquet default.web_returns +Output [2]: [wr_item_sk#63, wr_order_number#64] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/web_returns] +PushedFilters: [IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] +ReadSchema: struct + +(53) ColumnarToRow [codegen id : 18] +Input [2]: [wr_item_sk#63, wr_order_number#64] + +(54) Filter [codegen id : 18] +Input [2]: [wr_item_sk#63, wr_order_number#64] +Condition : (isnotnull(wr_order_number#64) AND isnotnull(wr_item_sk#63)) + +(55) Exchange +Input [2]: [wr_item_sk#63, wr_order_number#64] +Arguments: hashpartitioning(wr_order_number#64, wr_item_sk#63, 5), true, [id=#65] + +(56) Sort [codegen id : 19] +Input [2]: [wr_item_sk#63, wr_order_number#64] +Arguments: [wr_order_number#64 ASC NULLS FIRST, wr_item_sk#63 ASC NULLS FIRST], false, 0 + +(57) SortMergeJoin +Left keys [2]: [cast(ws_order_number#58 as bigint), cast(ws_item_sk#56 as bigint)] +Right keys [2]: [wr_order_number#64, wr_item_sk#63] +Join condition: None + +(58) Filter [codegen id : 21] +Input [9]: [ws_sold_date_sk#55, ws_item_sk#56, ws_bill_customer_sk#57, ws_order_number#58, ws_quantity#59, ws_wholesale_cost#60, ws_sales_price#61, wr_item_sk#63, wr_order_number#64] +Condition : isnull(wr_order_number#64) + +(59) Project [codegen id : 21] +Output [6]: [ws_sold_date_sk#55, ws_item_sk#56, ws_bill_customer_sk#57, ws_quantity#59, ws_wholesale_cost#60, ws_sales_price#61] +Input [9]: [ws_sold_date_sk#55, ws_item_sk#56, ws_bill_customer_sk#57, ws_order_number#58, ws_quantity#59, 
ws_wholesale_cost#60, ws_sales_price#61, wr_item_sk#63, wr_order_number#64] + +(60) ReusedExchange [Reuses operator id: 17] +Output [2]: [d_date_sk#12, d_year#13] + +(61) BroadcastHashJoin [codegen id : 21] +Left keys [1]: [ws_sold_date_sk#55] +Right keys [1]: [d_date_sk#12] +Join condition: None + +(62) Project [codegen id : 21] +Output [6]: [ws_item_sk#56, ws_bill_customer_sk#57, ws_quantity#59, ws_wholesale_cost#60, ws_sales_price#61, d_year#13] +Input [8]: [ws_sold_date_sk#55, ws_item_sk#56, ws_bill_customer_sk#57, ws_quantity#59, ws_wholesale_cost#60, ws_sales_price#61, d_date_sk#12, d_year#13] + +(63) HashAggregate [codegen id : 21] +Input [6]: [ws_item_sk#56, ws_bill_customer_sk#57, ws_quantity#59, ws_wholesale_cost#60, ws_sales_price#61, d_year#13] +Keys [3]: [d_year#13, ws_item_sk#56, ws_bill_customer_sk#57] +Functions [3]: [partial_sum(cast(ws_quantity#59 as bigint)), partial_sum(UnscaledValue(ws_wholesale_cost#60)), partial_sum(UnscaledValue(ws_sales_price#61))] +Aggregate Attributes [3]: [sum#66, sum#67, sum#68] +Results [6]: [d_year#13, ws_item_sk#56, ws_bill_customer_sk#57, sum#69, sum#70, sum#71] + +(64) Exchange +Input [6]: [d_year#13, ws_item_sk#56, ws_bill_customer_sk#57, sum#69, sum#70, sum#71] +Arguments: hashpartitioning(d_year#13, ws_item_sk#56, ws_bill_customer_sk#57, 5), true, [id=#72] + +(65) HashAggregate [codegen id : 22] +Input [6]: [d_year#13, ws_item_sk#56, ws_bill_customer_sk#57, sum#69, sum#70, sum#71] +Keys [3]: [d_year#13, ws_item_sk#56, ws_bill_customer_sk#57] +Functions [3]: [sum(cast(ws_quantity#59 as bigint)), sum(UnscaledValue(ws_wholesale_cost#60)), sum(UnscaledValue(ws_sales_price#61))] +Aggregate Attributes [3]: [sum(cast(ws_quantity#59 as bigint))#73, sum(UnscaledValue(ws_wholesale_cost#60))#74, sum(UnscaledValue(ws_sales_price#61))#75] +Results [6]: [d_year#13 AS ws_sold_year#76, ws_item_sk#56, ws_bill_customer_sk#57 AS ws_customer_sk#77, sum(cast(ws_quantity#59 as bigint))#73 AS ws_qty#78, 
MakeDecimal(sum(UnscaledValue(ws_wholesale_cost#60))#74,17,2) AS ws_wc#79, MakeDecimal(sum(UnscaledValue(ws_sales_price#61))#75,17,2) AS ws_sp#80] + +(66) Filter [codegen id : 22] +Input [6]: [ws_sold_year#76, ws_item_sk#56, ws_customer_sk#77, ws_qty#78, ws_wc#79, ws_sp#80] +Condition : (coalesce(ws_qty#78, 0) > 0) + +(67) Sort [codegen id : 22] +Input [6]: [ws_sold_year#76, ws_item_sk#56, ws_customer_sk#77, ws_qty#78, ws_wc#79, ws_sp#80] +Arguments: [ws_sold_year#76 ASC NULLS FIRST, ws_item_sk#56 ASC NULLS FIRST, ws_customer_sk#77 ASC NULLS FIRST], false, 0 + +(68) SortMergeJoin [codegen id : 23] +Left keys [3]: [ss_sold_year#25, ss_item_sk#2, ss_customer_sk#3] +Right keys [3]: [ws_sold_year#76, ws_item_sk#56, ws_customer_sk#77] +Join condition: None + +(69) Project [codegen id : 23] +Output [13]: [round((cast(ss_qty#26 as double) / cast(coalesce((ws_qty#78 + cs_qty#52), 1) as double)), 2) AS ratio#81, ss_qty#26 AS store_qty#82, ss_wc#27 AS store_wholesale_cost#83, ss_sp#28 AS store_sales_price#84, (coalesce(ws_qty#78, 0) + coalesce(cs_qty#52, 0)) AS other_chan_qty#85, CheckOverflow((promote_precision(cast(coalesce(ws_wc#79, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_wc#53, 0.00) as decimal(18,2)))), DecimalType(18,2), true) AS other_chan_wholesale_cost#86, CheckOverflow((promote_precision(cast(coalesce(ws_sp#80, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_sp#54, 0.00) as decimal(18,2)))), DecimalType(18,2), true) AS other_chan_sales_price#87, ss_sold_year#25, ss_wc#27, ss_customer_sk#3, ss_qty#26, ss_sp#28, ss_item_sk#2] +Input [15]: [ss_sold_year#25, ss_item_sk#2, ss_customer_sk#3, ss_qty#26, ss_wc#27, ss_sp#28, cs_qty#52, cs_wc#53, cs_sp#54, ws_sold_year#76, ws_item_sk#56, ws_customer_sk#77, ws_qty#78, ws_wc#79, ws_sp#80] + +(70) TakeOrderedAndProject +Input [13]: [ratio#81, store_qty#82, store_wholesale_cost#83, store_sales_price#84, other_chan_qty#85, other_chan_wholesale_cost#86, other_chan_sales_price#87, 
ss_sold_year#25, ss_wc#27, ss_customer_sk#3, ss_qty#26, ss_sp#28, ss_item_sk#2] +Arguments: 100, [ss_sold_year#25 ASC NULLS FIRST, ss_item_sk#2 ASC NULLS FIRST, ss_customer_sk#3 ASC NULLS FIRST, ss_qty#26 DESC NULLS LAST, ss_wc#27 DESC NULLS LAST, ss_sp#28 DESC NULLS LAST, other_chan_qty#85 ASC NULLS FIRST, other_chan_wholesale_cost#86 ASC NULLS FIRST, other_chan_sales_price#87 ASC NULLS FIRST, ratio#81 ASC NULLS FIRST], [ratio#81, store_qty#82, store_wholesale_cost#83, store_sales_price#84, other_chan_qty#85, other_chan_wholesale_cost#86, other_chan_sales_price#87] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78.sf100/simplified.txt new file mode 100644 index 0000000000000..11d27f3663012 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78.sf100/simplified.txt @@ -0,0 +1,117 @@ +TakeOrderedAndProject [other_chan_qty,other_chan_sales_price,other_chan_wholesale_cost,ratio,ss_customer_sk,ss_item_sk,ss_qty,ss_sold_year,ss_sp,ss_wc,store_qty,store_sales_price,store_wholesale_cost] + WholeStageCodegen (23) + Project [cs_qty,cs_sp,cs_wc,ss_customer_sk,ss_item_sk,ss_qty,ss_sold_year,ss_sp,ss_wc,ws_qty,ws_sp,ws_wc] + SortMergeJoin [ss_customer_sk,ss_item_sk,ss_sold_year,ws_customer_sk,ws_item_sk,ws_sold_year] + InputAdapter + WholeStageCodegen (15) + Project [cs_qty,cs_sp,cs_wc,ss_customer_sk,ss_item_sk,ss_qty,ss_sold_year,ss_sp,ss_wc] + SortMergeJoin [cs_customer_sk,cs_item_sk,cs_sold_year,ss_customer_sk,ss_item_sk,ss_sold_year] + InputAdapter + WholeStageCodegen (7) + Sort [ss_customer_sk,ss_item_sk,ss_sold_year] + HashAggregate [d_year,ss_customer_sk,ss_item_sk,sum,sum,sum] [ss_qty,ss_sold_year,ss_sp,ss_wc,sum,sum,sum,sum(UnscaledValue(ss_sales_price)),sum(UnscaledValue(ss_wholesale_cost)),sum(cast(ss_quantity as bigint))] + InputAdapter + Exchange [d_year,ss_customer_sk,ss_item_sk] #1 + 
WholeStageCodegen (6) + HashAggregate [d_year,ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_wholesale_cost] [sum,sum,sum,sum,sum,sum] + Project [d_year,ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_wholesale_cost] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_wholesale_cost] + Filter [sr_ticket_number] + InputAdapter + SortMergeJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + WholeStageCodegen (2) + Sort [ss_item_sk,ss_ticket_number] + InputAdapter + Exchange [ss_item_sk,ss_ticket_number] #2 + WholeStageCodegen (1) + Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_ticket_number,ss_wholesale_cost] + WholeStageCodegen (4) + Sort [sr_item_sk,sr_ticket_number] + InputAdapter + Exchange [sr_item_sk,sr_ticket_number] #3 + WholeStageCodegen (3) + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (5) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + WholeStageCodegen (14) + Sort [cs_customer_sk,cs_item_sk,cs_sold_year] + Filter [cs_qty] + HashAggregate [cs_bill_customer_sk,cs_item_sk,d_year,sum,sum,sum] [cs_customer_sk,cs_qty,cs_sold_year,cs_sp,cs_wc,sum,sum,sum,sum(UnscaledValue(cs_sales_price)),sum(UnscaledValue(cs_wholesale_cost)),sum(cast(cs_quantity as bigint))] + InputAdapter + Exchange [cs_bill_customer_sk,cs_item_sk,d_year] #5 + WholeStageCodegen (13) + HashAggregate [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sales_price,cs_wholesale_cost,d_year] [sum,sum,sum,sum,sum,sum] + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sales_price,cs_wholesale_cost,d_year] + BroadcastHashJoin 
[cs_sold_date_sk,d_date_sk] + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_wholesale_cost] + Filter [cr_order_number] + InputAdapter + SortMergeJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + WholeStageCodegen (9) + Sort [cs_item_sk,cs_order_number] + InputAdapter + Exchange [cs_item_sk,cs_order_number] #6 + WholeStageCodegen (8) + Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_wholesale_cost] + WholeStageCodegen (11) + Sort [cr_item_sk,cr_order_number] + InputAdapter + Exchange [cr_item_sk,cr_order_number] #7 + WholeStageCodegen (10) + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number] + InputAdapter + ReusedExchange [d_date_sk,d_year] #4 + InputAdapter + WholeStageCodegen (22) + Sort [ws_customer_sk,ws_item_sk,ws_sold_year] + Filter [ws_qty] + HashAggregate [d_year,sum,sum,sum,ws_bill_customer_sk,ws_item_sk] [sum,sum,sum,sum(UnscaledValue(ws_sales_price)),sum(UnscaledValue(ws_wholesale_cost)),sum(cast(ws_quantity as bigint)),ws_customer_sk,ws_qty,ws_sold_year,ws_sp,ws_wc] + InputAdapter + Exchange [d_year,ws_bill_customer_sk,ws_item_sk] #8 + WholeStageCodegen (21) + HashAggregate [d_year,ws_bill_customer_sk,ws_item_sk,ws_quantity,ws_sales_price,ws_wholesale_cost] [sum,sum,sum,sum,sum,sum] + Project [d_year,ws_bill_customer_sk,ws_item_sk,ws_quantity,ws_sales_price,ws_wholesale_cost] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_bill_customer_sk,ws_item_sk,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_wholesale_cost] + Filter [wr_order_number] + InputAdapter + SortMergeJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + WholeStageCodegen (17) + Sort [ws_item_sk,ws_order_number] + InputAdapter + Exchange [ws_item_sk,ws_order_number] 
#9 + WholeStageCodegen (16) + Filter [ws_bill_customer_sk,ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_item_sk,ws_order_number,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_wholesale_cost] + WholeStageCodegen (19) + Sort [wr_item_sk,wr_order_number] + InputAdapter + Exchange [wr_item_sk,wr_order_number] #10 + WholeStageCodegen (18) + Filter [wr_item_sk,wr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_item_sk,wr_order_number] + InputAdapter + ReusedExchange [d_date_sk,d_year] #4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78/explain.txt new file mode 100644 index 0000000000000..d9a62f16e0475 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78/explain.txt @@ -0,0 +1,341 @@ +== Physical Plan == +TakeOrderedAndProject (60) ++- * Project (59) + +- * BroadcastHashJoin Inner BuildRight (58) + :- * Project (39) + : +- * BroadcastHashJoin Inner BuildRight (38) + : :- * HashAggregate (19) + : : +- Exchange (18) + : : +- * HashAggregate (17) + : : +- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * Filter (9) + : : : +- * BroadcastHashJoin LeftOuter BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.store_returns (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.date_dim (11) + : +- BroadcastExchange (37) + : +- * Filter (36) + : +- * HashAggregate (35) + : +- Exchange (34) + : +- * HashAggregate (33) + : +- * Project (32) + : +- * BroadcastHashJoin Inner BuildRight (31) + : :- * Project (29) + : 
: +- * Filter (28) + : : +- * BroadcastHashJoin LeftOuter BuildRight (27) + : : :- * Filter (22) + : : : +- * ColumnarToRow (21) + : : : +- Scan parquet default.web_sales (20) + : : +- BroadcastExchange (26) + : : +- * Filter (25) + : : +- * ColumnarToRow (24) + : : +- Scan parquet default.web_returns (23) + : +- ReusedExchange (30) + +- BroadcastExchange (57) + +- * Filter (56) + +- * HashAggregate (55) + +- Exchange (54) + +- * HashAggregate (53) + +- * Project (52) + +- * BroadcastHashJoin Inner BuildRight (51) + :- * Project (49) + : +- * Filter (48) + : +- * BroadcastHashJoin LeftOuter BuildRight (47) + : :- * Filter (42) + : : +- * ColumnarToRow (41) + : : +- Scan parquet default.catalog_sales (40) + : +- BroadcastExchange (46) + : +- * Filter (45) + : +- * ColumnarToRow (44) + : +- Scan parquet default.catalog_returns (43) + +- ReusedExchange (50) + + +(1) Scan parquet default.store_sales +Output [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#4, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#4, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7] + +(3) Filter [codegen id : 3] +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#4, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7] +Condition : ((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_customer_sk#3)) + +(4) Scan parquet default.store_returns +Output [2]: [sr_item_sk#8, sr_ticket_number#9] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [sr_item_sk#8, sr_ticket_number#9] + +(6) Filter [codegen id : 1] +Input [2]: [sr_item_sk#8, sr_ticket_number#9] +Condition : (isnotnull(sr_ticket_number#9) AND isnotnull(sr_item_sk#8)) + +(7) BroadcastExchange +Input [2]: [sr_item_sk#8, sr_ticket_number#9] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#10] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [2]: [cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#2 as bigint)] +Right keys [2]: [sr_ticket_number#9, sr_item_sk#8] +Join condition: None + +(9) Filter [codegen id : 3] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#4, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7, sr_item_sk#8, sr_ticket_number#9] +Condition : isnull(sr_ticket_number#9) + +(10) Project [codegen id : 3] +Output [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#4, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7, sr_item_sk#8, sr_ticket_number#9] + +(11) Scan parquet default.date_dim +Output [2]: [d_date_sk#11, d_year#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#11, d_year#12] + +(13) Filter [codegen id : 2] +Input [2]: [d_date_sk#11, d_year#12] +Condition : ((isnotnull(d_year#12) AND (d_year#12 = 2000)) AND isnotnull(d_date_sk#11)) + +(14) 
BroadcastExchange +Input [2]: [d_date_sk#11, d_year#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#13] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#11] +Join condition: None + +(16) Project [codegen id : 3] +Output [6]: [ss_item_sk#2, ss_customer_sk#3, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7, d_year#12] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7, d_date_sk#11, d_year#12] + +(17) HashAggregate [codegen id : 3] +Input [6]: [ss_item_sk#2, ss_customer_sk#3, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7, d_year#12] +Keys [3]: [d_year#12, ss_item_sk#2, ss_customer_sk#3] +Functions [3]: [partial_sum(cast(ss_quantity#5 as bigint)), partial_sum(UnscaledValue(ss_wholesale_cost#6)), partial_sum(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [3]: [sum#14, sum#15, sum#16] +Results [6]: [d_year#12, ss_item_sk#2, ss_customer_sk#3, sum#17, sum#18, sum#19] + +(18) Exchange +Input [6]: [d_year#12, ss_item_sk#2, ss_customer_sk#3, sum#17, sum#18, sum#19] +Arguments: hashpartitioning(d_year#12, ss_item_sk#2, ss_customer_sk#3, 5), true, [id=#20] + +(19) HashAggregate [codegen id : 12] +Input [6]: [d_year#12, ss_item_sk#2, ss_customer_sk#3, sum#17, sum#18, sum#19] +Keys [3]: [d_year#12, ss_item_sk#2, ss_customer_sk#3] +Functions [3]: [sum(cast(ss_quantity#5 as bigint)), sum(UnscaledValue(ss_wholesale_cost#6)), sum(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [3]: [sum(cast(ss_quantity#5 as bigint))#21, sum(UnscaledValue(ss_wholesale_cost#6))#22, sum(UnscaledValue(ss_sales_price#7))#23] +Results [6]: [d_year#12 AS ss_sold_year#24, ss_item_sk#2, ss_customer_sk#3, sum(cast(ss_quantity#5 as bigint))#21 AS ss_qty#25, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#6))#22,17,2) AS ss_wc#26, MakeDecimal(sum(UnscaledValue(ss_sales_price#7))#23,17,2) AS ss_sp#27] + +(20) 
Scan parquet default.web_sales +Output [7]: [ws_sold_date_sk#28, ws_item_sk#29, ws_bill_customer_sk#30, ws_order_number#31, ws_quantity#32, ws_wholesale_cost#33, ws_sales_price#34] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [7]: [ws_sold_date_sk#28, ws_item_sk#29, ws_bill_customer_sk#30, ws_order_number#31, ws_quantity#32, ws_wholesale_cost#33, ws_sales_price#34] + +(22) Filter [codegen id : 6] +Input [7]: [ws_sold_date_sk#28, ws_item_sk#29, ws_bill_customer_sk#30, ws_order_number#31, ws_quantity#32, ws_wholesale_cost#33, ws_sales_price#34] +Condition : ((isnotnull(ws_sold_date_sk#28) AND isnotnull(ws_bill_customer_sk#30)) AND isnotnull(ws_item_sk#29)) + +(23) Scan parquet default.web_returns +Output [2]: [wr_item_sk#35, wr_order_number#36] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/web_returns] +PushedFilters: [IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [2]: [wr_item_sk#35, wr_order_number#36] + +(25) Filter [codegen id : 4] +Input [2]: [wr_item_sk#35, wr_order_number#36] +Condition : (isnotnull(wr_order_number#36) AND isnotnull(wr_item_sk#35)) + +(26) BroadcastExchange +Input [2]: [wr_item_sk#35, wr_order_number#36] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#37] + +(27) BroadcastHashJoin [codegen id : 6] +Left keys [2]: [cast(ws_order_number#31 as bigint), cast(ws_item_sk#29 as bigint)] +Right keys [2]: [wr_order_number#36, wr_item_sk#35] +Join condition: None + +(28) Filter [codegen id : 6] +Input [9]: [ws_sold_date_sk#28, 
ws_item_sk#29, ws_bill_customer_sk#30, ws_order_number#31, ws_quantity#32, ws_wholesale_cost#33, ws_sales_price#34, wr_item_sk#35, wr_order_number#36] +Condition : isnull(wr_order_number#36) + +(29) Project [codegen id : 6] +Output [6]: [ws_sold_date_sk#28, ws_item_sk#29, ws_bill_customer_sk#30, ws_quantity#32, ws_wholesale_cost#33, ws_sales_price#34] +Input [9]: [ws_sold_date_sk#28, ws_item_sk#29, ws_bill_customer_sk#30, ws_order_number#31, ws_quantity#32, ws_wholesale_cost#33, ws_sales_price#34, wr_item_sk#35, wr_order_number#36] + +(30) ReusedExchange [Reuses operator id: 14] +Output [2]: [d_date_sk#11, d_year#12] + +(31) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#28] +Right keys [1]: [d_date_sk#11] +Join condition: None + +(32) Project [codegen id : 6] +Output [6]: [ws_item_sk#29, ws_bill_customer_sk#30, ws_quantity#32, ws_wholesale_cost#33, ws_sales_price#34, d_year#12] +Input [8]: [ws_sold_date_sk#28, ws_item_sk#29, ws_bill_customer_sk#30, ws_quantity#32, ws_wholesale_cost#33, ws_sales_price#34, d_date_sk#11, d_year#12] + +(33) HashAggregate [codegen id : 6] +Input [6]: [ws_item_sk#29, ws_bill_customer_sk#30, ws_quantity#32, ws_wholesale_cost#33, ws_sales_price#34, d_year#12] +Keys [3]: [d_year#12, ws_item_sk#29, ws_bill_customer_sk#30] +Functions [3]: [partial_sum(cast(ws_quantity#32 as bigint)), partial_sum(UnscaledValue(ws_wholesale_cost#33)), partial_sum(UnscaledValue(ws_sales_price#34))] +Aggregate Attributes [3]: [sum#38, sum#39, sum#40] +Results [6]: [d_year#12, ws_item_sk#29, ws_bill_customer_sk#30, sum#41, sum#42, sum#43] + +(34) Exchange +Input [6]: [d_year#12, ws_item_sk#29, ws_bill_customer_sk#30, sum#41, sum#42, sum#43] +Arguments: hashpartitioning(d_year#12, ws_item_sk#29, ws_bill_customer_sk#30, 5), true, [id=#44] + +(35) HashAggregate [codegen id : 7] +Input [6]: [d_year#12, ws_item_sk#29, ws_bill_customer_sk#30, sum#41, sum#42, sum#43] +Keys [3]: [d_year#12, ws_item_sk#29, ws_bill_customer_sk#30] +Functions [3]: 
[sum(cast(ws_quantity#32 as bigint)), sum(UnscaledValue(ws_wholesale_cost#33)), sum(UnscaledValue(ws_sales_price#34))] +Aggregate Attributes [3]: [sum(cast(ws_quantity#32 as bigint))#45, sum(UnscaledValue(ws_wholesale_cost#33))#46, sum(UnscaledValue(ws_sales_price#34))#47] +Results [6]: [d_year#12 AS ws_sold_year#48, ws_item_sk#29, ws_bill_customer_sk#30 AS ws_customer_sk#49, sum(cast(ws_quantity#32 as bigint))#45 AS ws_qty#50, MakeDecimal(sum(UnscaledValue(ws_wholesale_cost#33))#46,17,2) AS ws_wc#51, MakeDecimal(sum(UnscaledValue(ws_sales_price#34))#47,17,2) AS ws_sp#52] + +(36) Filter [codegen id : 7] +Input [6]: [ws_sold_year#48, ws_item_sk#29, ws_customer_sk#49, ws_qty#50, ws_wc#51, ws_sp#52] +Condition : (coalesce(ws_qty#50, 0) > 0) + +(37) BroadcastExchange +Input [6]: [ws_sold_year#48, ws_item_sk#29, ws_customer_sk#49, ws_qty#50, ws_wc#51, ws_sp#52] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#53] + +(38) BroadcastHashJoin [codegen id : 12] +Left keys [3]: [ss_sold_year#24, ss_item_sk#2, ss_customer_sk#3] +Right keys [3]: [ws_sold_year#48, ws_item_sk#29, ws_customer_sk#49] +Join condition: None + +(39) Project [codegen id : 12] +Output [9]: [ss_sold_year#24, ss_item_sk#2, ss_customer_sk#3, ss_qty#25, ss_wc#26, ss_sp#27, ws_qty#50, ws_wc#51, ws_sp#52] +Input [12]: [ss_sold_year#24, ss_item_sk#2, ss_customer_sk#3, ss_qty#25, ss_wc#26, ss_sp#27, ws_sold_year#48, ws_item_sk#29, ws_customer_sk#49, ws_qty#50, ws_wc#51, ws_sp#52] + +(40) Scan parquet default.catalog_sales +Output [7]: [cs_sold_date_sk#54, cs_bill_customer_sk#55, cs_item_sk#56, cs_order_number#57, cs_quantity#58, cs_wholesale_cost#59, cs_sales_price#60] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk), IsNotNull(cs_bill_customer_sk)] 
+ReadSchema: struct + +(41) ColumnarToRow [codegen id : 10] +Input [7]: [cs_sold_date_sk#54, cs_bill_customer_sk#55, cs_item_sk#56, cs_order_number#57, cs_quantity#58, cs_wholesale_cost#59, cs_sales_price#60] + +(42) Filter [codegen id : 10] +Input [7]: [cs_sold_date_sk#54, cs_bill_customer_sk#55, cs_item_sk#56, cs_order_number#57, cs_quantity#58, cs_wholesale_cost#59, cs_sales_price#60] +Condition : ((isnotnull(cs_sold_date_sk#54) AND isnotnull(cs_item_sk#56)) AND isnotnull(cs_bill_customer_sk#55)) + +(43) Scan parquet default.catalog_returns +Output [2]: [cr_item_sk#61, cr_order_number#62] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/catalog_returns] +PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(44) ColumnarToRow [codegen id : 8] +Input [2]: [cr_item_sk#61, cr_order_number#62] + +(45) Filter [codegen id : 8] +Input [2]: [cr_item_sk#61, cr_order_number#62] +Condition : (isnotnull(cr_order_number#62) AND isnotnull(cr_item_sk#61)) + +(46) BroadcastExchange +Input [2]: [cr_item_sk#61, cr_order_number#62] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, false] as bigint), 32) | (cast(input[0, int, false] as bigint) & 4294967295))),false), [id=#63] + +(47) BroadcastHashJoin [codegen id : 10] +Left keys [2]: [cs_order_number#57, cs_item_sk#56] +Right keys [2]: [cr_order_number#62, cr_item_sk#61] +Join condition: None + +(48) Filter [codegen id : 10] +Input [9]: [cs_sold_date_sk#54, cs_bill_customer_sk#55, cs_item_sk#56, cs_order_number#57, cs_quantity#58, cs_wholesale_cost#59, cs_sales_price#60, cr_item_sk#61, cr_order_number#62] +Condition : isnull(cr_order_number#62) + +(49) Project [codegen id : 10] +Output [6]: [cs_sold_date_sk#54, cs_bill_customer_sk#55, cs_item_sk#56, cs_quantity#58, cs_wholesale_cost#59, cs_sales_price#60] +Input [9]: [cs_sold_date_sk#54, cs_bill_customer_sk#55, 
cs_item_sk#56, cs_order_number#57, cs_quantity#58, cs_wholesale_cost#59, cs_sales_price#60, cr_item_sk#61, cr_order_number#62] + +(50) ReusedExchange [Reuses operator id: 14] +Output [2]: [d_date_sk#11, d_year#12] + +(51) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_sold_date_sk#54] +Right keys [1]: [d_date_sk#11] +Join condition: None + +(52) Project [codegen id : 10] +Output [6]: [cs_bill_customer_sk#55, cs_item_sk#56, cs_quantity#58, cs_wholesale_cost#59, cs_sales_price#60, d_year#12] +Input [8]: [cs_sold_date_sk#54, cs_bill_customer_sk#55, cs_item_sk#56, cs_quantity#58, cs_wholesale_cost#59, cs_sales_price#60, d_date_sk#11, d_year#12] + +(53) HashAggregate [codegen id : 10] +Input [6]: [cs_bill_customer_sk#55, cs_item_sk#56, cs_quantity#58, cs_wholesale_cost#59, cs_sales_price#60, d_year#12] +Keys [3]: [d_year#12, cs_item_sk#56, cs_bill_customer_sk#55] +Functions [3]: [partial_sum(cast(cs_quantity#58 as bigint)), partial_sum(UnscaledValue(cs_wholesale_cost#59)), partial_sum(UnscaledValue(cs_sales_price#60))] +Aggregate Attributes [3]: [sum#64, sum#65, sum#66] +Results [6]: [d_year#12, cs_item_sk#56, cs_bill_customer_sk#55, sum#67, sum#68, sum#69] + +(54) Exchange +Input [6]: [d_year#12, cs_item_sk#56, cs_bill_customer_sk#55, sum#67, sum#68, sum#69] +Arguments: hashpartitioning(d_year#12, cs_item_sk#56, cs_bill_customer_sk#55, 5), true, [id=#70] + +(55) HashAggregate [codegen id : 11] +Input [6]: [d_year#12, cs_item_sk#56, cs_bill_customer_sk#55, sum#67, sum#68, sum#69] +Keys [3]: [d_year#12, cs_item_sk#56, cs_bill_customer_sk#55] +Functions [3]: [sum(cast(cs_quantity#58 as bigint)), sum(UnscaledValue(cs_wholesale_cost#59)), sum(UnscaledValue(cs_sales_price#60))] +Aggregate Attributes [3]: [sum(cast(cs_quantity#58 as bigint))#71, sum(UnscaledValue(cs_wholesale_cost#59))#72, sum(UnscaledValue(cs_sales_price#60))#73] +Results [6]: [d_year#12 AS cs_sold_year#74, cs_item_sk#56, cs_bill_customer_sk#55 AS cs_customer_sk#75, sum(cast(cs_quantity#58 as 
bigint))#71 AS cs_qty#76, MakeDecimal(sum(UnscaledValue(cs_wholesale_cost#59))#72,17,2) AS cs_wc#77, MakeDecimal(sum(UnscaledValue(cs_sales_price#60))#73,17,2) AS cs_sp#78] + +(56) Filter [codegen id : 11] +Input [6]: [cs_sold_year#74, cs_item_sk#56, cs_customer_sk#75, cs_qty#76, cs_wc#77, cs_sp#78] +Condition : (coalesce(cs_qty#76, 0) > 0) + +(57) BroadcastExchange +Input [6]: [cs_sold_year#74, cs_item_sk#56, cs_customer_sk#75, cs_qty#76, cs_wc#77, cs_sp#78] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#79] + +(58) BroadcastHashJoin [codegen id : 12] +Left keys [3]: [ss_sold_year#24, ss_item_sk#2, ss_customer_sk#3] +Right keys [3]: [cs_sold_year#74, cs_item_sk#56, cs_customer_sk#75] +Join condition: None + +(59) Project [codegen id : 12] +Output [13]: [round((cast(ss_qty#25 as double) / cast(coalesce((ws_qty#50 + cs_qty#76), 1) as double)), 2) AS ratio#80, ss_qty#25 AS store_qty#81, ss_wc#26 AS store_wholesale_cost#82, ss_sp#27 AS store_sales_price#83, (coalesce(ws_qty#50, 0) + coalesce(cs_qty#76, 0)) AS other_chan_qty#84, CheckOverflow((promote_precision(cast(coalesce(ws_wc#51, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_wc#77, 0.00) as decimal(18,2)))), DecimalType(18,2), true) AS other_chan_wholesale_cost#85, CheckOverflow((promote_precision(cast(coalesce(ws_sp#52, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_sp#78, 0.00) as decimal(18,2)))), DecimalType(18,2), true) AS other_chan_sales_price#86, ss_sold_year#24, ss_qty#25, ss_item_sk#2, ss_wc#26, ss_customer_sk#3, ss_sp#27] +Input [15]: [ss_sold_year#24, ss_item_sk#2, ss_customer_sk#3, ss_qty#25, ss_wc#26, ss_sp#27, ws_qty#50, ws_wc#51, ws_sp#52, cs_sold_year#74, cs_item_sk#56, cs_customer_sk#75, cs_qty#76, cs_wc#77, cs_sp#78] + +(60) TakeOrderedAndProject +Input [13]: [ratio#80, store_qty#81, store_wholesale_cost#82, store_sales_price#83, other_chan_qty#84, other_chan_wholesale_cost#85, 
other_chan_sales_price#86, ss_sold_year#24, ss_qty#25, ss_item_sk#2, ss_wc#26, ss_customer_sk#3, ss_sp#27] +Arguments: 100, [ss_sold_year#24 ASC NULLS FIRST, ss_item_sk#2 ASC NULLS FIRST, ss_customer_sk#3 ASC NULLS FIRST, ss_qty#25 DESC NULLS LAST, ss_wc#26 DESC NULLS LAST, ss_sp#27 DESC NULLS LAST, other_chan_qty#84 ASC NULLS FIRST, other_chan_wholesale_cost#85 ASC NULLS FIRST, other_chan_sales_price#86 ASC NULLS FIRST, ratio#80 ASC NULLS FIRST], [ratio#80, store_qty#81, store_wholesale_cost#82, store_sales_price#83, other_chan_qty#84, other_chan_wholesale_cost#85, other_chan_sales_price#86] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78/simplified.txt new file mode 100644 index 0000000000000..85ead08da9447 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78/simplified.txt @@ -0,0 +1,88 @@ +TakeOrderedAndProject [other_chan_qty,other_chan_sales_price,other_chan_wholesale_cost,ratio,ss_customer_sk,ss_item_sk,ss_qty,ss_sold_year,ss_sp,ss_wc,store_qty,store_sales_price,store_wholesale_cost] + WholeStageCodegen (12) + Project [cs_qty,cs_sp,cs_wc,ss_customer_sk,ss_item_sk,ss_qty,ss_sold_year,ss_sp,ss_wc,ws_qty,ws_sp,ws_wc] + BroadcastHashJoin [cs_customer_sk,cs_item_sk,cs_sold_year,ss_customer_sk,ss_item_sk,ss_sold_year] + Project [ss_customer_sk,ss_item_sk,ss_qty,ss_sold_year,ss_sp,ss_wc,ws_qty,ws_sp,ws_wc] + BroadcastHashJoin [ss_customer_sk,ss_item_sk,ss_sold_year,ws_customer_sk,ws_item_sk,ws_sold_year] + HashAggregate [d_year,ss_customer_sk,ss_item_sk,sum,sum,sum] [ss_qty,ss_sold_year,ss_sp,ss_wc,sum,sum,sum,sum(UnscaledValue(ss_sales_price)),sum(UnscaledValue(ss_wholesale_cost)),sum(cast(ss_quantity as bigint))] + InputAdapter + Exchange [d_year,ss_customer_sk,ss_item_sk] #1 + WholeStageCodegen (3) + HashAggregate [d_year,ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_wholesale_cost] 
[sum,sum,sum,sum,sum,sum] + Project [d_year,ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_wholesale_cost] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_wholesale_cost] + Filter [sr_ticket_number] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_ticket_number,ss_wholesale_cost] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [d_date_sk,d_year] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (7) + Filter [ws_qty] + HashAggregate [d_year,sum,sum,sum,ws_bill_customer_sk,ws_item_sk] [sum,sum,sum,sum(UnscaledValue(ws_sales_price)),sum(UnscaledValue(ws_wholesale_cost)),sum(cast(ws_quantity as bigint)),ws_customer_sk,ws_qty,ws_sold_year,ws_sp,ws_wc] + InputAdapter + Exchange [d_year,ws_bill_customer_sk,ws_item_sk] #5 + WholeStageCodegen (6) + HashAggregate [d_year,ws_bill_customer_sk,ws_item_sk,ws_quantity,ws_sales_price,ws_wholesale_cost] [sum,sum,sum,sum,sum,sum] + Project [d_year,ws_bill_customer_sk,ws_item_sk,ws_quantity,ws_sales_price,ws_wholesale_cost] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_bill_customer_sk,ws_item_sk,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_wholesale_cost] + Filter [wr_order_number] + BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + Filter [ws_bill_customer_sk,ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales 
[ws_bill_customer_sk,ws_item_sk,ws_order_number,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_wholesale_cost] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + Filter [wr_item_sk,wr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_item_sk,wr_order_number] + InputAdapter + ReusedExchange [d_date_sk,d_year] #3 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (11) + Filter [cs_qty] + HashAggregate [cs_bill_customer_sk,cs_item_sk,d_year,sum,sum,sum] [cs_customer_sk,cs_qty,cs_sold_year,cs_sp,cs_wc,sum,sum,sum,sum(UnscaledValue(cs_sales_price)),sum(UnscaledValue(cs_wholesale_cost)),sum(cast(cs_quantity as bigint))] + InputAdapter + Exchange [cs_bill_customer_sk,cs_item_sk,d_year] #8 + WholeStageCodegen (10) + HashAggregate [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sales_price,cs_wholesale_cost,d_year] [sum,sum,sum,sum,sum,sum] + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sales_price,cs_wholesale_cost,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_wholesale_cost] + Filter [cr_order_number] + BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_wholesale_cost] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (8) + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number] + InputAdapter + ReusedExchange [d_date_sk,d_year] #3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/explain.txt new file mode 100644 index 0000000000000..7eead39d2d1d4 --- 
/dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/explain.txt @@ -0,0 +1,707 @@ +== Physical Plan == +TakeOrderedAndProject (125) ++- * HashAggregate (124) + +- Exchange (123) + +- * HashAggregate (122) + +- Union (121) + :- * HashAggregate (115) + : +- Exchange (114) + : +- * HashAggregate (113) + : +- Union (112) + : :- * HashAggregate (106) + : : +- Exchange (105) + : : +- * HashAggregate (104) + : : +- Union (103) + : : :- * HashAggregate (42) + : : : +- Exchange (41) + : : : +- * HashAggregate (40) + : : : +- * Project (39) + : : : +- * BroadcastHashJoin Inner BuildRight (38) + : : : :- * Project (33) + : : : : +- * BroadcastHashJoin Inner BuildRight (32) + : : : : :- * Project (26) + : : : : : +- * BroadcastHashJoin Inner BuildRight (25) + : : : : : :- * Project (19) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (18) + : : : : : : :- * Project (12) + : : : : : : : +- SortMergeJoin LeftOuter (11) + : : : : : : : :- * Sort (5) + : : : : : : : : +- Exchange (4) + : : : : : : : : +- * Filter (3) + : : : : : : : : +- * ColumnarToRow (2) + : : : : : : : : +- Scan parquet default.store_sales (1) + : : : : : : : +- * Sort (10) + : : : : : : : +- Exchange (9) + : : : : : : : +- * Filter (8) + : : : : : : : +- * ColumnarToRow (7) + : : : : : : : +- Scan parquet default.store_returns (6) + : : : : : : +- BroadcastExchange (17) + : : : : : : +- * Project (16) + : : : : : : +- * Filter (15) + : : : : : : +- * ColumnarToRow (14) + : : : : : : +- Scan parquet default.item (13) + : : : : : +- BroadcastExchange (24) + : : : : : +- * Project (23) + : : : : : +- * Filter (22) + : : : : : +- * ColumnarToRow (21) + : : : : : +- Scan parquet default.date_dim (20) + : : : : +- BroadcastExchange (31) + : : : : +- * Project (30) + : : : : +- * Filter (29) + : : : : +- * ColumnarToRow (28) + : : : : +- Scan parquet default.promotion (27) + : : : +- BroadcastExchange (37) + : : : +- * Filter (36) + : : : +- * ColumnarToRow (35) + : 
: : +- Scan parquet default.store (34) + : : :- * HashAggregate (72) + : : : +- Exchange (71) + : : : +- * HashAggregate (70) + : : : +- * Project (69) + : : : +- * BroadcastHashJoin Inner BuildRight (68) + : : : :- * Project (63) + : : : : +- * BroadcastHashJoin Inner BuildRight (62) + : : : : :- * Project (60) + : : : : : +- * BroadcastHashJoin Inner BuildRight (59) + : : : : : :- * Project (57) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (56) + : : : : : : :- * Project (54) + : : : : : : : +- SortMergeJoin LeftOuter (53) + : : : : : : : :- * Sort (47) + : : : : : : : : +- Exchange (46) + : : : : : : : : +- * Filter (45) + : : : : : : : : +- * ColumnarToRow (44) + : : : : : : : : +- Scan parquet default.catalog_sales (43) + : : : : : : : +- * Sort (52) + : : : : : : : +- Exchange (51) + : : : : : : : +- * Filter (50) + : : : : : : : +- * ColumnarToRow (49) + : : : : : : : +- Scan parquet default.catalog_returns (48) + : : : : : : +- ReusedExchange (55) + : : : : : +- ReusedExchange (58) + : : : : +- ReusedExchange (61) + : : : +- BroadcastExchange (67) + : : : +- * Filter (66) + : : : +- * ColumnarToRow (65) + : : : +- Scan parquet default.catalog_page (64) + : : +- * HashAggregate (102) + : : +- Exchange (101) + : : +- * HashAggregate (100) + : : +- * Project (99) + : : +- * BroadcastHashJoin Inner BuildRight (98) + : : :- * Project (93) + : : : +- * BroadcastHashJoin Inner BuildRight (92) + : : : :- * Project (90) + : : : : +- * BroadcastHashJoin Inner BuildRight (89) + : : : : :- * Project (87) + : : : : : +- * BroadcastHashJoin Inner BuildRight (86) + : : : : : :- * Project (84) + : : : : : : +- SortMergeJoin LeftOuter (83) + : : : : : : :- * Sort (77) + : : : : : : : +- Exchange (76) + : : : : : : : +- * Filter (75) + : : : : : : : +- * ColumnarToRow (74) + : : : : : : : +- Scan parquet default.web_sales (73) + : : : : : : +- * Sort (82) + : : : : : : +- Exchange (81) + : : : : : : +- * Filter (80) + : : : : : : +- * ColumnarToRow (79) + : : : : : 
: +- Scan parquet default.web_returns (78) + : : : : : +- ReusedExchange (85) + : : : : +- ReusedExchange (88) + : : : +- ReusedExchange (91) + : : +- BroadcastExchange (97) + : : +- * Filter (96) + : : +- * ColumnarToRow (95) + : : +- Scan parquet default.web_site (94) + : +- * HashAggregate (111) + : +- Exchange (110) + : +- * HashAggregate (109) + : +- * HashAggregate (108) + : +- ReusedExchange (107) + +- * HashAggregate (120) + +- Exchange (119) + +- * HashAggregate (118) + +- * HashAggregate (117) + +- ReusedExchange (116) + + +(1) Scan parquet default.store_sales +Output [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_net_profit#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk), IsNotNull(ss_promo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_net_profit#7] + +(3) Filter [codegen id : 1] +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_net_profit#7] +Condition : (((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_promo_sk#4)) + +(4) Exchange +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_net_profit#7] +Arguments: hashpartitioning(cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint), 5), true, [id=#8] + +(5) Sort [codegen id : 2] +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_net_profit#7] +Arguments: [cast(ss_item_sk#2 as bigint) ASC NULLS FIRST, 
cast(ss_ticket_number#5 as bigint) ASC NULLS FIRST], false, 0 + +(6) Scan parquet default.store_returns +Output [4]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_returns] +PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 3] +Input [4]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12] + +(8) Filter [codegen id : 3] +Input [4]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12] +Condition : (isnotnull(sr_item_sk#9) AND isnotnull(sr_ticket_number#10)) + +(9) Exchange +Input [4]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12] +Arguments: hashpartitioning(sr_item_sk#9, sr_ticket_number#10, 5), true, [id=#13] + +(10) Sort [codegen id : 4] +Input [4]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12] +Arguments: [sr_item_sk#9 ASC NULLS FIRST, sr_ticket_number#10 ASC NULLS FIRST], false, 0 + +(11) SortMergeJoin +Left keys [2]: [cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint)] +Right keys [2]: [sr_item_sk#9, sr_ticket_number#10] +Join condition: None + +(12) Project [codegen id : 9] +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12] +Input [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_net_profit#7, sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12] + +(13) Scan parquet default.item +Output [2]: [i_item_sk#14, i_current_price#15] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/item] +PushedFilters: 
[IsNotNull(i_current_price), GreaterThan(i_current_price,50.00), IsNotNull(i_item_sk)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 5] +Input [2]: [i_item_sk#14, i_current_price#15] + +(15) Filter [codegen id : 5] +Input [2]: [i_item_sk#14, i_current_price#15] +Condition : ((isnotnull(i_current_price#15) AND (i_current_price#15 > 50.00)) AND isnotnull(i_item_sk#14)) + +(16) Project [codegen id : 5] +Output [1]: [i_item_sk#14] +Input [2]: [i_item_sk#14, i_current_price#15] + +(17) BroadcastExchange +Input [1]: [i_item_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(18) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#14] +Join condition: None + +(19) Project [codegen id : 9] +Output [7]: [ss_sold_date_sk#1, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12, i_item_sk#14] + +(20) Scan parquet default.date_dim +Output [2]: [d_date_sk#17, d_date#18] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1998-08-04), LessThanOrEqual(d_date,1998-09-03), IsNotNull(d_date_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [2]: [d_date_sk#17, d_date#18] + +(22) Filter [codegen id : 6] +Input [2]: [d_date_sk#17, d_date#18] +Condition : (((isnotnull(d_date#18) AND (d_date#18 >= 10442)) AND (d_date#18 <= 10472)) AND isnotnull(d_date_sk#17)) + +(23) Project [codegen id : 6] +Output [1]: [d_date_sk#17] +Input [2]: [d_date_sk#17, d_date#18] + +(24) BroadcastExchange +Input [1]: [d_date_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as 
bigint)),false), [id=#19] + +(25) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#17] +Join condition: None + +(26) Project [codegen id : 9] +Output [6]: [ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12] +Input [8]: [ss_sold_date_sk#1, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12, d_date_sk#17] + +(27) Scan parquet default.promotion +Output [2]: [p_promo_sk#20, p_channel_tv#21] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/promotion] +PushedFilters: [IsNotNull(p_channel_tv), EqualTo(p_channel_tv,N), IsNotNull(p_promo_sk)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 7] +Input [2]: [p_promo_sk#20, p_channel_tv#21] + +(29) Filter [codegen id : 7] +Input [2]: [p_promo_sk#20, p_channel_tv#21] +Condition : ((isnotnull(p_channel_tv#21) AND (p_channel_tv#21 = N)) AND isnotnull(p_promo_sk#20)) + +(30) Project [codegen id : 7] +Output [1]: [p_promo_sk#20] +Input [2]: [p_promo_sk#20, p_channel_tv#21] + +(31) BroadcastExchange +Input [1]: [p_promo_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] + +(32) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_promo_sk#4] +Right keys [1]: [p_promo_sk#20] +Join condition: None + +(33) Project [codegen id : 9] +Output [5]: [ss_store_sk#3, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12] +Input [7]: [ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12, p_promo_sk#20] + +(34) Scan parquet default.store +Output [2]: [s_store_sk#23, s_store_id#24] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store] 
+PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 8] +Input [2]: [s_store_sk#23, s_store_id#24] + +(36) Filter [codegen id : 8] +Input [2]: [s_store_sk#23, s_store_id#24] +Condition : isnotnull(s_store_sk#23) + +(37) BroadcastExchange +Input [2]: [s_store_sk#23, s_store_id#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#25] + +(38) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#23] +Join condition: None + +(39) Project [codegen id : 9] +Output [5]: [ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12, s_store_id#24] +Input [7]: [ss_store_sk#3, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12, s_store_sk#23, s_store_id#24] + +(40) HashAggregate [codegen id : 9] +Input [5]: [ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12, s_store_id#24] +Keys [1]: [s_store_id#24] +Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#6)), partial_sum(coalesce(cast(sr_return_amt#11 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ss_net_profit#7 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#12 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Aggregate Attributes [5]: [sum#26, sum#27, isEmpty#28, sum#29, isEmpty#30] +Results [6]: [s_store_id#24, sum#31, sum#32, isEmpty#33, sum#34, isEmpty#35] + +(41) Exchange +Input [6]: [s_store_id#24, sum#31, sum#32, isEmpty#33, sum#34, isEmpty#35] +Arguments: hashpartitioning(s_store_id#24, 5), true, [id=#36] + +(42) HashAggregate [codegen id : 10] +Input [6]: [s_store_id#24, sum#31, sum#32, isEmpty#33, sum#34, isEmpty#35] +Keys [1]: [s_store_id#24] +Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#6)), sum(coalesce(cast(sr_return_amt#11 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ss_net_profit#7 as decimal(13,2))) - 
promote_precision(cast(coalesce(cast(sr_net_loss#12 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#6))#37, sum(coalesce(cast(sr_return_amt#11 as decimal(12,2)), 0.00))#38, sum(CheckOverflow((promote_precision(cast(ss_net_profit#7 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#12 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#39] +Results [5]: [store channel AS channel#40, concat(store, s_store_id#24) AS id#41, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#37,17,2) AS sales#42, sum(coalesce(cast(sr_return_amt#11 as decimal(12,2)), 0.00))#38 AS returns#43, sum(CheckOverflow((promote_precision(cast(ss_net_profit#7 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#12 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#39 AS profit#44] + +(43) Scan parquet default.catalog_sales +Output [7]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_item_sk#47, cs_promo_sk#48, cs_order_number#49, cs_ext_sales_price#50, cs_net_profit#51] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_catalog_page_sk), IsNotNull(cs_item_sk), IsNotNull(cs_promo_sk)] +ReadSchema: struct + +(44) ColumnarToRow [codegen id : 11] +Input [7]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_item_sk#47, cs_promo_sk#48, cs_order_number#49, cs_ext_sales_price#50, cs_net_profit#51] + +(45) Filter [codegen id : 11] +Input [7]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_item_sk#47, cs_promo_sk#48, cs_order_number#49, cs_ext_sales_price#50, cs_net_profit#51] +Condition : (((isnotnull(cs_sold_date_sk#45) AND isnotnull(cs_catalog_page_sk#46)) AND isnotnull(cs_item_sk#47)) AND isnotnull(cs_promo_sk#48)) + +(46) Exchange +Input [7]: 
[cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_item_sk#47, cs_promo_sk#48, cs_order_number#49, cs_ext_sales_price#50, cs_net_profit#51] +Arguments: hashpartitioning(cs_item_sk#47, cs_order_number#49, 5), true, [id=#52] + +(47) Sort [codegen id : 12] +Input [7]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_item_sk#47, cs_promo_sk#48, cs_order_number#49, cs_ext_sales_price#50, cs_net_profit#51] +Arguments: [cs_item_sk#47 ASC NULLS FIRST, cs_order_number#49 ASC NULLS FIRST], false, 0 + +(48) Scan parquet default.catalog_returns +Output [4]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/catalog_returns] +PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] +ReadSchema: struct + +(49) ColumnarToRow [codegen id : 13] +Input [4]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56] + +(50) Filter [codegen id : 13] +Input [4]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56] +Condition : (isnotnull(cr_item_sk#53) AND isnotnull(cr_order_number#54)) + +(51) Exchange +Input [4]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56] +Arguments: hashpartitioning(cr_item_sk#53, cr_order_number#54, 5), true, [id=#57] + +(52) Sort [codegen id : 14] +Input [4]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56] +Arguments: [cr_item_sk#53 ASC NULLS FIRST, cr_order_number#54 ASC NULLS FIRST], false, 0 + +(53) SortMergeJoin +Left keys [2]: [cs_item_sk#47, cs_order_number#49] +Right keys [2]: [cr_item_sk#53, cr_order_number#54] +Join condition: None + +(54) Project [codegen id : 19] +Output [8]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_item_sk#47, cs_promo_sk#48, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56] +Input [11]: [cs_sold_date_sk#45, 
cs_catalog_page_sk#46, cs_item_sk#47, cs_promo_sk#48, cs_order_number#49, cs_ext_sales_price#50, cs_net_profit#51, cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56] + +(55) ReusedExchange [Reuses operator id: 17] +Output [1]: [i_item_sk#14] + +(56) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [cs_item_sk#47] +Right keys [1]: [i_item_sk#14] +Join condition: None + +(57) Project [codegen id : 19] +Output [7]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_promo_sk#48, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56] +Input [9]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_item_sk#47, cs_promo_sk#48, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56, i_item_sk#14] + +(58) ReusedExchange [Reuses operator id: 24] +Output [1]: [d_date_sk#17] + +(59) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [cs_sold_date_sk#45] +Right keys [1]: [d_date_sk#17] +Join condition: None + +(60) Project [codegen id : 19] +Output [6]: [cs_catalog_page_sk#46, cs_promo_sk#48, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56] +Input [8]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_promo_sk#48, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56, d_date_sk#17] + +(61) ReusedExchange [Reuses operator id: 31] +Output [1]: [p_promo_sk#20] + +(62) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [cs_promo_sk#48] +Right keys [1]: [p_promo_sk#20] +Join condition: None + +(63) Project [codegen id : 19] +Output [5]: [cs_catalog_page_sk#46, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56] +Input [7]: [cs_catalog_page_sk#46, cs_promo_sk#48, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56, p_promo_sk#20] + +(64) Scan parquet default.catalog_page +Output [2]: [cp_catalog_page_sk#58, cp_catalog_page_id#59] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/catalog_page] +PushedFilters: [IsNotNull(cp_catalog_page_sk)] +ReadSchema: struct + +(65) ColumnarToRow [codegen id : 18] +Input [2]: [cp_catalog_page_sk#58, cp_catalog_page_id#59] + +(66) Filter [codegen id : 18] +Input [2]: [cp_catalog_page_sk#58, cp_catalog_page_id#59] +Condition : isnotnull(cp_catalog_page_sk#58) + +(67) BroadcastExchange +Input [2]: [cp_catalog_page_sk#58, cp_catalog_page_id#59] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#60] + +(68) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [cs_catalog_page_sk#46] +Right keys [1]: [cp_catalog_page_sk#58] +Join condition: None + +(69) Project [codegen id : 19] +Output [5]: [cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56, cp_catalog_page_id#59] +Input [7]: [cs_catalog_page_sk#46, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56, cp_catalog_page_sk#58, cp_catalog_page_id#59] + +(70) HashAggregate [codegen id : 19] +Input [5]: [cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56, cp_catalog_page_id#59] +Keys [1]: [cp_catalog_page_id#59] +Functions [3]: [partial_sum(UnscaledValue(cs_ext_sales_price#50)), partial_sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(cs_net_profit#51 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Aggregate Attributes [5]: [sum#61, sum#62, isEmpty#63, sum#64, isEmpty#65] +Results [6]: [cp_catalog_page_id#59, sum#66, sum#67, isEmpty#68, sum#69, isEmpty#70] + +(71) Exchange +Input [6]: [cp_catalog_page_id#59, sum#66, sum#67, isEmpty#68, sum#69, isEmpty#70] +Arguments: hashpartitioning(cp_catalog_page_id#59, 5), true, [id=#71] + +(72) HashAggregate [codegen id : 20] 
+Input [6]: [cp_catalog_page_id#59, sum#66, sum#67, isEmpty#68, sum#69, isEmpty#70] +Keys [1]: [cp_catalog_page_id#59] +Functions [3]: [sum(UnscaledValue(cs_ext_sales_price#50)), sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(cs_net_profit#51 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_sales_price#50))#72, sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00))#73, sum(CheckOverflow((promote_precision(cast(cs_net_profit#51 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#74] +Results [5]: [catalog channel AS channel#75, concat(catalog_page, cp_catalog_page_id#59) AS id#76, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#50))#72,17,2) AS sales#77, sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00))#73 AS returns#78, sum(CheckOverflow((promote_precision(cast(cs_net_profit#51 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#74 AS profit#79] + +(73) Scan parquet default.web_sales +Output [7]: [ws_sold_date_sk#80, ws_item_sk#81, ws_web_site_sk#82, ws_promo_sk#83, ws_order_number#84, ws_ext_sales_price#85, ws_net_profit#86] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_web_site_sk), IsNotNull(ws_item_sk), IsNotNull(ws_promo_sk)] +ReadSchema: struct + +(74) ColumnarToRow [codegen id : 21] +Input [7]: [ws_sold_date_sk#80, ws_item_sk#81, ws_web_site_sk#82, ws_promo_sk#83, ws_order_number#84, ws_ext_sales_price#85, ws_net_profit#86] + +(75) Filter [codegen id : 21] 
+Input [7]: [ws_sold_date_sk#80, ws_item_sk#81, ws_web_site_sk#82, ws_promo_sk#83, ws_order_number#84, ws_ext_sales_price#85, ws_net_profit#86] +Condition : (((isnotnull(ws_sold_date_sk#80) AND isnotnull(ws_web_site_sk#82)) AND isnotnull(ws_item_sk#81)) AND isnotnull(ws_promo_sk#83)) + +(76) Exchange +Input [7]: [ws_sold_date_sk#80, ws_item_sk#81, ws_web_site_sk#82, ws_promo_sk#83, ws_order_number#84, ws_ext_sales_price#85, ws_net_profit#86] +Arguments: hashpartitioning(cast(ws_item_sk#81 as bigint), cast(ws_order_number#84 as bigint), 5), true, [id=#87] + +(77) Sort [codegen id : 22] +Input [7]: [ws_sold_date_sk#80, ws_item_sk#81, ws_web_site_sk#82, ws_promo_sk#83, ws_order_number#84, ws_ext_sales_price#85, ws_net_profit#86] +Arguments: [cast(ws_item_sk#81 as bigint) ASC NULLS FIRST, cast(ws_order_number#84 as bigint) ASC NULLS FIRST], false, 0 + +(78) Scan parquet default.web_returns +Output [4]: [wr_item_sk#88, wr_order_number#89, wr_return_amt#90, wr_net_loss#91] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/web_returns] +PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_order_number)] +ReadSchema: struct + +(79) ColumnarToRow [codegen id : 23] +Input [4]: [wr_item_sk#88, wr_order_number#89, wr_return_amt#90, wr_net_loss#91] + +(80) Filter [codegen id : 23] +Input [4]: [wr_item_sk#88, wr_order_number#89, wr_return_amt#90, wr_net_loss#91] +Condition : (isnotnull(wr_item_sk#88) AND isnotnull(wr_order_number#89)) + +(81) Exchange +Input [4]: [wr_item_sk#88, wr_order_number#89, wr_return_amt#90, wr_net_loss#91] +Arguments: hashpartitioning(wr_item_sk#88, wr_order_number#89, 5), true, [id=#92] + +(82) Sort [codegen id : 24] +Input [4]: [wr_item_sk#88, wr_order_number#89, wr_return_amt#90, wr_net_loss#91] +Arguments: [wr_item_sk#88 ASC NULLS FIRST, wr_order_number#89 ASC NULLS FIRST], false, 0 + +(83) SortMergeJoin +Left keys [2]: 
[cast(ws_item_sk#81 as bigint), cast(ws_order_number#84 as bigint)] +Right keys [2]: [wr_item_sk#88, wr_order_number#89] +Join condition: None + +(84) Project [codegen id : 29] +Output [8]: [ws_sold_date_sk#80, ws_item_sk#81, ws_web_site_sk#82, ws_promo_sk#83, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91] +Input [11]: [ws_sold_date_sk#80, ws_item_sk#81, ws_web_site_sk#82, ws_promo_sk#83, ws_order_number#84, ws_ext_sales_price#85, ws_net_profit#86, wr_item_sk#88, wr_order_number#89, wr_return_amt#90, wr_net_loss#91] + +(85) ReusedExchange [Reuses operator id: 17] +Output [1]: [i_item_sk#14] + +(86) BroadcastHashJoin [codegen id : 29] +Left keys [1]: [ws_item_sk#81] +Right keys [1]: [i_item_sk#14] +Join condition: None + +(87) Project [codegen id : 29] +Output [7]: [ws_sold_date_sk#80, ws_web_site_sk#82, ws_promo_sk#83, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91] +Input [9]: [ws_sold_date_sk#80, ws_item_sk#81, ws_web_site_sk#82, ws_promo_sk#83, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91, i_item_sk#14] + +(88) ReusedExchange [Reuses operator id: 24] +Output [1]: [d_date_sk#17] + +(89) BroadcastHashJoin [codegen id : 29] +Left keys [1]: [ws_sold_date_sk#80] +Right keys [1]: [d_date_sk#17] +Join condition: None + +(90) Project [codegen id : 29] +Output [6]: [ws_web_site_sk#82, ws_promo_sk#83, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91] +Input [8]: [ws_sold_date_sk#80, ws_web_site_sk#82, ws_promo_sk#83, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91, d_date_sk#17] + +(91) ReusedExchange [Reuses operator id: 31] +Output [1]: [p_promo_sk#20] + +(92) BroadcastHashJoin [codegen id : 29] +Left keys [1]: [ws_promo_sk#83] +Right keys [1]: [p_promo_sk#20] +Join condition: None + +(93) Project [codegen id : 29] +Output [5]: [ws_web_site_sk#82, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91] +Input [7]: 
[ws_web_site_sk#82, ws_promo_sk#83, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91, p_promo_sk#20] + +(94) Scan parquet default.web_site +Output [2]: [web_site_sk#93, web_site_id#94] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/web_site] +PushedFilters: [IsNotNull(web_site_sk)] +ReadSchema: struct + +(95) ColumnarToRow [codegen id : 28] +Input [2]: [web_site_sk#93, web_site_id#94] + +(96) Filter [codegen id : 28] +Input [2]: [web_site_sk#93, web_site_id#94] +Condition : isnotnull(web_site_sk#93) + +(97) BroadcastExchange +Input [2]: [web_site_sk#93, web_site_id#94] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#95] + +(98) BroadcastHashJoin [codegen id : 29] +Left keys [1]: [ws_web_site_sk#82] +Right keys [1]: [web_site_sk#93] +Join condition: None + +(99) Project [codegen id : 29] +Output [5]: [ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91, web_site_id#94] +Input [7]: [ws_web_site_sk#82, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91, web_site_sk#93, web_site_id#94] + +(100) HashAggregate [codegen id : 29] +Input [5]: [ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91, web_site_id#94] +Keys [1]: [web_site_id#94] +Functions [3]: [partial_sum(UnscaledValue(ws_ext_sales_price#85)), partial_sum(coalesce(cast(wr_return_amt#90 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ws_net_profit#86 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#91 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Aggregate Attributes [5]: [sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] +Results [6]: [web_site_id#94, sum#101, sum#102, isEmpty#103, sum#104, isEmpty#105] + +(101) Exchange +Input [6]: [web_site_id#94, sum#101, sum#102, isEmpty#103, 
sum#104, isEmpty#105] +Arguments: hashpartitioning(web_site_id#94, 5), true, [id=#106] + +(102) HashAggregate [codegen id : 30] +Input [6]: [web_site_id#94, sum#101, sum#102, isEmpty#103, sum#104, isEmpty#105] +Keys [1]: [web_site_id#94] +Functions [3]: [sum(UnscaledValue(ws_ext_sales_price#85)), sum(coalesce(cast(wr_return_amt#90 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ws_net_profit#86 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#91 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_sales_price#85))#107, sum(coalesce(cast(wr_return_amt#90 as decimal(12,2)), 0.00))#108, sum(CheckOverflow((promote_precision(cast(ws_net_profit#86 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#91 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#109] +Results [5]: [web channel AS channel#110, concat(web_site, web_site_id#94) AS id#111, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#85))#107,17,2) AS sales#112, sum(coalesce(cast(wr_return_amt#90 as decimal(12,2)), 0.00))#108 AS returns#113, sum(CheckOverflow((promote_precision(cast(ws_net_profit#86 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#91 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#109 AS profit#114] + +(103) Union + +(104) HashAggregate [codegen id : 31] +Input [5]: [channel#40, id#41, sales#42, returns#43, profit#44] +Keys [2]: [channel#40, id#41] +Functions [3]: [partial_sum(sales#42), partial_sum(returns#43), partial_sum(profit#44)] +Aggregate Attributes [6]: [sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] +Results [8]: [channel#40, id#41, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] + +(105) Exchange +Input [8]: [channel#40, id#41, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] +Arguments: hashpartitioning(channel#40, id#41, 5), true, 
[id=#127] + +(106) HashAggregate [codegen id : 32] +Input [8]: [channel#40, id#41, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] +Keys [2]: [channel#40, id#41] +Functions [3]: [sum(sales#42), sum(returns#43), sum(profit#44)] +Aggregate Attributes [3]: [sum(sales#42)#128, sum(returns#43)#129, sum(profit#44)#130] +Results [5]: [channel#40, id#41, cast(sum(sales#42)#128 as decimal(37,2)) AS sales#131, cast(sum(returns#43)#129 as decimal(38,2)) AS returns#132, cast(sum(profit#44)#130 as decimal(38,2)) AS profit#133] + +(107) ReusedExchange [Reuses operator id: 105] +Output [8]: [channel#40, id#41, sum#134, isEmpty#135, sum#136, isEmpty#137, sum#138, isEmpty#139] + +(108) HashAggregate [codegen id : 64] +Input [8]: [channel#40, id#41, sum#134, isEmpty#135, sum#136, isEmpty#137, sum#138, isEmpty#139] +Keys [2]: [channel#40, id#41] +Functions [3]: [sum(sales#42), sum(returns#43), sum(profit#44)] +Aggregate Attributes [3]: [sum(sales#42)#140, sum(returns#43)#141, sum(profit#44)#142] +Results [4]: [channel#40, sum(sales#42)#140 AS sales#143, sum(returns#43)#141 AS returns#144, sum(profit#44)#142 AS profit#145] + +(109) HashAggregate [codegen id : 64] +Input [4]: [channel#40, sales#143, returns#144, profit#145] +Keys [1]: [channel#40] +Functions [3]: [partial_sum(sales#143), partial_sum(returns#144), partial_sum(profit#145)] +Aggregate Attributes [6]: [sum#146, isEmpty#147, sum#148, isEmpty#149, sum#150, isEmpty#151] +Results [7]: [channel#40, sum#152, isEmpty#153, sum#154, isEmpty#155, sum#156, isEmpty#157] + +(110) Exchange +Input [7]: [channel#40, sum#152, isEmpty#153, sum#154, isEmpty#155, sum#156, isEmpty#157] +Arguments: hashpartitioning(channel#40, 5), true, [id=#158] + +(111) HashAggregate [codegen id : 65] +Input [7]: [channel#40, sum#152, isEmpty#153, sum#154, isEmpty#155, sum#156, isEmpty#157] +Keys [1]: [channel#40] +Functions [3]: [sum(sales#143), sum(returns#144), sum(profit#145)] +Aggregate Attributes [3]: [sum(sales#143)#159, 
sum(returns#144)#160, sum(profit#145)#161] +Results [5]: [channel#40, null AS id#162, sum(sales#143)#159 AS sales#163, sum(returns#144)#160 AS returns#164, sum(profit#145)#161 AS profit#165] + +(112) Union + +(113) HashAggregate [codegen id : 66] +Input [5]: [channel#40, id#41, sales#131, returns#132, profit#133] +Keys [5]: [channel#40, id#41, sales#131, returns#132, profit#133] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#40, id#41, sales#131, returns#132, profit#133] + +(114) Exchange +Input [5]: [channel#40, id#41, sales#131, returns#132, profit#133] +Arguments: hashpartitioning(channel#40, id#41, sales#131, returns#132, profit#133, 5), true, [id=#166] + +(115) HashAggregate [codegen id : 67] +Input [5]: [channel#40, id#41, sales#131, returns#132, profit#133] +Keys [5]: [channel#40, id#41, sales#131, returns#132, profit#133] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#40, id#41, sales#131, returns#132, profit#133] + +(116) ReusedExchange [Reuses operator id: 105] +Output [8]: [channel#40, id#41, sum#167, isEmpty#168, sum#169, isEmpty#170, sum#171, isEmpty#172] + +(117) HashAggregate [codegen id : 99] +Input [8]: [channel#40, id#41, sum#167, isEmpty#168, sum#169, isEmpty#170, sum#171, isEmpty#172] +Keys [2]: [channel#40, id#41] +Functions [3]: [sum(sales#42), sum(returns#43), sum(profit#44)] +Aggregate Attributes [3]: [sum(sales#42)#173, sum(returns#43)#174, sum(profit#44)#175] +Results [3]: [sum(sales#42)#173 AS sales#143, sum(returns#43)#174 AS returns#144, sum(profit#44)#175 AS profit#145] + +(118) HashAggregate [codegen id : 99] +Input [3]: [sales#143, returns#144, profit#145] +Keys: [] +Functions [3]: [partial_sum(sales#143), partial_sum(returns#144), partial_sum(profit#145)] +Aggregate Attributes [6]: [sum#176, isEmpty#177, sum#178, isEmpty#179, sum#180, isEmpty#181] +Results [6]: [sum#182, isEmpty#183, sum#184, isEmpty#185, sum#186, isEmpty#187] + +(119) Exchange +Input [6]: [sum#182, isEmpty#183, sum#184, 
isEmpty#185, sum#186, isEmpty#187] +Arguments: SinglePartition, true, [id=#188] + +(120) HashAggregate [codegen id : 100] +Input [6]: [sum#182, isEmpty#183, sum#184, isEmpty#185, sum#186, isEmpty#187] +Keys: [] +Functions [3]: [sum(sales#143), sum(returns#144), sum(profit#145)] +Aggregate Attributes [3]: [sum(sales#143)#189, sum(returns#144)#190, sum(profit#145)#191] +Results [5]: [null AS channel#192, null AS id#193, sum(sales#143)#189 AS sales#194, sum(returns#144)#190 AS returns#195, sum(profit#145)#191 AS profit#196] + +(121) Union + +(122) HashAggregate [codegen id : 101] +Input [5]: [channel#40, id#41, sales#131, returns#132, profit#133] +Keys [5]: [channel#40, id#41, sales#131, returns#132, profit#133] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#40, id#41, sales#131, returns#132, profit#133] + +(123) Exchange +Input [5]: [channel#40, id#41, sales#131, returns#132, profit#133] +Arguments: hashpartitioning(channel#40, id#41, sales#131, returns#132, profit#133, 5), true, [id=#197] + +(124) HashAggregate [codegen id : 102] +Input [5]: [channel#40, id#41, sales#131, returns#132, profit#133] +Keys [5]: [channel#40, id#41, sales#131, returns#132, profit#133] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#40, id#41, sales#131, returns#132, profit#133] + +(125) TakeOrderedAndProject +Input [5]: [channel#40, id#41, sales#131, returns#132, profit#133] +Arguments: 100, [channel#40 ASC NULLS FIRST, id#41 ASC NULLS FIRST], [channel#40, id#41, sales#131, returns#132, profit#133] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/simplified.txt new file mode 100644 index 0000000000000..452668525aeba --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/simplified.txt @@ -0,0 +1,205 @@ +TakeOrderedAndProject [channel,id,profit,returns,sales] + 
WholeStageCodegen (102) + HashAggregate [channel,id,profit,returns,sales] + InputAdapter + Exchange [channel,id,profit,returns,sales] #1 + WholeStageCodegen (101) + HashAggregate [channel,id,profit,returns,sales] + InputAdapter + Union + WholeStageCodegen (67) + HashAggregate [channel,id,profit,returns,sales] + InputAdapter + Exchange [channel,id,profit,returns,sales] #2 + WholeStageCodegen (66) + HashAggregate [channel,id,profit,returns,sales] + InputAdapter + Union + WholeStageCodegen (32) + HashAggregate [channel,id,isEmpty,isEmpty,isEmpty,sum,sum,sum] [isEmpty,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] + InputAdapter + Exchange [channel,id] #3 + WholeStageCodegen (31) + HashAggregate [channel,id,profit,returns,sales] [isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum] + InputAdapter + Union + WholeStageCodegen (10) + HashAggregate [isEmpty,isEmpty,s_store_id,sum,sum,sum] [channel,id,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00))] + InputAdapter + Exchange [s_store_id] #4 + WholeStageCodegen (9) + HashAggregate [s_store_id,sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_net_profit] [isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum] + Project [s_store_id,sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_net_profit,ss_store_sk] + BroadcastHashJoin [p_promo_sk,ss_promo_sk] + Project [sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_net_profit,ss_promo_sk,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project 
[sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_net_profit,ss_promo_sk,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_promo_sk,ss_sold_date_sk,ss_store_sk] + InputAdapter + SortMergeJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + WholeStageCodegen (2) + Sort [ss_item_sk,ss_ticket_number] + InputAdapter + Exchange [ss_item_sk,ss_ticket_number] #5 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + WholeStageCodegen (4) + Sort [sr_item_sk,sr_ticket_number] + InputAdapter + Exchange [sr_item_sk,sr_ticket_number] #6 + WholeStageCodegen (3) + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_net_loss,sr_return_amt,sr_ticket_number] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (5) + Project [i_item_sk] + Filter [i_current_price,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_current_price,i_item_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (6) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (7) + Project [p_promo_sk] + Filter [p_channel_tv,p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.promotion [p_channel_tv,p_promo_sk] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (8) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_id,s_store_sk] + WholeStageCodegen (20) + HashAggregate [cp_catalog_page_id,isEmpty,isEmpty,sum,sum,sum] 
[channel,id,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00))] + InputAdapter + Exchange [cp_catalog_page_id] #11 + WholeStageCodegen (19) + HashAggregate [cp_catalog_page_id,cr_net_loss,cr_return_amount,cs_ext_sales_price,cs_net_profit] [isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum] + Project [cp_catalog_page_id,cr_net_loss,cr_return_amount,cs_ext_sales_price,cs_net_profit] + BroadcastHashJoin [cp_catalog_page_sk,cs_catalog_page_sk] + Project [cr_net_loss,cr_return_amount,cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit] + BroadcastHashJoin [cs_promo_sk,p_promo_sk] + Project [cr_net_loss,cr_return_amount,cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cs_promo_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cr_net_loss,cr_return_amount,cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cs_promo_sk,cs_sold_date_sk] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cr_net_loss,cr_return_amount,cs_catalog_page_sk,cs_ext_sales_price,cs_item_sk,cs_net_profit,cs_promo_sk,cs_sold_date_sk] + InputAdapter + SortMergeJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + WholeStageCodegen (12) + Sort [cs_item_sk,cs_order_number] + InputAdapter + Exchange [cs_item_sk,cs_order_number] #12 + WholeStageCodegen (11) + Filter [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_catalog_page_sk,cs_ext_sales_price,cs_item_sk,cs_net_profit,cs_order_number,cs_promo_sk,cs_sold_date_sk] + WholeStageCodegen (14) + Sort [cr_item_sk,cr_order_number] + InputAdapter + Exchange [cr_item_sk,cr_order_number] #13 + WholeStageCodegen (13) + Filter [cr_item_sk,cr_order_number] + 
ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_net_loss,cr_order_number,cr_return_amount] + InputAdapter + ReusedExchange [i_item_sk] #7 + InputAdapter + ReusedExchange [d_date_sk] #8 + InputAdapter + ReusedExchange [p_promo_sk] #9 + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (18) + Filter [cp_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_page [cp_catalog_page_id,cp_catalog_page_sk] + WholeStageCodegen (30) + HashAggregate [isEmpty,isEmpty,sum,sum,sum,web_site_id] [channel,id,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00))] + InputAdapter + Exchange [web_site_id] #15 + WholeStageCodegen (29) + HashAggregate [web_site_id,wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_net_profit] [isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum] + Project [web_site_id,wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_net_profit] + BroadcastHashJoin [web_site_sk,ws_web_site_sk] + Project [wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_net_profit,ws_web_site_sk] + BroadcastHashJoin [p_promo_sk,ws_promo_sk] + Project [wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_net_profit,ws_promo_sk,ws_web_site_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_net_profit,ws_promo_sk,ws_sold_date_sk,ws_web_site_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_item_sk,ws_net_profit,ws_promo_sk,ws_sold_date_sk,ws_web_site_sk] + InputAdapter + SortMergeJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + WholeStageCodegen (22) + Sort [ws_item_sk,ws_order_number] + InputAdapter + Exchange 
[ws_item_sk,ws_order_number] #16 + WholeStageCodegen (21) + Filter [ws_item_sk,ws_promo_sk,ws_sold_date_sk,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_net_profit,ws_order_number,ws_promo_sk,ws_sold_date_sk,ws_web_site_sk] + WholeStageCodegen (24) + Sort [wr_item_sk,wr_order_number] + InputAdapter + Exchange [wr_item_sk,wr_order_number] #17 + WholeStageCodegen (23) + Filter [wr_item_sk,wr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_item_sk,wr_net_loss,wr_order_number,wr_return_amt] + InputAdapter + ReusedExchange [i_item_sk] #7 + InputAdapter + ReusedExchange [d_date_sk] #8 + InputAdapter + ReusedExchange [p_promo_sk] #9 + InputAdapter + BroadcastExchange #18 + WholeStageCodegen (28) + Filter [web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_site [web_site_id,web_site_sk] + WholeStageCodegen (65) + HashAggregate [channel,isEmpty,isEmpty,isEmpty,sum,sum,sum] [id,isEmpty,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] + InputAdapter + Exchange [channel] #19 + WholeStageCodegen (64) + HashAggregate [channel,profit,returns,sales] [isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum] + HashAggregate [channel,id,isEmpty,isEmpty,isEmpty,sum,sum,sum] [isEmpty,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] + InputAdapter + ReusedExchange [channel,id,isEmpty,isEmpty,isEmpty,sum,sum,sum] #3 + WholeStageCodegen (100) + HashAggregate [isEmpty,isEmpty,isEmpty,sum,sum,sum] [channel,id,isEmpty,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] + InputAdapter + Exchange #20 + WholeStageCodegen (99) + HashAggregate [profit,returns,sales] [isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum] + HashAggregate [channel,id,isEmpty,isEmpty,isEmpty,sum,sum,sum] 
[isEmpty,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] + InputAdapter + ReusedExchange [channel,id,isEmpty,isEmpty,isEmpty,sum,sum,sum] #3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a/explain.txt new file mode 100644 index 0000000000000..9f29340a501cb --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a/explain.txt @@ -0,0 +1,662 @@ +== Physical Plan == +TakeOrderedAndProject (116) ++- * HashAggregate (115) + +- Exchange (114) + +- * HashAggregate (113) + +- Union (112) + :- * HashAggregate (106) + : +- Exchange (105) + : +- * HashAggregate (104) + : +- Union (103) + : :- * HashAggregate (97) + : : +- Exchange (96) + : : +- * HashAggregate (95) + : : +- Union (94) + : : :- * HashAggregate (39) + : : : +- Exchange (38) + : : : +- * HashAggregate (37) + : : : +- * Project (36) + : : : +- * BroadcastHashJoin Inner BuildRight (35) + : : : :- * Project (29) + : : : : +- * BroadcastHashJoin Inner BuildRight (28) + : : : : :- * Project (22) + : : : : : +- * BroadcastHashJoin Inner BuildRight (21) + : : : : : :- * Project (16) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : : : : :- * Project (9) + : : : : : : : +- * BroadcastHashJoin LeftOuter BuildRight (8) + : : : : : : : :- * Filter (3) + : : : : : : : : +- * ColumnarToRow (2) + : : : : : : : : +- Scan parquet default.store_sales (1) + : : : : : : : +- BroadcastExchange (7) + : : : : : : : +- * Filter (6) + : : : : : : : +- * ColumnarToRow (5) + : : : : : : : +- Scan parquet default.store_returns (4) + : : : : : : +- BroadcastExchange (14) + : : : : : : +- * Project (13) + : : : : : : +- * Filter (12) + : : : : : : +- * ColumnarToRow (11) + : : : : : : +- Scan parquet default.date_dim (10) + : : : : : +- BroadcastExchange (20) + : : : : : +- * Filter (19) + : : : : : +- * ColumnarToRow (18) + 
: : : : : +- Scan parquet default.store (17) + : : : : +- BroadcastExchange (27) + : : : : +- * Project (26) + : : : : +- * Filter (25) + : : : : +- * ColumnarToRow (24) + : : : : +- Scan parquet default.item (23) + : : : +- BroadcastExchange (34) + : : : +- * Project (33) + : : : +- * Filter (32) + : : : +- * ColumnarToRow (31) + : : : +- Scan parquet default.promotion (30) + : : :- * HashAggregate (66) + : : : +- Exchange (65) + : : : +- * HashAggregate (64) + : : : +- * Project (63) + : : : +- * BroadcastHashJoin Inner BuildRight (62) + : : : :- * Project (60) + : : : : +- * BroadcastHashJoin Inner BuildRight (59) + : : : : :- * Project (57) + : : : : : +- * BroadcastHashJoin Inner BuildRight (56) + : : : : : :- * Project (51) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (50) + : : : : : : :- * Project (48) + : : : : : : : +- * BroadcastHashJoin LeftOuter BuildRight (47) + : : : : : : : :- * Filter (42) + : : : : : : : : +- * ColumnarToRow (41) + : : : : : : : : +- Scan parquet default.catalog_sales (40) + : : : : : : : +- BroadcastExchange (46) + : : : : : : : +- * Filter (45) + : : : : : : : +- * ColumnarToRow (44) + : : : : : : : +- Scan parquet default.catalog_returns (43) + : : : : : : +- ReusedExchange (49) + : : : : : +- BroadcastExchange (55) + : : : : : +- * Filter (54) + : : : : : +- * ColumnarToRow (53) + : : : : : +- Scan parquet default.catalog_page (52) + : : : : +- ReusedExchange (58) + : : : +- ReusedExchange (61) + : : +- * HashAggregate (93) + : : +- Exchange (92) + : : +- * HashAggregate (91) + : : +- * Project (90) + : : +- * BroadcastHashJoin Inner BuildRight (89) + : : :- * Project (87) + : : : +- * BroadcastHashJoin Inner BuildRight (86) + : : : :- * Project (84) + : : : : +- * BroadcastHashJoin Inner BuildRight (83) + : : : : :- * Project (78) + : : : : : +- * BroadcastHashJoin Inner BuildRight (77) + : : : : : :- * Project (75) + : : : : : : +- * BroadcastHashJoin LeftOuter BuildRight (74) + : : : : : : :- * Filter (69) + : : : 
: : : : +- * ColumnarToRow (68) + : : : : : : : +- Scan parquet default.web_sales (67) + : : : : : : +- BroadcastExchange (73) + : : : : : : +- * Filter (72) + : : : : : : +- * ColumnarToRow (71) + : : : : : : +- Scan parquet default.web_returns (70) + : : : : : +- ReusedExchange (76) + : : : : +- BroadcastExchange (82) + : : : : +- * Filter (81) + : : : : +- * ColumnarToRow (80) + : : : : +- Scan parquet default.web_site (79) + : : : +- ReusedExchange (85) + : : +- ReusedExchange (88) + : +- * HashAggregate (102) + : +- Exchange (101) + : +- * HashAggregate (100) + : +- * HashAggregate (99) + : +- ReusedExchange (98) + +- * HashAggregate (111) + +- Exchange (110) + +- * HashAggregate (109) + +- * HashAggregate (108) + +- ReusedExchange (107) + + +(1) Scan parquet default.store_sales +Output [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_net_profit#7] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk), IsNotNull(ss_promo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 6] +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_net_profit#7] + +(3) Filter [codegen id : 6] +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_net_profit#7] +Condition : (((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_promo_sk#4)) + +(4) Scan parquet default.store_returns +Output [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_returns] +PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] + +(6) Filter [codegen id : 1] +Input [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Condition : (isnotnull(sr_item_sk#8) AND isnotnull(sr_ticket_number#9)) + +(7) BroadcastExchange +Input [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false], input[1, bigint, false]),false), [id=#12] + +(8) BroadcastHashJoin [codegen id : 6] +Left keys [2]: [cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint)] +Right keys [2]: [sr_item_sk#8, sr_ticket_number#9] +Join condition: None + +(9) Project [codegen id : 6] +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11] +Input [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_net_profit#7, sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] + +(10) Scan parquet default.date_dim +Output [2]: [d_date_sk#13, d_date#14] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1998-08-04), LessThanOrEqual(d_date,1998-09-03), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#13, d_date#14] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#13, d_date#14] +Condition : (((isnotnull(d_date#14) AND (d_date#14 >= 10442)) AND (d_date#14 <= 10472)) AND isnotnull(d_date_sk#13)) + +(13) 
Project [codegen id : 2] +Output [1]: [d_date_sk#13] +Input [2]: [d_date_sk#13, d_date#14] + +(14) BroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] + +(15) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#13] +Join condition: None + +(16) Project [codegen id : 6] +Output [7]: [ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11, d_date_sk#13] + +(17) Scan parquet default.store +Output [2]: [s_store_sk#16, s_store_id#17] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [2]: [s_store_sk#16, s_store_id#17] + +(19) Filter [codegen id : 3] +Input [2]: [s_store_sk#16, s_store_id#17] +Condition : isnotnull(s_store_sk#16) + +(20) BroadcastExchange +Input [2]: [s_store_sk#16, s_store_id#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#18] + +(21) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#16] +Join condition: None + +(22) Project [codegen id : 6] +Output [7]: [ss_item_sk#2, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11, s_store_id#17] +Input [9]: [ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11, s_store_sk#16, s_store_id#17] + +(23) Scan parquet default.item +Output [2]: [i_item_sk#19, i_current_price#20] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_current_price), GreaterThan(i_current_price,50.00), IsNotNull(i_item_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#19, i_current_price#20] + +(25) Filter [codegen id : 4] +Input [2]: [i_item_sk#19, i_current_price#20] +Condition : ((isnotnull(i_current_price#20) AND (i_current_price#20 > 50.00)) AND isnotnull(i_item_sk#19)) + +(26) Project [codegen id : 4] +Output [1]: [i_item_sk#19] +Input [2]: [i_item_sk#19, i_current_price#20] + +(27) BroadcastExchange +Input [1]: [i_item_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] + +(28) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#19] +Join condition: None + +(29) Project [codegen id : 6] +Output [6]: [ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11, s_store_id#17] +Input [8]: [ss_item_sk#2, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11, s_store_id#17, i_item_sk#19] + +(30) Scan parquet default.promotion +Output [2]: [p_promo_sk#22, p_channel_tv#23] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/promotion] +PushedFilters: [IsNotNull(p_channel_tv), EqualTo(p_channel_tv,N), IsNotNull(p_promo_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 5] +Input [2]: [p_promo_sk#22, p_channel_tv#23] + +(32) Filter [codegen id : 5] +Input [2]: [p_promo_sk#22, p_channel_tv#23] +Condition : ((isnotnull(p_channel_tv#23) AND (p_channel_tv#23 = N)) AND isnotnull(p_promo_sk#22)) + +(33) Project [codegen id : 5] +Output [1]: [p_promo_sk#22] +Input [2]: [p_promo_sk#22, p_channel_tv#23] + +(34) BroadcastExchange +Input [1]: [p_promo_sk#22] +Arguments: 
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#24] + +(35) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_promo_sk#4] +Right keys [1]: [p_promo_sk#22] +Join condition: None + +(36) Project [codegen id : 6] +Output [5]: [ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11, s_store_id#17] +Input [7]: [ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11, s_store_id#17, p_promo_sk#22] + +(37) HashAggregate [codegen id : 6] +Input [5]: [ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11, s_store_id#17] +Keys [1]: [s_store_id#17] +Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#6)), partial_sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ss_net_profit#7 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Aggregate Attributes [5]: [sum#25, sum#26, isEmpty#27, sum#28, isEmpty#29] +Results [6]: [s_store_id#17, sum#30, sum#31, isEmpty#32, sum#33, isEmpty#34] + +(38) Exchange +Input [6]: [s_store_id#17, sum#30, sum#31, isEmpty#32, sum#33, isEmpty#34] +Arguments: hashpartitioning(s_store_id#17, 5), true, [id=#35] + +(39) HashAggregate [codegen id : 7] +Input [6]: [s_store_id#17, sum#30, sum#31, isEmpty#32, sum#33, isEmpty#34] +Keys [1]: [s_store_id#17] +Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#6)), sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ss_net_profit#7 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#6))#36, sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00))#37, sum(CheckOverflow((promote_precision(cast(ss_net_profit#7 as decimal(13,2))) - 
promote_precision(cast(coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#38] +Results [5]: [store channel AS channel#39, concat(store, s_store_id#17) AS id#40, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#36,17,2) AS sales#41, sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00))#37 AS returns#42, sum(CheckOverflow((promote_precision(cast(ss_net_profit#7 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#38 AS profit#43] + +(40) Scan parquet default.catalog_sales +Output [7]: [cs_sold_date_sk#44, cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_catalog_page_sk), IsNotNull(cs_item_sk), IsNotNull(cs_promo_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 13] +Input [7]: [cs_sold_date_sk#44, cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50] + +(42) Filter [codegen id : 13] +Input [7]: [cs_sold_date_sk#44, cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50] +Condition : (((isnotnull(cs_sold_date_sk#44) AND isnotnull(cs_catalog_page_sk#45)) AND isnotnull(cs_item_sk#46)) AND isnotnull(cs_promo_sk#47)) + +(43) Scan parquet default.catalog_returns +Output [4]: [cr_item_sk#51, cr_order_number#52, cr_return_amount#53, cr_net_loss#54] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/catalog_returns] +PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] +ReadSchema: struct 
+ +(44) ColumnarToRow [codegen id : 8] +Input [4]: [cr_item_sk#51, cr_order_number#52, cr_return_amount#53, cr_net_loss#54] + +(45) Filter [codegen id : 8] +Input [4]: [cr_item_sk#51, cr_order_number#52, cr_return_amount#53, cr_net_loss#54] +Condition : (isnotnull(cr_item_sk#51) AND isnotnull(cr_order_number#52)) + +(46) BroadcastExchange +Input [4]: [cr_item_sk#51, cr_order_number#52, cr_return_amount#53, cr_net_loss#54] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[1, int, false] as bigint) & 4294967295))),false), [id=#55] + +(47) BroadcastHashJoin [codegen id : 13] +Left keys [2]: [cs_item_sk#46, cs_order_number#48] +Right keys [2]: [cr_item_sk#51, cr_order_number#52] +Join condition: None + +(48) Project [codegen id : 13] +Output [8]: [cs_sold_date_sk#44, cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54] +Input [11]: [cs_sold_date_sk#44, cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50, cr_item_sk#51, cr_order_number#52, cr_return_amount#53, cr_net_loss#54] + +(49) ReusedExchange [Reuses operator id: 14] +Output [1]: [d_date_sk#13] + +(50) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [cs_sold_date_sk#44] +Right keys [1]: [d_date_sk#13] +Join condition: None + +(51) Project [codegen id : 13] +Output [7]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54] +Input [9]: [cs_sold_date_sk#44, cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54, d_date_sk#13] + +(52) Scan parquet default.catalog_page +Output [2]: [cp_catalog_page_sk#56, cp_catalog_page_id#57] +Batched: true +Location: InMemoryFileIndex 
[file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/catalog_page] +PushedFilters: [IsNotNull(cp_catalog_page_sk)] +ReadSchema: struct + +(53) ColumnarToRow [codegen id : 10] +Input [2]: [cp_catalog_page_sk#56, cp_catalog_page_id#57] + +(54) Filter [codegen id : 10] +Input [2]: [cp_catalog_page_sk#56, cp_catalog_page_id#57] +Condition : isnotnull(cp_catalog_page_sk#56) + +(55) BroadcastExchange +Input [2]: [cp_catalog_page_sk#56, cp_catalog_page_id#57] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#58] + +(56) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [cs_catalog_page_sk#45] +Right keys [1]: [cp_catalog_page_sk#56] +Join condition: None + +(57) Project [codegen id : 13] +Output [7]: [cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54, cp_catalog_page_id#57] +Input [9]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54, cp_catalog_page_sk#56, cp_catalog_page_id#57] + +(58) ReusedExchange [Reuses operator id: 27] +Output [1]: [i_item_sk#19] + +(59) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [cs_item_sk#46] +Right keys [1]: [i_item_sk#19] +Join condition: None + +(60) Project [codegen id : 13] +Output [6]: [cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54, cp_catalog_page_id#57] +Input [8]: [cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54, cp_catalog_page_id#57, i_item_sk#19] + +(61) ReusedExchange [Reuses operator id: 34] +Output [1]: [p_promo_sk#22] + +(62) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [cs_promo_sk#47] +Right keys [1]: [p_promo_sk#22] +Join condition: None + +(63) Project [codegen id : 13] +Output [5]: [cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54, 
cp_catalog_page_id#57] +Input [7]: [cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54, cp_catalog_page_id#57, p_promo_sk#22] + +(64) HashAggregate [codegen id : 13] +Input [5]: [cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54, cp_catalog_page_id#57] +Keys [1]: [cp_catalog_page_id#57] +Functions [3]: [partial_sum(UnscaledValue(cs_ext_sales_price#49)), partial_sum(coalesce(cast(cr_return_amount#53 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#54 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Aggregate Attributes [5]: [sum#59, sum#60, isEmpty#61, sum#62, isEmpty#63] +Results [6]: [cp_catalog_page_id#57, sum#64, sum#65, isEmpty#66, sum#67, isEmpty#68] + +(65) Exchange +Input [6]: [cp_catalog_page_id#57, sum#64, sum#65, isEmpty#66, sum#67, isEmpty#68] +Arguments: hashpartitioning(cp_catalog_page_id#57, 5), true, [id=#69] + +(66) HashAggregate [codegen id : 14] +Input [6]: [cp_catalog_page_id#57, sum#64, sum#65, isEmpty#66, sum#67, isEmpty#68] +Keys [1]: [cp_catalog_page_id#57] +Functions [3]: [sum(UnscaledValue(cs_ext_sales_price#49)), sum(coalesce(cast(cr_return_amount#53 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#54 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_sales_price#49))#70, sum(coalesce(cast(cr_return_amount#53 as decimal(12,2)), 0.00))#71, sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#54 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#72] +Results [5]: [catalog channel AS channel#73, concat(catalog_page, cp_catalog_page_id#57) AS id#74, 
MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#49))#70,17,2) AS sales#75, sum(coalesce(cast(cr_return_amount#53 as decimal(12,2)), 0.00))#71 AS returns#76, sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#54 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#72 AS profit#77] + +(67) Scan parquet default.web_sales +Output [7]: [ws_sold_date_sk#78, ws_item_sk#79, ws_web_site_sk#80, ws_promo_sk#81, ws_order_number#82, ws_ext_sales_price#83, ws_net_profit#84] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_web_site_sk), IsNotNull(ws_item_sk), IsNotNull(ws_promo_sk)] +ReadSchema: struct + +(68) ColumnarToRow [codegen id : 20] +Input [7]: [ws_sold_date_sk#78, ws_item_sk#79, ws_web_site_sk#80, ws_promo_sk#81, ws_order_number#82, ws_ext_sales_price#83, ws_net_profit#84] + +(69) Filter [codegen id : 20] +Input [7]: [ws_sold_date_sk#78, ws_item_sk#79, ws_web_site_sk#80, ws_promo_sk#81, ws_order_number#82, ws_ext_sales_price#83, ws_net_profit#84] +Condition : (((isnotnull(ws_sold_date_sk#78) AND isnotnull(ws_web_site_sk#80)) AND isnotnull(ws_item_sk#79)) AND isnotnull(ws_promo_sk#81)) + +(70) Scan parquet default.web_returns +Output [4]: [wr_item_sk#85, wr_order_number#86, wr_return_amt#87, wr_net_loss#88] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/web_returns] +PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_order_number)] +ReadSchema: struct + +(71) ColumnarToRow [codegen id : 15] +Input [4]: [wr_item_sk#85, wr_order_number#86, wr_return_amt#87, wr_net_loss#88] + +(72) Filter [codegen id : 15] +Input [4]: [wr_item_sk#85, wr_order_number#86, wr_return_amt#87, wr_net_loss#88] 
+Condition : (isnotnull(wr_item_sk#85) AND isnotnull(wr_order_number#86)) + +(73) BroadcastExchange +Input [4]: [wr_item_sk#85, wr_order_number#86, wr_return_amt#87, wr_net_loss#88] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false], input[1, bigint, false]),false), [id=#89] + +(74) BroadcastHashJoin [codegen id : 20] +Left keys [2]: [cast(ws_item_sk#79 as bigint), cast(ws_order_number#82 as bigint)] +Right keys [2]: [wr_item_sk#85, wr_order_number#86] +Join condition: None + +(75) Project [codegen id : 20] +Output [8]: [ws_sold_date_sk#78, ws_item_sk#79, ws_web_site_sk#80, ws_promo_sk#81, ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88] +Input [11]: [ws_sold_date_sk#78, ws_item_sk#79, ws_web_site_sk#80, ws_promo_sk#81, ws_order_number#82, ws_ext_sales_price#83, ws_net_profit#84, wr_item_sk#85, wr_order_number#86, wr_return_amt#87, wr_net_loss#88] + +(76) ReusedExchange [Reuses operator id: 14] +Output [1]: [d_date_sk#13] + +(77) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ws_sold_date_sk#78] +Right keys [1]: [d_date_sk#13] +Join condition: None + +(78) Project [codegen id : 20] +Output [7]: [ws_item_sk#79, ws_web_site_sk#80, ws_promo_sk#81, ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88] +Input [9]: [ws_sold_date_sk#78, ws_item_sk#79, ws_web_site_sk#80, ws_promo_sk#81, ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88, d_date_sk#13] + +(79) Scan parquet default.web_site +Output [2]: [web_site_sk#90, web_site_id#91] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/web_site] +PushedFilters: [IsNotNull(web_site_sk)] +ReadSchema: struct + +(80) ColumnarToRow [codegen id : 17] +Input [2]: [web_site_sk#90, web_site_id#91] + +(81) Filter [codegen id : 17] +Input [2]: [web_site_sk#90, web_site_id#91] +Condition : isnotnull(web_site_sk#90) + +(82) BroadcastExchange 
+Input [2]: [web_site_sk#90, web_site_id#91] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#92] + +(83) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ws_web_site_sk#80] +Right keys [1]: [web_site_sk#90] +Join condition: None + +(84) Project [codegen id : 20] +Output [7]: [ws_item_sk#79, ws_promo_sk#81, ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88, web_site_id#91] +Input [9]: [ws_item_sk#79, ws_web_site_sk#80, ws_promo_sk#81, ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88, web_site_sk#90, web_site_id#91] + +(85) ReusedExchange [Reuses operator id: 27] +Output [1]: [i_item_sk#19] + +(86) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ws_item_sk#79] +Right keys [1]: [i_item_sk#19] +Join condition: None + +(87) Project [codegen id : 20] +Output [6]: [ws_promo_sk#81, ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88, web_site_id#91] +Input [8]: [ws_item_sk#79, ws_promo_sk#81, ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88, web_site_id#91, i_item_sk#19] + +(88) ReusedExchange [Reuses operator id: 34] +Output [1]: [p_promo_sk#22] + +(89) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ws_promo_sk#81] +Right keys [1]: [p_promo_sk#22] +Join condition: None + +(90) Project [codegen id : 20] +Output [5]: [ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88, web_site_id#91] +Input [7]: [ws_promo_sk#81, ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88, web_site_id#91, p_promo_sk#22] + +(91) HashAggregate [codegen id : 20] +Input [5]: [ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88, web_site_id#91] +Keys [1]: [web_site_id#91] +Functions [3]: [partial_sum(UnscaledValue(ws_ext_sales_price#83)), partial_sum(coalesce(cast(wr_return_amt#87 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ws_net_profit#84 as 
decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#88 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Aggregate Attributes [5]: [sum#93, sum#94, isEmpty#95, sum#96, isEmpty#97] +Results [6]: [web_site_id#91, sum#98, sum#99, isEmpty#100, sum#101, isEmpty#102] + +(92) Exchange +Input [6]: [web_site_id#91, sum#98, sum#99, isEmpty#100, sum#101, isEmpty#102] +Arguments: hashpartitioning(web_site_id#91, 5), true, [id=#103] + +(93) HashAggregate [codegen id : 21] +Input [6]: [web_site_id#91, sum#98, sum#99, isEmpty#100, sum#101, isEmpty#102] +Keys [1]: [web_site_id#91] +Functions [3]: [sum(UnscaledValue(ws_ext_sales_price#83)), sum(coalesce(cast(wr_return_amt#87 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ws_net_profit#84 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#88 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_sales_price#83))#104, sum(coalesce(cast(wr_return_amt#87 as decimal(12,2)), 0.00))#105, sum(CheckOverflow((promote_precision(cast(ws_net_profit#84 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#88 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#106] +Results [5]: [web channel AS channel#107, concat(web_site, web_site_id#91) AS id#108, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#83))#104,17,2) AS sales#109, sum(coalesce(cast(wr_return_amt#87 as decimal(12,2)), 0.00))#105 AS returns#110, sum(CheckOverflow((promote_precision(cast(ws_net_profit#84 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#88 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#106 AS profit#111] + +(94) Union + +(95) HashAggregate [codegen id : 22] +Input [5]: [channel#39, id#40, sales#41, returns#42, profit#43] +Keys [2]: [channel#39, id#40] +Functions [3]: [partial_sum(sales#41), partial_sum(returns#42), partial_sum(profit#43)] 
+Aggregate Attributes [6]: [sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117] +Results [8]: [channel#39, id#40, sum#118, isEmpty#119, sum#120, isEmpty#121, sum#122, isEmpty#123] + +(96) Exchange +Input [8]: [channel#39, id#40, sum#118, isEmpty#119, sum#120, isEmpty#121, sum#122, isEmpty#123] +Arguments: hashpartitioning(channel#39, id#40, 5), true, [id=#124] + +(97) HashAggregate [codegen id : 23] +Input [8]: [channel#39, id#40, sum#118, isEmpty#119, sum#120, isEmpty#121, sum#122, isEmpty#123] +Keys [2]: [channel#39, id#40] +Functions [3]: [sum(sales#41), sum(returns#42), sum(profit#43)] +Aggregate Attributes [3]: [sum(sales#41)#125, sum(returns#42)#126, sum(profit#43)#127] +Results [5]: [channel#39, id#40, cast(sum(sales#41)#125 as decimal(37,2)) AS sales#128, cast(sum(returns#42)#126 as decimal(38,2)) AS returns#129, cast(sum(profit#43)#127 as decimal(38,2)) AS profit#130] + +(98) ReusedExchange [Reuses operator id: 96] +Output [8]: [channel#39, id#40, sum#131, isEmpty#132, sum#133, isEmpty#134, sum#135, isEmpty#136] + +(99) HashAggregate [codegen id : 46] +Input [8]: [channel#39, id#40, sum#131, isEmpty#132, sum#133, isEmpty#134, sum#135, isEmpty#136] +Keys [2]: [channel#39, id#40] +Functions [3]: [sum(sales#41), sum(returns#42), sum(profit#43)] +Aggregate Attributes [3]: [sum(sales#41)#137, sum(returns#42)#138, sum(profit#43)#139] +Results [4]: [channel#39, sum(sales#41)#137 AS sales#140, sum(returns#42)#138 AS returns#141, sum(profit#43)#139 AS profit#142] + +(100) HashAggregate [codegen id : 46] +Input [4]: [channel#39, sales#140, returns#141, profit#142] +Keys [1]: [channel#39] +Functions [3]: [partial_sum(sales#140), partial_sum(returns#141), partial_sum(profit#142)] +Aggregate Attributes [6]: [sum#143, isEmpty#144, sum#145, isEmpty#146, sum#147, isEmpty#148] +Results [7]: [channel#39, sum#149, isEmpty#150, sum#151, isEmpty#152, sum#153, isEmpty#154] + +(101) Exchange +Input [7]: [channel#39, sum#149, isEmpty#150, sum#151, isEmpty#152, 
sum#153, isEmpty#154] +Arguments: hashpartitioning(channel#39, 5), true, [id=#155] + +(102) HashAggregate [codegen id : 47] +Input [7]: [channel#39, sum#149, isEmpty#150, sum#151, isEmpty#152, sum#153, isEmpty#154] +Keys [1]: [channel#39] +Functions [3]: [sum(sales#140), sum(returns#141), sum(profit#142)] +Aggregate Attributes [3]: [sum(sales#140)#156, sum(returns#141)#157, sum(profit#142)#158] +Results [5]: [channel#39, null AS id#159, sum(sales#140)#156 AS sales#160, sum(returns#141)#157 AS returns#161, sum(profit#142)#158 AS profit#162] + +(103) Union + +(104) HashAggregate [codegen id : 48] +Input [5]: [channel#39, id#40, sales#128, returns#129, profit#130] +Keys [5]: [channel#39, id#40, sales#128, returns#129, profit#130] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#39, id#40, sales#128, returns#129, profit#130] + +(105) Exchange +Input [5]: [channel#39, id#40, sales#128, returns#129, profit#130] +Arguments: hashpartitioning(channel#39, id#40, sales#128, returns#129, profit#130, 5), true, [id=#163] + +(106) HashAggregate [codegen id : 49] +Input [5]: [channel#39, id#40, sales#128, returns#129, profit#130] +Keys [5]: [channel#39, id#40, sales#128, returns#129, profit#130] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#39, id#40, sales#128, returns#129, profit#130] + +(107) ReusedExchange [Reuses operator id: 96] +Output [8]: [channel#39, id#40, sum#164, isEmpty#165, sum#166, isEmpty#167, sum#168, isEmpty#169] + +(108) HashAggregate [codegen id : 72] +Input [8]: [channel#39, id#40, sum#164, isEmpty#165, sum#166, isEmpty#167, sum#168, isEmpty#169] +Keys [2]: [channel#39, id#40] +Functions [3]: [sum(sales#41), sum(returns#42), sum(profit#43)] +Aggregate Attributes [3]: [sum(sales#41)#170, sum(returns#42)#171, sum(profit#43)#172] +Results [3]: [sum(sales#41)#170 AS sales#140, sum(returns#42)#171 AS returns#141, sum(profit#43)#172 AS profit#142] + +(109) HashAggregate [codegen id : 72] +Input [3]: [sales#140, returns#141, 
profit#142] +Keys: [] +Functions [3]: [partial_sum(sales#140), partial_sum(returns#141), partial_sum(profit#142)] +Aggregate Attributes [6]: [sum#173, isEmpty#174, sum#175, isEmpty#176, sum#177, isEmpty#178] +Results [6]: [sum#179, isEmpty#180, sum#181, isEmpty#182, sum#183, isEmpty#184] + +(110) Exchange +Input [6]: [sum#179, isEmpty#180, sum#181, isEmpty#182, sum#183, isEmpty#184] +Arguments: SinglePartition, true, [id=#185] + +(111) HashAggregate [codegen id : 73] +Input [6]: [sum#179, isEmpty#180, sum#181, isEmpty#182, sum#183, isEmpty#184] +Keys: [] +Functions [3]: [sum(sales#140), sum(returns#141), sum(profit#142)] +Aggregate Attributes [3]: [sum(sales#140)#186, sum(returns#141)#187, sum(profit#142)#188] +Results [5]: [null AS channel#189, null AS id#190, sum(sales#140)#186 AS sales#191, sum(returns#141)#187 AS returns#192, sum(profit#142)#188 AS profit#193] + +(112) Union + +(113) HashAggregate [codegen id : 74] +Input [5]: [channel#39, id#40, sales#128, returns#129, profit#130] +Keys [5]: [channel#39, id#40, sales#128, returns#129, profit#130] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#39, id#40, sales#128, returns#129, profit#130] + +(114) Exchange +Input [5]: [channel#39, id#40, sales#128, returns#129, profit#130] +Arguments: hashpartitioning(channel#39, id#40, sales#128, returns#129, profit#130, 5), true, [id=#194] + +(115) HashAggregate [codegen id : 75] +Input [5]: [channel#39, id#40, sales#128, returns#129, profit#130] +Keys [5]: [channel#39, id#40, sales#128, returns#129, profit#130] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#39, id#40, sales#128, returns#129, profit#130] + +(116) TakeOrderedAndProject +Input [5]: [channel#39, id#40, sales#128, returns#129, profit#130] +Arguments: 100, [channel#39 ASC NULLS FIRST, id#40 ASC NULLS FIRST], [channel#39, id#40, sales#128, returns#129, profit#130] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a/simplified.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a/simplified.txt new file mode 100644 index 0000000000000..e6f723af7fd0e --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a/simplified.txt @@ -0,0 +1,181 @@ +TakeOrderedAndProject [channel,id,profit,returns,sales] + WholeStageCodegen (75) + HashAggregate [channel,id,profit,returns,sales] + InputAdapter + Exchange [channel,id,profit,returns,sales] #1 + WholeStageCodegen (74) + HashAggregate [channel,id,profit,returns,sales] + InputAdapter + Union + WholeStageCodegen (49) + HashAggregate [channel,id,profit,returns,sales] + InputAdapter + Exchange [channel,id,profit,returns,sales] #2 + WholeStageCodegen (48) + HashAggregate [channel,id,profit,returns,sales] + InputAdapter + Union + WholeStageCodegen (23) + HashAggregate [channel,id,isEmpty,isEmpty,isEmpty,sum,sum,sum] [isEmpty,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] + InputAdapter + Exchange [channel,id] #3 + WholeStageCodegen (22) + HashAggregate [channel,id,profit,returns,sales] [isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum] + InputAdapter + Union + WholeStageCodegen (7) + HashAggregate [isEmpty,isEmpty,s_store_id,sum,sum,sum] [channel,id,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00))] + InputAdapter + Exchange [s_store_id] #4 + WholeStageCodegen (6) + HashAggregate [s_store_id,sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_net_profit] [isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum] + Project [s_store_id,sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [p_promo_sk,ss_promo_sk] + Project 
[s_store_id,sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_net_profit,ss_promo_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [s_store_id,sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_promo_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_promo_sk,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_promo_sk,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Filter [ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_net_loss,sr_return_amt,sr_ticket_number] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (3) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_id,s_store_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (4) + Project [i_item_sk] + Filter [i_current_price,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_current_price,i_item_sk] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (5) + Project [p_promo_sk] + Filter [p_channel_tv,p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.promotion [p_channel_tv,p_promo_sk] + WholeStageCodegen (14) + HashAggregate [cp_catalog_page_id,isEmpty,isEmpty,sum,sum,sum] 
[channel,id,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00))] + InputAdapter + Exchange [cp_catalog_page_id] #10 + WholeStageCodegen (13) + HashAggregate [cp_catalog_page_id,cr_net_loss,cr_return_amount,cs_ext_sales_price,cs_net_profit] [isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum] + Project [cp_catalog_page_id,cr_net_loss,cr_return_amount,cs_ext_sales_price,cs_net_profit] + BroadcastHashJoin [cs_promo_sk,p_promo_sk] + Project [cp_catalog_page_id,cr_net_loss,cr_return_amount,cs_ext_sales_price,cs_net_profit,cs_promo_sk] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cp_catalog_page_id,cr_net_loss,cr_return_amount,cs_ext_sales_price,cs_item_sk,cs_net_profit,cs_promo_sk] + BroadcastHashJoin [cp_catalog_page_sk,cs_catalog_page_sk] + Project [cr_net_loss,cr_return_amount,cs_catalog_page_sk,cs_ext_sales_price,cs_item_sk,cs_net_profit,cs_promo_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cr_net_loss,cr_return_amount,cs_catalog_page_sk,cs_ext_sales_price,cs_item_sk,cs_net_profit,cs_promo_sk,cs_sold_date_sk] + BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + Filter [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_catalog_page_sk,cs_ext_sales_price,cs_item_sk,cs_net_profit,cs_order_number,cs_promo_sk,cs_sold_date_sk] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (8) + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_net_loss,cr_order_number,cr_return_amount] + InputAdapter + ReusedExchange [d_date_sk] #6 + InputAdapter + BroadcastExchange #12 + 
WholeStageCodegen (10) + Filter [cp_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_page [cp_catalog_page_id,cp_catalog_page_sk] + InputAdapter + ReusedExchange [i_item_sk] #8 + InputAdapter + ReusedExchange [p_promo_sk] #9 + WholeStageCodegen (21) + HashAggregate [isEmpty,isEmpty,sum,sum,sum,web_site_id] [channel,id,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00))] + InputAdapter + Exchange [web_site_id] #13 + WholeStageCodegen (20) + HashAggregate [web_site_id,wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_net_profit] [isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum] + Project [web_site_id,wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_net_profit] + BroadcastHashJoin [p_promo_sk,ws_promo_sk] + Project [web_site_id,wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_net_profit,ws_promo_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [web_site_id,wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_item_sk,ws_net_profit,ws_promo_sk] + BroadcastHashJoin [web_site_sk,ws_web_site_sk] + Project [wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_item_sk,ws_net_profit,ws_promo_sk,ws_web_site_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_item_sk,ws_net_profit,ws_promo_sk,ws_sold_date_sk,ws_web_site_sk] + BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + Filter [ws_item_sk,ws_promo_sk,ws_sold_date_sk,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_net_profit,ws_order_number,ws_promo_sk,ws_sold_date_sk,ws_web_site_sk] + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (15) + Filter 
[wr_item_sk,wr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_item_sk,wr_net_loss,wr_order_number,wr_return_amt] + InputAdapter + ReusedExchange [d_date_sk] #6 + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (17) + Filter [web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_site [web_site_id,web_site_sk] + InputAdapter + ReusedExchange [i_item_sk] #8 + InputAdapter + ReusedExchange [p_promo_sk] #9 + WholeStageCodegen (47) + HashAggregate [channel,isEmpty,isEmpty,isEmpty,sum,sum,sum] [id,isEmpty,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] + InputAdapter + Exchange [channel] #16 + WholeStageCodegen (46) + HashAggregate [channel,profit,returns,sales] [isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum] + HashAggregate [channel,id,isEmpty,isEmpty,isEmpty,sum,sum,sum] [isEmpty,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] + InputAdapter + ReusedExchange [channel,id,isEmpty,isEmpty,isEmpty,sum,sum,sum] #3 + WholeStageCodegen (73) + HashAggregate [isEmpty,isEmpty,isEmpty,sum,sum,sum] [channel,id,isEmpty,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] + InputAdapter + Exchange #17 + WholeStageCodegen (72) + HashAggregate [profit,returns,sales] [isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,isEmpty,sum,sum,sum,sum,sum,sum] + HashAggregate [channel,id,isEmpty,isEmpty,isEmpty,sum,sum,sum] [isEmpty,isEmpty,isEmpty,profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] + InputAdapter + ReusedExchange [channel,id,isEmpty,isEmpty,isEmpty,sum,sum,sum] #3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.sf100/explain.txt new file mode 100644 index 0000000000000..d331a2b0a2a7f --- /dev/null +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.sf100/explain.txt @@ -0,0 +1,251 @@ +== Physical Plan == +TakeOrderedAndProject (42) ++- * Project (41) + +- Window (40) + +- * Sort (39) + +- Exchange (38) + +- * HashAggregate (37) + +- Exchange (36) + +- * HashAggregate (35) + +- Union (34) + :- * HashAggregate (28) + : +- Exchange (27) + : +- * HashAggregate (26) + : +- Union (25) + : :- * HashAggregate (19) + : : +- Exchange (18) + : : +- * HashAggregate (17) + : : +- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.web_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.item (11) + : +- * HashAggregate (24) + : +- Exchange (23) + : +- * HashAggregate (22) + : +- * HashAggregate (21) + : +- ReusedExchange (20) + +- * HashAggregate (33) + +- Exchange (32) + +- * HashAggregate (31) + +- * HashAggregate (30) + +- ReusedExchange (29) + + +(1) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_net_paid#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_net_paid#3] + +(3) Filter [codegen id : 3] +Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_net_paid#3] +Condition : (isnotnull(ws_sold_date_sk#1) AND isnotnull(ws_item_sk#2)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#4, d_month_seq#5] 
+Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1212)) AND (d_month_seq#5 <= 1223)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(10) Project [codegen id : 3] +Output [2]: [ws_item_sk#2, ws_net_paid#3] +Input [4]: [ws_sold_date_sk#1, ws_item_sk#2, ws_net_paid#3, d_date_sk#4] + +(11) Scan parquet default.item +Output [3]: [i_item_sk#7, i_class#8, i_category#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [i_item_sk#7, i_class#8, i_category#9] + +(13) Filter [codegen id : 2] +Input [3]: [i_item_sk#7, i_class#8, i_category#9] +Condition : isnotnull(i_item_sk#7) + +(14) BroadcastExchange +Input [3]: [i_item_sk#7, i_class#8, i_category#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_item_sk#2] +Right keys [1]: [i_item_sk#7] +Join condition: None + +(16) Project [codegen id : 
3] +Output [3]: [ws_net_paid#3, i_class#8, i_category#9] +Input [5]: [ws_item_sk#2, ws_net_paid#3, i_item_sk#7, i_class#8, i_category#9] + +(17) HashAggregate [codegen id : 3] +Input [3]: [ws_net_paid#3, i_class#8, i_category#9] +Keys [2]: [i_category#9, i_class#8] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#3))] +Aggregate Attributes [1]: [sum#11] +Results [3]: [i_category#9, i_class#8, sum#12] + +(18) Exchange +Input [3]: [i_category#9, i_class#8, sum#12] +Arguments: hashpartitioning(i_category#9, i_class#8, 5), true, [id=#13] + +(19) HashAggregate [codegen id : 4] +Input [3]: [i_category#9, i_class#8, sum#12] +Keys [2]: [i_category#9, i_class#8] +Functions [1]: [sum(UnscaledValue(ws_net_paid#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#3))#14] +Results [6]: [cast(MakeDecimal(sum(UnscaledValue(ws_net_paid#3))#14,17,2) as decimal(27,2)) AS total_sum#15, i_category#9, i_class#8, 0 AS g_category#16, 0 AS g_class#17, 0 AS lochierarchy#18] + +(20) ReusedExchange [Reuses operator id: 18] +Output [3]: [i_category#9, i_class#8, sum#19] + +(21) HashAggregate [codegen id : 8] +Input [3]: [i_category#9, i_class#8, sum#19] +Keys [2]: [i_category#9, i_class#8] +Functions [1]: [sum(UnscaledValue(ws_net_paid#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#3))#20] +Results [2]: [MakeDecimal(sum(UnscaledValue(ws_net_paid#3))#20,17,2) AS total_sum#21, i_category#9] + +(22) HashAggregate [codegen id : 8] +Input [2]: [total_sum#21, i_category#9] +Keys [1]: [i_category#9] +Functions [1]: [partial_sum(total_sum#21)] +Aggregate Attributes [2]: [sum#22, isEmpty#23] +Results [3]: [i_category#9, sum#24, isEmpty#25] + +(23) Exchange +Input [3]: [i_category#9, sum#24, isEmpty#25] +Arguments: hashpartitioning(i_category#9, 5), true, [id=#26] + +(24) HashAggregate [codegen id : 9] +Input [3]: [i_category#9, sum#24, isEmpty#25] +Keys [1]: [i_category#9] +Functions [1]: [sum(total_sum#21)] +Aggregate Attributes [1]: [sum(total_sum#21)#27] +Results 
[6]: [sum(total_sum#21)#27 AS total_sum#28, i_category#9, null AS i_class#29, 0 AS g_category#30, 1 AS g_class#31, 1 AS lochierarchy#32] + +(25) Union + +(26) HashAggregate [codegen id : 10] +Input [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] +Keys [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] +Functions: [] +Aggregate Attributes: [] +Results [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] + +(27) Exchange +Input [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] +Arguments: hashpartitioning(total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18, 5), true, [id=#33] + +(28) HashAggregate [codegen id : 11] +Input [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] +Keys [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] +Functions: [] +Aggregate Attributes: [] +Results [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] + +(29) ReusedExchange [Reuses operator id: 18] +Output [3]: [i_category#9, i_class#8, sum#34] + +(30) HashAggregate [codegen id : 15] +Input [3]: [i_category#9, i_class#8, sum#34] +Keys [2]: [i_category#9, i_class#8] +Functions [1]: [sum(UnscaledValue(ws_net_paid#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#3))#35] +Results [1]: [MakeDecimal(sum(UnscaledValue(ws_net_paid#3))#35,17,2) AS total_sum#21] + +(31) HashAggregate [codegen id : 15] +Input [1]: [total_sum#21] +Keys: [] +Functions [1]: [partial_sum(total_sum#21)] +Aggregate Attributes [2]: [sum#36, isEmpty#37] +Results [2]: [sum#38, isEmpty#39] + +(32) Exchange +Input [2]: [sum#38, isEmpty#39] +Arguments: SinglePartition, true, [id=#40] + +(33) HashAggregate [codegen id : 16] +Input [2]: [sum#38, isEmpty#39] +Keys: [] +Functions [1]: [sum(total_sum#21)] +Aggregate Attributes 
[1]: [sum(total_sum#21)#41] +Results [6]: [sum(total_sum#21)#41 AS total_sum#42, null AS i_category#43, null AS i_class#44, 1 AS g_category#45, 1 AS g_class#46, 2 AS lochierarchy#47] + +(34) Union + +(35) HashAggregate [codegen id : 17] +Input [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] +Keys [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] +Functions: [] +Aggregate Attributes: [] +Results [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] + +(36) Exchange +Input [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] +Arguments: hashpartitioning(total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18, 5), true, [id=#48] + +(37) HashAggregate [codegen id : 18] +Input [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] +Keys [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] +Functions: [] +Aggregate Attributes: [] +Results [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, CASE WHEN (g_class#17 = 0) THEN i_category#9 END AS _w0#49] + +(38) Exchange +Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#49] +Arguments: hashpartitioning(lochierarchy#18, _w0#49, 5), true, [id=#50] + +(39) Sort [codegen id : 19] +Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#49] +Arguments: [lochierarchy#18 ASC NULLS FIRST, _w0#49 ASC NULLS FIRST, total_sum#15 DESC NULLS LAST], false, 0 + +(40) Window +Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#49] +Arguments: [rank(total_sum#15) windowspecdefinition(lochierarchy#18, _w0#49, total_sum#15 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#51], [lochierarchy#18, _w0#49], [total_sum#15 DESC NULLS LAST] + +(41) Project [codegen id : 20] +Output 
[5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, rank_within_parent#51] +Input [6]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#49, rank_within_parent#51] + +(42) TakeOrderedAndProject +Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, rank_within_parent#51] +Arguments: 100, [lochierarchy#18 DESC NULLS LAST, CASE WHEN (lochierarchy#18 = 0) THEN i_category#9 END ASC NULLS FIRST, rank_within_parent#51 ASC NULLS FIRST], [total_sum#15, i_category#9, i_class#8, lochierarchy#18, rank_within_parent#51] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.sf100/simplified.txt new file mode 100644 index 0000000000000..5ecbf84847937 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.sf100/simplified.txt @@ -0,0 +1,72 @@ +TakeOrderedAndProject [i_category,i_class,lochierarchy,rank_within_parent,total_sum] + WholeStageCodegen (20) + Project [i_category,i_class,lochierarchy,rank_within_parent,total_sum] + InputAdapter + Window [_w0,lochierarchy,total_sum] + WholeStageCodegen (19) + Sort [_w0,lochierarchy,total_sum] + InputAdapter + Exchange [_w0,lochierarchy] #1 + WholeStageCodegen (18) + HashAggregate [g_category,g_class,i_category,i_class,lochierarchy,total_sum] [_w0] + InputAdapter + Exchange [g_category,g_class,i_category,i_class,lochierarchy,total_sum] #2 + WholeStageCodegen (17) + HashAggregate [g_category,g_class,i_category,i_class,lochierarchy,total_sum] + InputAdapter + Union + WholeStageCodegen (11) + HashAggregate [g_category,g_class,i_category,i_class,lochierarchy,total_sum] + InputAdapter + Exchange [g_category,g_class,i_category,i_class,lochierarchy,total_sum] #3 + WholeStageCodegen (10) + HashAggregate [g_category,g_class,i_category,i_class,lochierarchy,total_sum] + InputAdapter + Union + WholeStageCodegen (4) + HashAggregate 
[i_category,i_class,sum] [g_category,g_class,lochierarchy,sum,sum(UnscaledValue(ws_net_paid)),total_sum] + InputAdapter + Exchange [i_category,i_class] #4 + WholeStageCodegen (3) + HashAggregate [i_category,i_class,ws_net_paid] [sum,sum] + Project [i_category,i_class,ws_net_paid] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_item_sk,ws_net_paid] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_net_paid,ws_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_class,i_item_sk] + WholeStageCodegen (9) + HashAggregate [i_category,isEmpty,sum] [g_category,g_class,i_class,isEmpty,lochierarchy,sum,sum(total_sum),total_sum] + InputAdapter + Exchange [i_category] #7 + WholeStageCodegen (8) + HashAggregate [i_category,total_sum] [isEmpty,isEmpty,sum,sum] + HashAggregate [i_category,i_class,sum] [sum,sum(UnscaledValue(ws_net_paid)),total_sum] + InputAdapter + ReusedExchange [i_category,i_class,sum] #4 + WholeStageCodegen (16) + HashAggregate [isEmpty,sum] [g_category,g_class,i_category,i_class,isEmpty,lochierarchy,sum,sum(total_sum),total_sum] + InputAdapter + Exchange #8 + WholeStageCodegen (15) + HashAggregate [total_sum] [isEmpty,isEmpty,sum,sum] + HashAggregate [i_category,i_class,sum] [sum,sum(UnscaledValue(ws_net_paid)),total_sum] + InputAdapter + ReusedExchange [i_category,i_class,sum] #4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a/explain.txt new file mode 100644 index 0000000000000..c1c10c53bce82 --- 
/dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a/explain.txt @@ -0,0 +1,251 @@ +== Physical Plan == +TakeOrderedAndProject (42) ++- * Project (41) + +- Window (40) + +- * Sort (39) + +- Exchange (38) + +- * HashAggregate (37) + +- Exchange (36) + +- * HashAggregate (35) + +- Union (34) + :- * HashAggregate (28) + : +- Exchange (27) + : +- * HashAggregate (26) + : +- Union (25) + : :- * HashAggregate (19) + : : +- Exchange (18) + : : +- * HashAggregate (17) + : : +- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.web_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.item (11) + : +- * HashAggregate (24) + : +- Exchange (23) + : +- * HashAggregate (22) + : +- * HashAggregate (21) + : +- ReusedExchange (20) + +- * HashAggregate (33) + +- Exchange (32) + +- * HashAggregate (31) + +- * HashAggregate (30) + +- ReusedExchange (29) + + +(1) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_net_paid#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_net_paid#3] + +(3) Filter [codegen id : 3] +Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_net_paid#3] +Condition : (isnotnull(ws_sold_date_sk#1) AND isnotnull(ws_item_sk#2)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#4, d_month_seq#5] 
+Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1212)) AND (d_month_seq#5 <= 1223)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(10) Project [codegen id : 3] +Output [2]: [ws_item_sk#2, ws_net_paid#3] +Input [4]: [ws_sold_date_sk#1, ws_item_sk#2, ws_net_paid#3, d_date_sk#4] + +(11) Scan parquet default.item +Output [3]: [i_item_sk#7, i_class#8, i_category#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [i_item_sk#7, i_class#8, i_category#9] + +(13) Filter [codegen id : 2] +Input [3]: [i_item_sk#7, i_class#8, i_category#9] +Condition : isnotnull(i_item_sk#7) + +(14) BroadcastExchange +Input [3]: [i_item_sk#7, i_class#8, i_category#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_item_sk#2] +Right keys [1]: [i_item_sk#7] +Join condition: None + +(16) Project [codegen id : 3] +Output [3]: 
[ws_net_paid#3, i_class#8, i_category#9] +Input [5]: [ws_item_sk#2, ws_net_paid#3, i_item_sk#7, i_class#8, i_category#9] + +(17) HashAggregate [codegen id : 3] +Input [3]: [ws_net_paid#3, i_class#8, i_category#9] +Keys [2]: [i_category#9, i_class#8] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#3))] +Aggregate Attributes [1]: [sum#11] +Results [3]: [i_category#9, i_class#8, sum#12] + +(18) Exchange +Input [3]: [i_category#9, i_class#8, sum#12] +Arguments: hashpartitioning(i_category#9, i_class#8, 5), true, [id=#13] + +(19) HashAggregate [codegen id : 4] +Input [3]: [i_category#9, i_class#8, sum#12] +Keys [2]: [i_category#9, i_class#8] +Functions [1]: [sum(UnscaledValue(ws_net_paid#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#3))#14] +Results [6]: [cast(MakeDecimal(sum(UnscaledValue(ws_net_paid#3))#14,17,2) as decimal(27,2)) AS total_sum#15, i_category#9, i_class#8, 0 AS g_category#16, 0 AS g_class#17, 0 AS lochierarchy#18] + +(20) ReusedExchange [Reuses operator id: 18] +Output [3]: [i_category#9, i_class#8, sum#19] + +(21) HashAggregate [codegen id : 8] +Input [3]: [i_category#9, i_class#8, sum#19] +Keys [2]: [i_category#9, i_class#8] +Functions [1]: [sum(UnscaledValue(ws_net_paid#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#3))#20] +Results [2]: [MakeDecimal(sum(UnscaledValue(ws_net_paid#3))#20,17,2) AS total_sum#21, i_category#9] + +(22) HashAggregate [codegen id : 8] +Input [2]: [total_sum#21, i_category#9] +Keys [1]: [i_category#9] +Functions [1]: [partial_sum(total_sum#21)] +Aggregate Attributes [2]: [sum#22, isEmpty#23] +Results [3]: [i_category#9, sum#24, isEmpty#25] + +(23) Exchange +Input [3]: [i_category#9, sum#24, isEmpty#25] +Arguments: hashpartitioning(i_category#9, 5), true, [id=#26] + +(24) HashAggregate [codegen id : 9] +Input [3]: [i_category#9, sum#24, isEmpty#25] +Keys [1]: [i_category#9] +Functions [1]: [sum(total_sum#21)] +Aggregate Attributes [1]: [sum(total_sum#21)#27] +Results [6]: 
[sum(total_sum#21)#27 AS total_sum#28, i_category#9, null AS i_class#29, 0 AS g_category#30, 1 AS g_class#31, 1 AS lochierarchy#32] + +(25) Union + +(26) HashAggregate [codegen id : 10] +Input [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] +Keys [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] +Functions: [] +Aggregate Attributes: [] +Results [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] + +(27) Exchange +Input [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] +Arguments: hashpartitioning(total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18, 5), true, [id=#33] + +(28) HashAggregate [codegen id : 11] +Input [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] +Keys [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] +Functions: [] +Aggregate Attributes: [] +Results [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] + +(29) ReusedExchange [Reuses operator id: 18] +Output [3]: [i_category#9, i_class#8, sum#34] + +(30) HashAggregate [codegen id : 15] +Input [3]: [i_category#9, i_class#8, sum#34] +Keys [2]: [i_category#9, i_class#8] +Functions [1]: [sum(UnscaledValue(ws_net_paid#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#3))#35] +Results [1]: [MakeDecimal(sum(UnscaledValue(ws_net_paid#3))#35,17,2) AS total_sum#21] + +(31) HashAggregate [codegen id : 15] +Input [1]: [total_sum#21] +Keys: [] +Functions [1]: [partial_sum(total_sum#21)] +Aggregate Attributes [2]: [sum#36, isEmpty#37] +Results [2]: [sum#38, isEmpty#39] + +(32) Exchange +Input [2]: [sum#38, isEmpty#39] +Arguments: SinglePartition, true, [id=#40] + +(33) HashAggregate [codegen id : 16] +Input [2]: [sum#38, isEmpty#39] +Keys: [] +Functions [1]: [sum(total_sum#21)] +Aggregate Attributes [1]: 
[sum(total_sum#21)#41] +Results [6]: [sum(total_sum#21)#41 AS total_sum#42, null AS i_category#43, null AS i_class#44, 1 AS g_category#45, 1 AS g_class#46, 2 AS lochierarchy#47] + +(34) Union + +(35) HashAggregate [codegen id : 17] +Input [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] +Keys [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] +Functions: [] +Aggregate Attributes: [] +Results [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] + +(36) Exchange +Input [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] +Arguments: hashpartitioning(total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18, 5), true, [id=#48] + +(37) HashAggregate [codegen id : 18] +Input [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] +Keys [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] +Functions: [] +Aggregate Attributes: [] +Results [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, CASE WHEN (g_class#17 = 0) THEN i_category#9 END AS _w0#49] + +(38) Exchange +Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#49] +Arguments: hashpartitioning(lochierarchy#18, _w0#49, 5), true, [id=#50] + +(39) Sort [codegen id : 19] +Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#49] +Arguments: [lochierarchy#18 ASC NULLS FIRST, _w0#49 ASC NULLS FIRST, total_sum#15 DESC NULLS LAST], false, 0 + +(40) Window +Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#49] +Arguments: [rank(total_sum#15) windowspecdefinition(lochierarchy#18, _w0#49, total_sum#15 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#51], [lochierarchy#18, _w0#49], [total_sum#15 DESC NULLS LAST] + +(41) Project [codegen id : 20] +Output [5]: 
[total_sum#15, i_category#9, i_class#8, lochierarchy#18, rank_within_parent#51] +Input [6]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#49, rank_within_parent#51] + +(42) TakeOrderedAndProject +Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, rank_within_parent#51] +Arguments: 100, [lochierarchy#18 DESC NULLS LAST, CASE WHEN (lochierarchy#18 = 0) THEN i_category#9 END ASC NULLS FIRST, rank_within_parent#51 ASC NULLS FIRST], [total_sum#15, i_category#9, i_class#8, lochierarchy#18, rank_within_parent#51] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a/simplified.txt new file mode 100644 index 0000000000000..5ecbf84847937 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a/simplified.txt @@ -0,0 +1,72 @@ +TakeOrderedAndProject [i_category,i_class,lochierarchy,rank_within_parent,total_sum] + WholeStageCodegen (20) + Project [i_category,i_class,lochierarchy,rank_within_parent,total_sum] + InputAdapter + Window [_w0,lochierarchy,total_sum] + WholeStageCodegen (19) + Sort [_w0,lochierarchy,total_sum] + InputAdapter + Exchange [_w0,lochierarchy] #1 + WholeStageCodegen (18) + HashAggregate [g_category,g_class,i_category,i_class,lochierarchy,total_sum] [_w0] + InputAdapter + Exchange [g_category,g_class,i_category,i_class,lochierarchy,total_sum] #2 + WholeStageCodegen (17) + HashAggregate [g_category,g_class,i_category,i_class,lochierarchy,total_sum] + InputAdapter + Union + WholeStageCodegen (11) + HashAggregate [g_category,g_class,i_category,i_class,lochierarchy,total_sum] + InputAdapter + Exchange [g_category,g_class,i_category,i_class,lochierarchy,total_sum] #3 + WholeStageCodegen (10) + HashAggregate [g_category,g_class,i_category,i_class,lochierarchy,total_sum] + InputAdapter + Union + WholeStageCodegen (4) + HashAggregate [i_category,i_class,sum] 
[g_category,g_class,lochierarchy,sum,sum(UnscaledValue(ws_net_paid)),total_sum] + InputAdapter + Exchange [i_category,i_class] #4 + WholeStageCodegen (3) + HashAggregate [i_category,i_class,ws_net_paid] [sum,sum] + Project [i_category,i_class,ws_net_paid] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_item_sk,ws_net_paid] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_net_paid,ws_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date_sk,d_month_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_class,i_item_sk] + WholeStageCodegen (9) + HashAggregate [i_category,isEmpty,sum] [g_category,g_class,i_class,isEmpty,lochierarchy,sum,sum(total_sum),total_sum] + InputAdapter + Exchange [i_category] #7 + WholeStageCodegen (8) + HashAggregate [i_category,total_sum] [isEmpty,isEmpty,sum,sum] + HashAggregate [i_category,i_class,sum] [sum,sum(UnscaledValue(ws_net_paid)),total_sum] + InputAdapter + ReusedExchange [i_category,i_class,sum] #4 + WholeStageCodegen (16) + HashAggregate [isEmpty,sum] [g_category,g_class,i_category,i_class,isEmpty,lochierarchy,sum,sum(total_sum),total_sum] + InputAdapter + Exchange #8 + WholeStageCodegen (15) + HashAggregate [total_sum] [isEmpty,isEmpty,sum,sum] + HashAggregate [i_category,i_class,sum] [sum,sum(UnscaledValue(ws_net_paid)),total_sum] + InputAdapter + ReusedExchange [i_category,i_class,sum] #4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98.sf100/explain.txt new file mode 100644 index 0000000000000..b9542957f9f0a --- /dev/null +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98.sf100/explain.txt @@ -0,0 +1,157 @@ +== Physical Plan == +* Sort (28) ++- Exchange (27) + +- * Project (26) + +- Window (25) + +- * Sort (24) + +- Exchange (23) + +- * HashAggregate (22) + +- Exchange (21) + +- * HashAggregate (20) + +- * Project (19) + +- * SortMergeJoin Inner (18) + :- * Sort (12) + : +- Exchange (11) + : +- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.store_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.date_dim (4) + +- * Sort (17) + +- Exchange (16) + +- * Filter (15) + +- * ColumnarToRow (14) + +- Scan parquet default.item (13) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] + +(3) Filter [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] +Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#4, d_date#5] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#4, d_date#5] + +(6) Filter [codegen id : 1] +Input [2]: 
[d_date_sk#4, d_date#5] +Condition : (((isnotnull(d_date#5) AND (d_date#5 >= 10644)) AND (d_date#5 <= 10674)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_date#5] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(9) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(10) Project [codegen id : 2] +Output [2]: [ss_item_sk#2, ss_ext_sales_price#3] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3, d_date_sk#4] + +(11) Exchange +Input [2]: [ss_item_sk#2, ss_ext_sales_price#3] +Arguments: hashpartitioning(ss_item_sk#2, 5), true, [id=#7] + +(12) Sort [codegen id : 3] +Input [2]: [ss_item_sk#2, ss_ext_sales_price#3] +Arguments: [ss_item_sk#2 ASC NULLS FIRST], false, 0 + +(13) Scan parquet default.item +Output [6]: [i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilityWithStatsSuite/item] +PushedFilters: [In(i_category, [Sports,Books,Home]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 4] +Input [6]: [i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] + +(15) Filter [codegen id : 4] +Input [6]: [i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Condition : (i_category#13 IN (Sports,Books,Home) AND isnotnull(i_item_sk#8)) + +(16) Exchange +Input [6]: [i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Arguments: hashpartitioning(i_item_sk#8, 5), true, [id=#14] + +(17) Sort [codegen id : 5] +Input [6]: [i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, 
i_category#13] +Arguments: [i_item_sk#8 ASC NULLS FIRST], false, 0 + +(18) SortMergeJoin [codegen id : 6] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#8] +Join condition: None + +(19) Project [codegen id : 6] +Output [6]: [ss_ext_sales_price#3, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Input [8]: [ss_item_sk#2, ss_ext_sales_price#3, i_item_sk#8, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] + +(20) HashAggregate [codegen id : 6] +Input [6]: [ss_ext_sales_price#3, i_item_id#9, i_item_desc#10, i_current_price#11, i_class#12, i_category#13] +Keys [5]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#15] +Results [6]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, sum#16] + +(21) Exchange +Input [6]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, sum#16] +Arguments: hashpartitioning(i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, 5), true, [id=#17] + +(22) HashAggregate [codegen id : 7] +Input [6]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, sum#16] +Keys [5]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#18] +Results [8]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#18,17,2) AS itemrevenue#19, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#18,17,2) AS _w0#20, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#18,17,2) AS _w1#21] + +(23) Exchange +Input [8]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, _w0#20, _w1#21] +Arguments: 
hashpartitioning(i_class#12, 5), true, [id=#22] + +(24) Sort [codegen id : 8] +Input [8]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, _w0#20, _w1#21] +Arguments: [i_class#12 ASC NULLS FIRST], false, 0 + +(25) Window +Input [8]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, _w0#20, _w1#21] +Arguments: [sum(_w1#21) windowspecdefinition(i_class#12, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#23], [i_class#12] + +(26) Project [codegen id : 9] +Output [7]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#20) * 100.00), DecimalType(21,2), true) as decimal(27,2))) / promote_precision(_we0#23)), DecimalType(38,17), true) AS revenueratio#24] +Input [9]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, _w0#20, _w1#21, _we0#23] + +(27) Exchange +Input [7]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, revenueratio#24] +Arguments: rangepartitioning(i_category#13 ASC NULLS FIRST, i_class#12 ASC NULLS FIRST, i_item_id#9 ASC NULLS FIRST, i_item_desc#10 ASC NULLS FIRST, revenueratio#24 ASC NULLS FIRST, 5), true, [id=#25] + +(28) Sort [codegen id : 10] +Input [7]: [i_item_id#9, i_item_desc#10, i_category#13, i_class#12, i_current_price#11, itemrevenue#19, revenueratio#24] +Arguments: [i_category#13 ASC NULLS FIRST, i_class#12 ASC NULLS FIRST, i_item_id#9 ASC NULLS FIRST, i_item_desc#10 ASC NULLS FIRST, revenueratio#24 ASC NULLS FIRST], true, 0 + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98.sf100/simplified.txt new file mode 100644 index 0000000000000..b2bd85daa5301 --- /dev/null +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98.sf100/simplified.txt @@ -0,0 +1,50 @@ +WholeStageCodegen (10) + Sort [i_category,i_class,i_item_desc,i_item_id,revenueratio] + InputAdapter + Exchange [i_category,i_class,i_item_desc,i_item_id,revenueratio] #1 + WholeStageCodegen (9) + Project [_w0,_we0,i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue] + InputAdapter + Window [_w1,i_class] + WholeStageCodegen (8) + Sort [i_class] + InputAdapter + Exchange [i_class] #2 + WholeStageCodegen (7) + HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,sum] [_w0,_w1,itemrevenue,sum,sum(UnscaledValue(ss_ext_sales_price))] + InputAdapter + Exchange [i_category,i_class,i_current_price,i_item_desc,i_item_id] #3 + WholeStageCodegen (6) + HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,ss_ext_sales_price] [sum,sum] + Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,ss_ext_sales_price] + SortMergeJoin [i_item_sk,ss_item_sk] + InputAdapter + WholeStageCodegen (3) + Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #4 + WholeStageCodegen (2) + Project [ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] + InputAdapter + WholeStageCodegen (5) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #6 + WholeStageCodegen (4) + Filter [i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98/explain.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98/explain.txt new file mode 100644 index 0000000000000..017f79da89705 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98/explain.txt @@ -0,0 +1,142 @@ +== Physical Plan == +* Sort (25) ++- Exchange (24) + +- * Project (23) + +- Window (22) + +- * Sort (21) + +- Exchange (20) + +- * HashAggregate (19) + +- Exchange (18) + +- * HashAggregate (17) + +- * Project (16) + +- * BroadcastHashJoin Inner BuildRight (15) + :- * Project (9) + : +- * BroadcastHashJoin Inner BuildRight (8) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.store_sales (1) + : +- BroadcastExchange (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.item (4) + +- BroadcastExchange (14) + +- * Project (13) + +- * Filter (12) + +- * ColumnarToRow (11) + +- Scan parquet default.date_dim (10) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] + +(3) Filter [codegen id : 3] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] +Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(4) Scan parquet default.item +Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/item] +PushedFilters: [In(i_category, [Sports,Books,Home]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow 
[codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(6) Filter [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Condition : (i_category#9 IN (Sports,Books,Home) AND isnotnull(i_item_sk#4)) + +(7) BroadcastExchange +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#4] +Join condition: None + +(9) Project [codegen id : 3] +Output [7]: [ss_sold_date_sk#1, ss_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(10) Scan parquet default.date_dim +Output [2]: [d_date_sk#11, d_date#12] +Batched: true +Location: InMemoryFileIndex [file:/Users/yi.wu/IdeaProjects/spark/sql/core/spark-warehouse/org.apache.spark.sql.TPCDSV2_7_PlanStabilitySuite/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#11, d_date#12] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#11, d_date#12] +Condition : (((isnotnull(d_date#12) AND (d_date#12 >= 10644)) AND (d_date#12 <= 10674)) AND isnotnull(d_date_sk#11)) + +(13) Project [codegen id : 2] +Output [1]: [d_date_sk#11] +Input [2]: [d_date_sk#11, d_date#12] + +(14) BroadcastExchange +Input [1]: [d_date_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: 
[d_date_sk#11] +Join condition: None + +(16) Project [codegen id : 3] +Output [6]: [ss_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [8]: [ss_sold_date_sk#1, ss_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#11] + +(17) HashAggregate [codegen id : 3] +Input [6]: [ss_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#14] +Results [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#15] + +(18) Exchange +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#15] +Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), true, [id=#16] + +(19) HashAggregate [codegen id : 4] +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#15] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#17] +Results [8]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#17,17,2) AS itemrevenue#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#17,17,2) AS _w0#19, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#17,17,2) AS _w1#20] + +(20) Exchange +Input [8]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20] +Arguments: hashpartitioning(i_class#8, 5), true, [id=#21] + +(21) Sort [codegen id : 5] +Input [8]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20] +Arguments: 
[i_class#8 ASC NULLS FIRST], false, 0 + +(22) Window +Input [8]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20] +Arguments: [sum(_w1#20) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#22], [i_class#8] + +(23) Project [codegen id : 6] +Output [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#19) * 100.00), DecimalType(21,2), true) as decimal(27,2))) / promote_precision(_we0#22)), DecimalType(38,17), true) AS revenueratio#23] +Input [9]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, _we0#22] + +(24) Exchange +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, revenueratio#23] +Arguments: rangepartitioning(i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#23 ASC NULLS FIRST, 5), true, [id=#24] + +(25) Sort [codegen id : 7] +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, revenueratio#23] +Arguments: [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#23 ASC NULLS FIRST], true, 0 + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98/simplified.txt new file mode 100644 index 0000000000000..13113a1aec48a --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98/simplified.txt @@ -0,0 +1,41 @@ +WholeStageCodegen (7) + Sort [i_category,i_class,i_item_desc,i_item_id,revenueratio] + InputAdapter + Exchange [i_category,i_class,i_item_desc,i_item_id,revenueratio] 
#1 + WholeStageCodegen (6) + Project [_w0,_we0,i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue] + InputAdapter + Window [_w1,i_class] + WholeStageCodegen (5) + Sort [i_class] + InputAdapter + Exchange [i_class] #2 + WholeStageCodegen (4) + HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,sum] [_w0,_w1,itemrevenue,sum,sum(UnscaledValue(ss_ext_sales_price))] + InputAdapter + Exchange [i_category,i_class,i_current_price,i_item_desc,i_item_id] #3 + WholeStageCodegen (3) + HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,ss_ext_sales_price] [sum,sum] + Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,ss_ext_sales_price,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Filter [i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_date_sk] diff --git a/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala new file mode 100644 index 0000000000000..8368875039041 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala @@ -0,0 +1,327 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import java.io.File +import java.nio.charset.StandardCharsets + +import scala.collection.mutable + +import org.apache.commons.io.FileUtils + +import org.apache.spark.sql.catalyst.expressions.AttributeSet +import org.apache.spark.sql.catalyst.util._ +import org.apache.spark.sql.execution._ +import org.apache.spark.sql.execution.adaptive.DisableAdaptiveExecutionSuite +import org.apache.spark.sql.execution.exchange.{Exchange, ReusedExchangeExec} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.tags.ExtendedSQLTest + +// scalastyle:off line.size.limit +/** + * Check that TPC-DS SparkPlans don't change. + * If there are plan differences, the error message looks like this: + * Plans did not match: + * last approved simplified plan: /path/to/tpcds-plan-stability/approved-plans-xxx/q1/simplified.txt + * last approved explain plan: /path/to/tpcds-plan-stability/approved-plans-xxx/q1/explain.txt + * [last approved simplified plan] + * + * actual simplified plan: /path/to/tmp/q1.actual.simplified.txt + * actual explain plan: /path/to/tmp/q1.actual.explain.txt + * [actual simplified plan] + * + * The explain files are saved to help debug later, they are not checked. Only the simplified + * plans are checked (by string comparison). 
+ * + * + * To run the entire test suite: + * {{{ + * build/sbt "sql/test-only *PlanStability[WithStats]Suite" + * }}} + * + * To run a single test file upon change: + * {{{ + * build/sbt "sql/test-only *PlanStability[WithStats]Suite -- -z (tpcds-v1.4/q49)" + * }}} + * + * To re-generate golden files for entire suite, run: + * {{{ + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/test-only *PlanStability[WithStats]Suite" + * }}} + * + * To re-generate golden file for a single test, run: + * {{{ + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/test-only *PlanStability[WithStats]Suite -- -z (tpcds-v1.4/q49)" + * }}} + */ +// scalastyle:on line.size.limit +trait PlanStabilitySuite extends TPCDSBase with DisableAdaptiveExecutionSuite { + + private val originalMaxToStringFields = conf.maxToStringFields + + override def beforeAll(): Unit = { + conf.setConf(SQLConf.MAX_TO_STRING_FIELDS, Int.MaxValue) + super.beforeAll() + } + + override def afterAll(): Unit = { + super.afterAll() + conf.setConf(SQLConf.MAX_TO_STRING_FIELDS, originalMaxToStringFields) + } + + private val regenerateGoldenFiles: Boolean = System.getenv("SPARK_GENERATE_GOLDEN_FILES") == "1" + + protected val baseResourcePath = { + // use the same way as `SQLQueryTestSuite` to get the resource path + java.nio.file.Paths.get("src", "test", "resources", "tpcds-plan-stability").toFile + } + + private val referenceRegex = "#\\d+".r + private val normalizeRegex = "#\\d+L?".r + + def goldenFilePath: String + + private def getDirForTest(name: String): File = { + new File(goldenFilePath, name) + } + + private def isApproved(dir: File, actualSimplifiedPlan: String): Boolean = { + val file = new File(dir, "simplified.txt") + val approved = FileUtils.readFileToString(file, StandardCharsets.UTF_8) + approved == actualSimplifiedPlan + } + + /** + * Serialize and save this SparkPlan. + * The resulting file is used by [[checkWithApproved]] to check stability. 
+ * + * @param plan the SparkPlan + * @param name the name of the query + * @param explain the full explain output; this is saved to help debug later as the simplified + * plan is not too useful for debugging + */ + private def generateApprovedPlanFile(plan: SparkPlan, name: String, explain: String): Unit = { + val dir = getDirForTest(name) + val simplified = getSimplifiedPlan(plan) + val foundMatch = dir.exists() && isApproved(dir, simplified) + + if (!foundMatch) { + FileUtils.deleteDirectory(dir) + assert(dir.mkdirs()) + + val file = new File(dir, "simplified.txt") + FileUtils.writeStringToFile(file, simplified, StandardCharsets.UTF_8) + val fileOriginalPlan = new File(dir, "explain.txt") + FileUtils.writeStringToFile(fileOriginalPlan, explain, StandardCharsets.UTF_8) + logDebug(s"APPROVED: $file $fileOriginalPlan") + } + } + + private def checkWithApproved(plan: SparkPlan, name: String, explain: String): Unit = { + val dir = getDirForTest(name) + val tempDir = FileUtils.getTempDirectory + val actualSimplified = getSimplifiedPlan(plan) + val foundMatch = isApproved(dir, actualSimplified) + + if (!foundMatch) { + // show diff with last approved + val approvedSimplifiedFile = new File(dir, "simplified.txt") + val approvedExplainFile = new File(dir, "explain.txt") + + val actualSimplifiedFile = new File(tempDir, s"$name.actual.simplified.txt") + val actualExplainFile = new File(tempDir, s"$name.actual.explain.txt") + + val approvedSimplified = FileUtils.readFileToString( + approvedSimplifiedFile, StandardCharsets.UTF_8) + // write out for debugging + FileUtils.writeStringToFile(actualSimplifiedFile, actualSimplified, StandardCharsets.UTF_8) + FileUtils.writeStringToFile(actualExplainFile, explain, StandardCharsets.UTF_8) + + fail( + s""" + |Plans did not match: + |last approved simplified plan: ${approvedSimplifiedFile.getAbsolutePath} + |last approved explain plan: ${approvedExplainFile.getAbsolutePath} + | + |$approvedSimplified + | + |actual simplified plan: 
${actualSimplifiedFile.getAbsolutePath} + |actual explain plan: ${actualExplainFile.getAbsolutePath} + | + |$actualSimplified + """.stripMargin) + } + } + + /** + * Get the simplified plan for a specific SparkPlan. In the simplified plan, the node only has + * its name and all the sorted reference and produced attribute names (without ExprId) and its + * simplified children as well. And we'll only identify the performance sensitive nodes, e.g., + * Exchange, Subquery, in the simplified plan. Given such an identical but simplified plan, we'd + * expect to avoid frequent plan changing and catch the possible meaningful regression. + */ + private def getSimplifiedPlan(plan: SparkPlan): String = { + val exchangeIdMap = new mutable.HashMap[Int, Int]() + val subqueriesMap = new mutable.HashMap[Int, Int]() + + def getId(plan: SparkPlan): Int = plan match { + case exchange: Exchange => exchangeIdMap.getOrElseUpdate(exchange.id, exchangeIdMap.size + 1) + case ReusedExchangeExec(_, exchange) => + exchangeIdMap.getOrElseUpdate(exchange.id, exchangeIdMap.size + 1) + case subquery: SubqueryExec => + subqueriesMap.getOrElseUpdate(subquery.id, subqueriesMap.size + 1) + case subquery: SubqueryBroadcastExec => + subqueriesMap.getOrElseUpdate(subquery.id, subqueriesMap.size + 1) + case ReusedSubqueryExec(subquery) => + subqueriesMap.getOrElseUpdate(subquery.id, subqueriesMap.size + 1) + case _ => -1 + } + + /** + * Some expression names have ExprId in them due to using things such as + * "sum(sr_return_amt#14)", so we remove all of these using regex + */ + def cleanUpReferences(references: AttributeSet): String = { + referenceRegex.replaceAllIn(references.toSeq.map(_.name).sorted.mkString(","), "") + } + + /** + * Generate a simplified plan as a string + * Example output: + * TakeOrderedAndProject [c_customer_id] + * WholeStageCodegen + * Project [c_customer_id] + */ + def getSimplifiedPlan(node: SparkPlan, depth: Int): String = { + val padding = " " * depth + var thisNode = 
node.nodeName + if (node.references.nonEmpty) { + thisNode += s" [${cleanUpReferences(node.references)}]" + } + if (node.producedAttributes.nonEmpty) { + thisNode += s" [${cleanUpReferences(node.producedAttributes)}]" + } + val id = getId(node) + if (id > 0) { + thisNode += s" #$id" + } + val childrenSimplified = node.children.map(getSimplifiedPlan(_, depth + 1)) + val subqueriesSimplified = node.subqueries.map(getSimplifiedPlan(_, depth + 1)) + s"$padding$thisNode\n${subqueriesSimplified.mkString("")}${childrenSimplified.mkString("")}" + } + + getSimplifiedPlan(plan, 0) + } + + private def normalizeIds(query: String): String = { + val map = new mutable.HashMap[String, String]() + normalizeRegex.findAllMatchIn(query).map(_.toString) + .foreach(map.getOrElseUpdate(_, (map.size + 1).toString)) + normalizeRegex.replaceAllIn(query, regexMatch => s"#${map(regexMatch.toString)}") + } + + /** + * Test a TPC-DS query. Depending on the settings this test will either check if the plan matches + * a golden file or it will create a new golden file. 
+ */ + protected def testQuery(tpcdsGroup: String, query: String, suffix: String = ""): Unit = { + val queryString = resourceToString(s"$tpcdsGroup/$query.sql", + classLoader = Thread.currentThread().getContextClassLoader) + val qe = sql(queryString).queryExecution + val plan = qe.executedPlan + val explain = normalizeIds(qe.explainString(FormattedMode)) + + if (regenerateGoldenFiles) { + generateApprovedPlanFile(plan, query + suffix, explain) + } else { + checkWithApproved(plan, query + suffix, explain) + } + } +} + +class TPCDSV1_4_PlanStabilitySuite extends PlanStabilitySuite { + override val goldenFilePath: String = + new File(baseResourcePath, s"approved-plans-v1_4").getAbsolutePath + + tpcdsQueries.foreach { q => + test(s"check simplified (tpcds-v1.4/$q)") { + testQuery("tpcds", q) + } + } +} + +class TPCDSV1_4_PlanStabilityWithStatsSuite extends PlanStabilitySuite { + override def injectStats: Boolean = true + + override val goldenFilePath: String = + new File(baseResourcePath, s"approved-plans-v1_4").getAbsolutePath + + tpcdsQueries.foreach { q => + test(s"check simplified sf100 (tpcds-v1.4/$q)") { + testQuery("tpcds", q, ".sf100") + } + } +} + +class TPCDSV2_7_PlanStabilitySuite extends PlanStabilitySuite { + override val goldenFilePath: String = + new File(baseResourcePath, s"approved-plans-v2_7").getAbsolutePath + + tpcdsQueriesV2_7_0.foreach { q => + test(s"check simplified (tpcds-v2.7.0/$q)") { + testQuery("tpcds-v2.7.0", q) + } + } +} + +class TPCDSV2_7_PlanStabilityWithStatsSuite extends PlanStabilitySuite { + override def injectStats: Boolean = true + + override val goldenFilePath: String = + new File(baseResourcePath, s"approved-plans-v2_7").getAbsolutePath + + tpcdsQueriesV2_7_0.foreach { q => + test(s"check simplified sf100 (tpcds-v2.7.0/$q)") { + testQuery("tpcds-v2.7.0", q, ".sf100") + } + } +} + +class TPCDSModifiedPlanStabilitySuite extends PlanStabilitySuite { + override val goldenFilePath: String = + new File(baseResourcePath, 
s"approved-plans-modified").getAbsolutePath + + modifiedTPCDSQueries.foreach { q => + test(s"check simplified (tpcds-modifiedQueries/$q)") { + testQuery("tpcds-modifiedQueries", q) + } + } +} + +class TPCDSModifiedPlanStabilityWithStatsSuite extends PlanStabilitySuite { + override def injectStats: Boolean = true + + override val goldenFilePath: String = + new File(baseResourcePath, s"approved-plans-modified").getAbsolutePath + + modifiedTPCDSQueries.foreach { q => + test(s"check simplified sf100 (tpcds-modifiedQueries/$q)") { + testQuery("tpcds-modifiedQueries", q, ".sf100") + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSSchema.scala b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSBase.scala similarity index 82% rename from sql/core/src/test/scala/org/apache/spark/sql/TPCDSSchema.scala rename to sql/core/src/test/scala/org/apache/spark/sql/TPCDSBase.scala index 43974ad22f2ef..9f420cac971fb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSSchema.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSBase.scala @@ -17,7 +17,36 @@ package org.apache.spark.sql -trait TPCDSSchema { +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession + +trait TPCDSBase extends SharedSparkSession { + + // The TPCDS queries below are based on v1.4 + val tpcdsQueries = Seq( + "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", + "q12", "q13", "q14a", "q14b", "q15", "q16", "q17", "q18", "q19", "q20", + "q21", "q22", "q23a", "q23b", "q24a", "q24b", "q25", "q26", "q27", "q28", "q29", "q30", + "q31", "q32", "q33", "q34", "q35", "q36", "q37", "q38", "q39a", "q39b", "q40", + "q41", "q42", "q43", "q44", "q45", "q46", "q47", "q48", "q49", "q50", + "q51", "q52", "q53", "q54", "q55", "q56", "q57", "q58", "q59", "q60", + "q61", "q62", "q63", "q64", "q65", "q66", "q67", "q68", "q69", "q70", + "q71", "q72", "q73", "q74", "q75", "q76", 
"q77", "q78", "q79", "q80", + "q81", "q82", "q83", "q84", "q85", "q86", "q87", "q88", "q89", "q90", + "q91", "q92", "q93", "q94", "q95", "q96", "q97", "q98", "q99") + + // This list only includes TPCDS v2.7 queries that are different from v1.4 ones + val tpcdsQueriesV2_7_0 = Seq( + "q5a", "q6", "q10a", "q11", "q12", "q14", "q14a", "q18a", + "q20", "q22", "q22a", "q24", "q27a", "q34", "q35", "q35a", "q36a", "q47", "q49", + "q51a", "q57", "q64", "q67a", "q70a", "q72", "q74", "q75", "q77a", "q78", + "q80a", "q86a", "q98") + + // These queries are from https://github.com/cloudera/impala-tpcds-kit/tree/master/queries + val modifiedTPCDSQueries = Seq( + "q3", "q7", "q10", "q19", "q27", "q34", "q42", "q43", "q46", "q52", "q53", "q55", "q59", + "q63", "q65", "q68", "q73", "q79", "q89", "q98", "ss_max") private val tableColumns = Map( "store_sales" -> @@ -256,4 +285,39 @@ trait TPCDSSchema { |${options.mkString("\n")} """.stripMargin) } + + private val originalCBCEnabled = conf.cboEnabled + private val originalPlanStatsEnabled = conf.planStatsEnabled + private val originalJoinReorderEnabled = conf.joinReorderEnabled + + override def beforeAll(): Unit = { + super.beforeAll() + if (injectStats) { + // Sets configurations for enabling the optimization rules that + // exploit data statistics. 
+ conf.setConf(SQLConf.CBO_ENABLED, true) + conf.setConf(SQLConf.PLAN_STATS_ENABLED, true) + conf.setConf(SQLConf.JOIN_REORDER_ENABLED, true) + } + tableNames.foreach { tableName => + createTable(spark, tableName) + if (injectStats) { + // To simulate plan generation on actual TPC-DS data, injects data stats here + spark.sessionState.catalog.alterTableStats( + TableIdentifier(tableName), Some(TPCDSTableStats.sf100TableStats(tableName))) + } + } + } + + override def afterAll(): Unit = { + conf.setConf(SQLConf.CBO_ENABLED, originalCBCEnabled) + conf.setConf(SQLConf.PLAN_STATS_ENABLED, originalPlanStatsEnabled) + conf.setConf(SQLConf.JOIN_REORDER_ENABLED, originalJoinReorderEnabled) + tableNames.foreach { tableName => + spark.sessionState.catalog.dropTable(TableIdentifier(tableName), true, true) + } + super.afterAll() + } + + protected def injectStats: Boolean = false } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala index afbdd971a0922..30751af61d10e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala @@ -17,74 +17,34 @@ package org.apache.spark.sql -import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.util.resourceToString -import org.apache.spark.sql.internal.SQLConf /** * This test suite ensures all the TPC-DS queries can be successfully analyzed, optimized * and compiled without hitting the max iteration threshold. 
*/ -class TPCDSQuerySuite extends BenchmarkQueryTest with TPCDSSchema { - - override def beforeAll(): Unit = { - super.beforeAll() - for (tableName <- tableNames) { - createTable(spark, tableName) - } - } - - // The TPCDS queries below are based on v1.4 - val tpcdsQueries = Seq( - "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", - "q12", "q13", "q14a", "q14b", "q15", "q16", "q17", "q18", "q19", "q20", - "q21", "q22", "q23a", "q23b", "q24a", "q24b", "q25", "q26", "q27", "q28", "q29", "q30", - "q31", "q32", "q33", "q34", "q35", "q36", "q37", "q38", "q39a", "q39b", "q40", - "q41", "q42", "q43", "q44", "q45", "q46", "q47", "q48", "q49", "q50", - "q51", "q52", "q53", "q54", "q55", "q56", "q57", "q58", "q59", "q60", - "q61", "q62", "q63", "q64", "q65", "q66", "q67", "q68", "q69", "q70", - "q71", "q72", "q73", "q74", "q75", "q76", "q77", "q78", "q79", "q80", - "q81", "q82", "q83", "q84", "q85", "q86", "q87", "q88", "q89", "q90", - "q91", "q92", "q93", "q94", "q95", "q96", "q97", "q98", "q99") - - val sqlConfgs: Seq[(String, String)] = Nil +class TPCDSQuerySuite extends BenchmarkQueryTest with TPCDSBase { tpcdsQueries.foreach { name => val queryString = resourceToString(s"tpcds/$name.sql", classLoader = Thread.currentThread().getContextClassLoader) test(name) { - withSQLConf(sqlConfgs: _*) { - // check the plans can be properly generated - val plan = sql(queryString).queryExecution.executedPlan - checkGeneratedCode(plan) - } + // check the plans can be properly generated + val plan = sql(queryString).queryExecution.executedPlan + checkGeneratedCode(plan) } } - // This list only includes TPCDS v2.7 queries that are different from v1.4 ones - val tpcdsQueriesV2_7_0 = Seq( - "q5a", "q6", "q10a", "q11", "q12", "q14", "q14a", "q18a", - "q20", "q22", "q22a", "q24", "q27a", "q34", "q35", "q35a", "q36a", "q47", "q49", - "q51a", "q57", "q64", "q67a", "q70a", "q72", "q74", "q75", "q77a", "q78", - "q80a", "q86a", "q98") - tpcdsQueriesV2_7_0.foreach { name => val 
queryString = resourceToString(s"tpcds-v2.7.0/$name.sql", classLoader = Thread.currentThread().getContextClassLoader) test(s"$name-v2.7") { - withSQLConf(sqlConfgs: _*) { - // check the plans can be properly generated - val plan = sql(queryString).queryExecution.executedPlan - checkGeneratedCode(plan) - } + // check the plans can be properly generated + val plan = sql(queryString).queryExecution.executedPlan + checkGeneratedCode(plan) } } - // These queries are from https://github.com/cloudera/impala-tpcds-kit/tree/master/queries - val modifiedTPCDSQueries = Seq( - "q3", "q7", "q10", "q19", "q27", "q34", "q42", "q43", "q46", "q52", "q53", "q55", "q59", - "q63", "q65", "q68", "q73", "q79", "q89", "q98", "ss_max") - // List up the known queries having too large code in a generated function. // A JIRA file for `modified-q3` is as follows; // [SPARK-29128] Split predicate code in OR expressions @@ -103,21 +63,5 @@ class TPCDSQuerySuite extends BenchmarkQueryTest with TPCDSSchema { } class TPCDSQueryWithStatsSuite extends TPCDSQuerySuite { - - override def beforeAll(): Unit = { - super.beforeAll() - for (tableName <- tableNames) { - // To simulate plan generation on actual TPCDS data, injects data stats here - spark.sessionState.catalog.alterTableStats( - TableIdentifier(tableName), Some(TPCDSTableStats.sf100TableStats(tableName))) - } - } - - // Sets configurations for enabling the optimization rules that - // exploit data statistics. - override val sqlConfgs = Seq( - SQLConf.CBO_ENABLED.key -> "true", - SQLConf.PLAN_STATS_ENABLED.key -> "true", - SQLConf.JOIN_REORDER_ENABLED.key -> "true" - ) + override def injectStats: Boolean = true }