From 9c1e87bb1f8546d9281735cd7040ed7e28fe762b Mon Sep 17 00:00:00 2001 From: qstommyshu Date: Wed, 26 Mar 2025 17:34:42 -0400 Subject: [PATCH 01/17] add `cargo insta` to dev dependencies --- Cargo.lock | 1 + datafusion/substrait/Cargo.toml | 1 + 2 files changed, 2 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 9f9263e52903..fb6aa3ce2482 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2568,6 +2568,7 @@ dependencies = [ "chrono", "datafusion", "datafusion-functions-aggregate", + "insta", "itertools 0.14.0", "object_store", "pbjson-types", diff --git a/datafusion/substrait/Cargo.toml b/datafusion/substrait/Cargo.toml index edc3b8d2f214..d23e986914fc 100644 --- a/datafusion/substrait/Cargo.toml +++ b/datafusion/substrait/Cargo.toml @@ -48,6 +48,7 @@ datafusion = { workspace = true, features = ["nested_expressions"] } datafusion-functions-aggregate = { workspace = true } serde_json = "1.0" tokio = { workspace = true } +insta = { workspace = true } [features] default = ["physical"] From c3be2ebfeaeb5afff841810e38eff657f1b86a3f Mon Sep 17 00:00:00 2001 From: qstommyshu Date: Wed, 26 Mar 2025 17:35:54 -0400 Subject: [PATCH 02/17] migrate `consumer_integration.rs` tests to `insta` --- .../tests/cases/consumer_integration.rs | 311 ++---------------- ...ests__test_select_count_from_select_1.snap | 6 + ...tion__tests__test_select_window_count.snap | 7 + ...umer_integration__tests__tpch_test_01.snap | 10 + ...umer_integration__tests__tpch_test_02.snap | 28 ++ ...umer_integration__tests__tpch_test_03.snap | 16 + ...umer_integration__tests__tpch_test_04.snap | 13 + ...umer_integration__tests__tpch_test_05.snap | 20 ++ ...umer_integration__tests__tpch_test_06.snap | 8 + ...umer_integration__tests__tpch_test_10.snap | 18 + ...umer_integration__tests__tpch_test_11.snap | 25 ++ ...umer_integration__tests__tpch_test_12.snap | 12 + ...umer_integration__tests__tpch_test_13.snap | 14 + ...umer_integration__tests__tpch_test_14.snap | 11 + 
...umer_integration__tests__tpch_test_16.snap | 16 + ...umer_integration__tests__tpch_test_18.snap | 21 ++ ...umer_integration__tests__tpch_test_19.snap | 10 + ...umer_integration__tests__tpch_test_20.snap | 24 ++ ...umer_integration__tests__tpch_test_21.snap | 23 ++ ...umer_integration__tests__tpch_test_22.snap | 18 + 20 files changed, 325 insertions(+), 286 deletions(-) create mode 100644 datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__test_select_count_from_select_1.snap create mode 100644 datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__test_select_window_count.snap create mode 100644 datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_01.snap create mode 100644 datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_02.snap create mode 100644 datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_03.snap create mode 100644 datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_04.snap create mode 100644 datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_05.snap create mode 100644 datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_06.snap create mode 100644 datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_10.snap create mode 100644 datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_11.snap create mode 100644 datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_12.snap create mode 100644 
datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_13.snap create mode 100644 datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_14.snap create mode 100644 datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_16.snap create mode 100644 datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_18.snap create mode 100644 datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_19.snap create mode 100644 datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_20.snap create mode 100644 datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_21.snap create mode 100644 datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_22.snap diff --git a/datafusion/substrait/tests/cases/consumer_integration.rs b/datafusion/substrait/tests/cases/consumer_integration.rs index 1f1a15abb837..28b41f82cdca 100644 --- a/datafusion/substrait/tests/cases/consumer_integration.rs +++ b/datafusion/substrait/tests/cases/consumer_integration.rs @@ -28,6 +28,7 @@ mod tests { use datafusion::common::Result; use datafusion::prelude::SessionContext; use datafusion_substrait::logical_plan::consumer::from_substrait_plan; + use insta::assert_snapshot; use std::fs::File; use std::io::BufReader; use substrait::proto::Plan; @@ -49,125 +50,42 @@ mod tests { #[tokio::test] async fn tpch_test_01() -> Result<()> { let plan_str = tpch_plan_to_string(1).await?; - assert_eq!( - plan_str, - "Projection: LINEITEM.L_RETURNFLAG, LINEITEM.L_LINESTATUS, sum(LINEITEM.L_QUANTITY) AS SUM_QTY, sum(LINEITEM.L_EXTENDEDPRICE) AS SUM_BASE_PRICE, sum(LINEITEM.L_EXTENDEDPRICE 
* Int32(1) - LINEITEM.L_DISCOUNT) AS SUM_DISC_PRICE, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT * Int32(1) + LINEITEM.L_TAX) AS SUM_CHARGE, avg(LINEITEM.L_QUANTITY) AS AVG_QTY, avg(LINEITEM.L_EXTENDEDPRICE) AS AVG_PRICE, avg(LINEITEM.L_DISCOUNT) AS AVG_DISC, count(Int64(1)) AS COUNT_ORDER\ - \n Sort: LINEITEM.L_RETURNFLAG ASC NULLS LAST, LINEITEM.L_LINESTATUS ASC NULLS LAST\ - \n Aggregate: groupBy=[[LINEITEM.L_RETURNFLAG, LINEITEM.L_LINESTATUS]], aggr=[[sum(LINEITEM.L_QUANTITY), sum(LINEITEM.L_EXTENDEDPRICE), sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT), sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT * Int32(1) + LINEITEM.L_TAX), avg(LINEITEM.L_QUANTITY), avg(LINEITEM.L_EXTENDEDPRICE), avg(LINEITEM.L_DISCOUNT), count(Int64(1))]]\ - \n Projection: LINEITEM.L_RETURNFLAG, LINEITEM.L_LINESTATUS, LINEITEM.L_QUANTITY, LINEITEM.L_EXTENDEDPRICE, LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT), LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT) * (CAST(Int32(1) AS Decimal128(15, 2)) + LINEITEM.L_TAX), LINEITEM.L_DISCOUNT\ - \n Filter: LINEITEM.L_SHIPDATE <= Date32(\"1998-12-01\") - IntervalDayTime(\"IntervalDayTime { days: 0, milliseconds: 10368000 }\")\ - \n TableScan: LINEITEM" - ); + assert_snapshot!("tpch_test_01", plan_str); Ok(()) } #[tokio::test] async fn tpch_test_02() -> Result<()> { let plan_str = tpch_plan_to_string(2).await?; - assert_eq!( - plan_str, - "Limit: skip=0, fetch=100\ - \n Sort: SUPPLIER.S_ACCTBAL DESC NULLS FIRST, NATION.N_NAME ASC NULLS LAST, SUPPLIER.S_NAME ASC NULLS LAST, PART.P_PARTKEY ASC NULLS LAST\ - \n Projection: SUPPLIER.S_ACCTBAL, SUPPLIER.S_NAME, NATION.N_NAME, PART.P_PARTKEY, PART.P_MFGR, SUPPLIER.S_ADDRESS, SUPPLIER.S_PHONE, SUPPLIER.S_COMMENT\ - \n Filter: PART.P_PARTKEY = PARTSUPP.PS_PARTKEY AND SUPPLIER.S_SUPPKEY = PARTSUPP.PS_SUPPKEY AND PART.P_SIZE = Int32(15) AND PART.P_TYPE LIKE CAST(Utf8(\"%BRASS\") AS 
Utf8) AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_REGIONKEY = REGION.R_REGIONKEY AND REGION.R_NAME = Utf8(\"EUROPE\") AND PARTSUPP.PS_SUPPLYCOST = ()\ - \n Subquery:\ - \n Aggregate: groupBy=[[]], aggr=[[min(PARTSUPP.PS_SUPPLYCOST)]]\ - \n Projection: PARTSUPP.PS_SUPPLYCOST\ - \n Filter: PARTSUPP.PS_PARTKEY = PARTSUPP.PS_PARTKEY AND SUPPLIER.S_SUPPKEY = PARTSUPP.PS_SUPPKEY AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_REGIONKEY = REGION.R_REGIONKEY AND REGION.R_NAME = Utf8(\"EUROPE\")\ - \n Cross Join: \ - \n Cross Join: \ - \n Cross Join: \ - \n TableScan: PARTSUPP\ - \n TableScan: SUPPLIER\ - \n TableScan: NATION\ - \n TableScan: REGION\ - \n Cross Join: \ - \n Cross Join: \ - \n Cross Join: \ - \n Cross Join: \ - \n TableScan: PART\ - \n TableScan: SUPPLIER\ - \n TableScan: PARTSUPP\ - \n TableScan: NATION\ - \n TableScan: REGION" - ); + assert_snapshot!("tpch_test_02", plan_str); Ok(()) } #[tokio::test] async fn tpch_test_03() -> Result<()> { let plan_str = tpch_plan_to_string(3).await?; - assert_eq!( - plan_str, - "Projection: LINEITEM.L_ORDERKEY, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS REVENUE, ORDERS.O_ORDERDATE, ORDERS.O_SHIPPRIORITY\ - \n Limit: skip=0, fetch=10\ - \n Sort: sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) DESC NULLS FIRST, ORDERS.O_ORDERDATE ASC NULLS LAST\ - \n Projection: LINEITEM.L_ORDERKEY, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT), ORDERS.O_ORDERDATE, ORDERS.O_SHIPPRIORITY\ - \n Aggregate: groupBy=[[LINEITEM.L_ORDERKEY, ORDERS.O_ORDERDATE, ORDERS.O_SHIPPRIORITY]], aggr=[[sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT)]]\ - \n Projection: LINEITEM.L_ORDERKEY, ORDERS.O_ORDERDATE, ORDERS.O_SHIPPRIORITY, LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT)\ - \n Filter: CUSTOMER.C_MKTSEGMENT = Utf8(\"BUILDING\") AND CUSTOMER.C_CUSTKEY = ORDERS.O_CUSTKEY AND LINEITEM.L_ORDERKEY = ORDERS.O_ORDERKEY AND 
ORDERS.O_ORDERDATE < CAST(Utf8(\"1995-03-15\") AS Date32) AND LINEITEM.L_SHIPDATE > CAST(Utf8(\"1995-03-15\") AS Date32)\ - \n Cross Join: \ - \n Cross Join: \ - \n TableScan: LINEITEM\ - \n TableScan: CUSTOMER\ - \n TableScan: ORDERS" - ); + assert_snapshot!("tpch_test_03", plan_str); Ok(()) } #[tokio::test] async fn tpch_test_04() -> Result<()> { let plan_str = tpch_plan_to_string(4).await?; - assert_eq!( - plan_str, - "Projection: ORDERS.O_ORDERPRIORITY, count(Int64(1)) AS ORDER_COUNT\ - \n Sort: ORDERS.O_ORDERPRIORITY ASC NULLS LAST\ - \n Aggregate: groupBy=[[ORDERS.O_ORDERPRIORITY]], aggr=[[count(Int64(1))]]\ - \n Projection: ORDERS.O_ORDERPRIORITY\ - \n Filter: ORDERS.O_ORDERDATE >= CAST(Utf8(\"1993-07-01\") AS Date32) AND ORDERS.O_ORDERDATE < CAST(Utf8(\"1993-10-01\") AS Date32) AND EXISTS ()\ - \n Subquery:\ - \n Filter: LINEITEM.L_ORDERKEY = LINEITEM.L_ORDERKEY AND LINEITEM.L_COMMITDATE < LINEITEM.L_RECEIPTDATE\ - \n TableScan: LINEITEM\ - \n TableScan: ORDERS" - ); + assert_snapshot!("tpch_test_04", plan_str); Ok(()) } #[tokio::test] async fn tpch_test_05() -> Result<()> { let plan_str = tpch_plan_to_string(5).await?; - assert_eq!( - plan_str, - "Projection: NATION.N_NAME, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS REVENUE\ - \n Sort: sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) DESC NULLS FIRST\ - \n Aggregate: groupBy=[[NATION.N_NAME]], aggr=[[sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT)]]\ - \n Projection: NATION.N_NAME, LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT)\ - \n Filter: CUSTOMER.C_CUSTKEY = ORDERS.O_CUSTKEY AND LINEITEM.L_ORDERKEY = ORDERS.O_ORDERKEY AND LINEITEM.L_SUPPKEY = SUPPLIER.S_SUPPKEY AND CUSTOMER.C_NATIONKEY = SUPPLIER.S_NATIONKEY AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_REGIONKEY = REGION.R_REGIONKEY AND REGION.R_NAME = Utf8(\"ASIA\") AND ORDERS.O_ORDERDATE >= CAST(Utf8(\"1994-01-01\") AS Date32) AND 
ORDERS.O_ORDERDATE < CAST(Utf8(\"1995-01-01\") AS Date32)\ - \n Cross Join: \ - \n Cross Join: \ - \n Cross Join: \ - \n Cross Join: \ - \n Cross Join: \ - \n TableScan: CUSTOMER\ - \n TableScan: ORDERS\ - \n TableScan: LINEITEM\ - \n TableScan: SUPPLIER\ - \n TableScan: NATION\ - \n TableScan: REGION" - ); + assert_snapshot!("tpch_test_05", plan_str); Ok(()) } #[tokio::test] async fn tpch_test_06() -> Result<()> { let plan_str = tpch_plan_to_string(6).await?; - assert_eq!( - plan_str, - "Aggregate: groupBy=[[]], aggr=[[sum(LINEITEM.L_EXTENDEDPRICE * LINEITEM.L_DISCOUNT) AS REVENUE]]\ - \n Projection: LINEITEM.L_EXTENDEDPRICE * LINEITEM.L_DISCOUNT\ - \n Filter: LINEITEM.L_SHIPDATE >= CAST(Utf8(\"1994-01-01\") AS Date32) AND LINEITEM.L_SHIPDATE < CAST(Utf8(\"1995-01-01\") AS Date32) AND LINEITEM.L_DISCOUNT >= Decimal128(Some(5),3,2) AND LINEITEM.L_DISCOUNT <= Decimal128(Some(7),3,2) AND LINEITEM.L_QUANTITY < CAST(Int32(24) AS Decimal128(15, 2))\ - \n TableScan: LINEITEM" - ); + assert_snapshot!("tpch_test_06", plan_str); Ok(()) } @@ -175,7 +93,7 @@ mod tests { #[tokio::test] async fn tpch_test_07() -> Result<()> { let plan_str = tpch_plan_to_string(7).await?; - assert_eq!(plan_str, "Missing support for enum function arguments"); + assert_snapshot!("tpch_test_07", plan_str); Ok(()) } @@ -183,7 +101,7 @@ mod tests { #[tokio::test] async fn tpch_test_08() -> Result<()> { let plan_str = tpch_plan_to_string(8).await?; - assert_eq!(plan_str, "Missing support for enum function arguments"); + assert_snapshot!("tpch_test_08", plan_str); Ok(()) } @@ -191,112 +109,42 @@ mod tests { #[tokio::test] async fn tpch_test_09() -> Result<()> { let plan_str = tpch_plan_to_string(9).await?; - assert_eq!(plan_str, "Missing support for enum function arguments"); + assert_snapshot!("tpch_test_09", plan_str); Ok(()) } #[tokio::test] async fn tpch_test_10() -> Result<()> { let plan_str = tpch_plan_to_string(10).await?; - assert_eq!( - plan_str, - "Projection: CUSTOMER.C_CUSTKEY, 
CUSTOMER.C_NAME, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS REVENUE, CUSTOMER.C_ACCTBAL, NATION.N_NAME, CUSTOMER.C_ADDRESS, CUSTOMER.C_PHONE, CUSTOMER.C_COMMENT\ - \n Limit: skip=0, fetch=20\ - \n Sort: sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) DESC NULLS FIRST\ - \n Projection: CUSTOMER.C_CUSTKEY, CUSTOMER.C_NAME, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT), CUSTOMER.C_ACCTBAL, NATION.N_NAME, CUSTOMER.C_ADDRESS, CUSTOMER.C_PHONE, CUSTOMER.C_COMMENT\ - \n Aggregate: groupBy=[[CUSTOMER.C_CUSTKEY, CUSTOMER.C_NAME, CUSTOMER.C_ACCTBAL, CUSTOMER.C_PHONE, NATION.N_NAME, CUSTOMER.C_ADDRESS, CUSTOMER.C_COMMENT]], aggr=[[sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT)]]\ - \n Projection: CUSTOMER.C_CUSTKEY, CUSTOMER.C_NAME, CUSTOMER.C_ACCTBAL, CUSTOMER.C_PHONE, NATION.N_NAME, CUSTOMER.C_ADDRESS, CUSTOMER.C_COMMENT, LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT)\ - \n Filter: CUSTOMER.C_CUSTKEY = ORDERS.O_CUSTKEY AND LINEITEM.L_ORDERKEY = ORDERS.O_ORDERKEY AND ORDERS.O_ORDERDATE >= CAST(Utf8(\"1993-10-01\") AS Date32) AND ORDERS.O_ORDERDATE < CAST(Utf8(\"1994-01-01\") AS Date32) AND LINEITEM.L_RETURNFLAG = Utf8(\"R\") AND CUSTOMER.C_NATIONKEY = NATION.N_NATIONKEY\ - \n Cross Join: \ - \n Cross Join: \ - \n Cross Join: \ - \n TableScan: CUSTOMER\ - \n TableScan: ORDERS\ - \n TableScan: LINEITEM\ - \n TableScan: NATION" - ); + assert_snapshot!("tpch_test_10", plan_str); Ok(()) } #[tokio::test] async fn tpch_test_11() -> Result<()> { let plan_str = tpch_plan_to_string(11).await?; - assert_eq!( - plan_str, - "Projection: PARTSUPP.PS_PARTKEY, sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY) AS value\ - \n Sort: sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY) DESC NULLS FIRST\ - \n Filter: sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY) > ()\ - \n Subquery:\ - \n Projection: sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY) * 
Decimal128(Some(1000000),11,10)\ - \n Aggregate: groupBy=[[]], aggr=[[sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY)]]\ - \n Projection: PARTSUPP.PS_SUPPLYCOST * CAST(PARTSUPP.PS_AVAILQTY AS Decimal128(19, 0))\ - \n Filter: PARTSUPP.PS_SUPPKEY = SUPPLIER.S_SUPPKEY AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_NAME = Utf8(\"JAPAN\")\ - \n Cross Join: \ - \n Cross Join: \ - \n TableScan: PARTSUPP\ - \n TableScan: SUPPLIER\ - \n TableScan: NATION\ - \n Aggregate: groupBy=[[PARTSUPP.PS_PARTKEY]], aggr=[[sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY)]]\ - \n Projection: PARTSUPP.PS_PARTKEY, PARTSUPP.PS_SUPPLYCOST * CAST(PARTSUPP.PS_AVAILQTY AS Decimal128(19, 0))\ - \n Filter: PARTSUPP.PS_SUPPKEY = SUPPLIER.S_SUPPKEY AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_NAME = Utf8(\"JAPAN\")\ - \n Cross Join: \ - \n Cross Join: \ - \n TableScan: PARTSUPP\ - \n TableScan: SUPPLIER\ - \n TableScan: NATION" - ); + assert_snapshot!("tpch_test_11", plan_str); Ok(()) } #[tokio::test] async fn tpch_test_12() -> Result<()> { let plan_str = tpch_plan_to_string(12).await?; - assert_eq!( - plan_str, - "Projection: LINEITEM.L_SHIPMODE, sum(CASE WHEN ORDERS.O_ORDERPRIORITY = Utf8(\"1-URGENT\") OR ORDERS.O_ORDERPRIORITY = Utf8(\"2-HIGH\") THEN Int32(1) ELSE Int32(0) END) AS HIGH_LINE_COUNT, sum(CASE WHEN ORDERS.O_ORDERPRIORITY != Utf8(\"1-URGENT\") AND ORDERS.O_ORDERPRIORITY != Utf8(\"2-HIGH\") THEN Int32(1) ELSE Int32(0) END) AS LOW_LINE_COUNT\ - \n Sort: LINEITEM.L_SHIPMODE ASC NULLS LAST\ - \n Aggregate: groupBy=[[LINEITEM.L_SHIPMODE]], aggr=[[sum(CASE WHEN ORDERS.O_ORDERPRIORITY = Utf8(\"1-URGENT\") OR ORDERS.O_ORDERPRIORITY = Utf8(\"2-HIGH\") THEN Int32(1) ELSE Int32(0) END), sum(CASE WHEN ORDERS.O_ORDERPRIORITY != Utf8(\"1-URGENT\") AND ORDERS.O_ORDERPRIORITY != Utf8(\"2-HIGH\") THEN Int32(1) ELSE Int32(0) END)]]\ - \n Projection: LINEITEM.L_SHIPMODE, CASE WHEN ORDERS.O_ORDERPRIORITY = Utf8(\"1-URGENT\") OR ORDERS.O_ORDERPRIORITY = Utf8(\"2-HIGH\") 
THEN Int32(1) ELSE Int32(0) END, CASE WHEN ORDERS.O_ORDERPRIORITY != Utf8(\"1-URGENT\") AND ORDERS.O_ORDERPRIORITY != Utf8(\"2-HIGH\") THEN Int32(1) ELSE Int32(0) END\ - \n Filter: ORDERS.O_ORDERKEY = LINEITEM.L_ORDERKEY AND (LINEITEM.L_SHIPMODE = CAST(Utf8(\"MAIL\") AS Utf8) OR LINEITEM.L_SHIPMODE = CAST(Utf8(\"SHIP\") AS Utf8)) AND LINEITEM.L_COMMITDATE < LINEITEM.L_RECEIPTDATE AND LINEITEM.L_SHIPDATE < LINEITEM.L_COMMITDATE AND LINEITEM.L_RECEIPTDATE >= CAST(Utf8(\"1994-01-01\") AS Date32) AND LINEITEM.L_RECEIPTDATE < CAST(Utf8(\"1995-01-01\") AS Date32)\ - \n Cross Join: \ - \n TableScan: ORDERS\ - \n TableScan: LINEITEM" - ); + assert_snapshot!("tpch_test_12", plan_str); Ok(()) } #[tokio::test] async fn tpch_test_13() -> Result<()> { let plan_str = tpch_plan_to_string(13).await?; - assert_eq!( - plan_str, - "Projection: count(ORDERS.O_ORDERKEY) AS C_COUNT, count(Int64(1)) AS CUSTDIST\ - \n Sort: count(Int64(1)) DESC NULLS FIRST, count(ORDERS.O_ORDERKEY) DESC NULLS FIRST\ - \n Projection: count(ORDERS.O_ORDERKEY), count(Int64(1))\ - \n Aggregate: groupBy=[[count(ORDERS.O_ORDERKEY)]], aggr=[[count(Int64(1))]]\ - \n Projection: count(ORDERS.O_ORDERKEY)\ - \n Aggregate: groupBy=[[CUSTOMER.C_CUSTKEY]], aggr=[[count(ORDERS.O_ORDERKEY)]]\ - \n Projection: CUSTOMER.C_CUSTKEY, ORDERS.O_ORDERKEY\ - \n Left Join: CUSTOMER.C_CUSTKEY = ORDERS.O_CUSTKEY Filter: NOT ORDERS.O_COMMENT LIKE CAST(Utf8(\"%special%requests%\") AS Utf8)\ - \n TableScan: CUSTOMER\ - \n TableScan: ORDERS" - ); + assert_snapshot!("tpch_test_13", plan_str); Ok(()) } #[tokio::test] async fn tpch_test_14() -> Result<()> { let plan_str = tpch_plan_to_string(14).await?; - assert_eq!( - plan_str, - "Projection: Decimal128(Some(10000),5,2) * sum(CASE WHEN PART.P_TYPE LIKE Utf8(\"PROMO%\") THEN LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT ELSE Decimal128(Some(0),19,4) END) / sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS PROMO_REVENUE\ - \n Aggregate: groupBy=[[]], 
aggr=[[sum(CASE WHEN PART.P_TYPE LIKE Utf8(\"PROMO%\") THEN LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT ELSE Decimal128(Some(0),19,4) END), sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT)]]\ - \n Projection: CASE WHEN PART.P_TYPE LIKE CAST(Utf8(\"PROMO%\") AS Utf8) THEN LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT) ELSE Decimal128(Some(0),19,4) END, LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT)\ - \n Filter: LINEITEM.L_PARTKEY = PART.P_PARTKEY AND LINEITEM.L_SHIPDATE >= Date32(\"1995-09-01\") AND LINEITEM.L_SHIPDATE < CAST(Utf8(\"1995-10-01\") AS Date32)\ - \n Cross Join: \ - \n TableScan: LINEITEM\ - \n TableScan: PART" - ); + assert_snapshot!("tpch_test_14", plan_str); Ok(()) } @@ -304,28 +152,14 @@ mod tests { #[tokio::test] async fn tpch_test_15() -> Result<()> { let plan_str = tpch_plan_to_string(15).await?; - assert_eq!(plan_str, "Test file is empty"); + assert_snapshot!("tpch_test_15", plan_str); Ok(()) } #[tokio::test] async fn tpch_test_16() -> Result<()> { let plan_str = tpch_plan_to_string(16).await?; - assert_eq!( - plan_str, - "Projection: PART.P_BRAND, PART.P_TYPE, PART.P_SIZE, count(DISTINCT PARTSUPP.PS_SUPPKEY) AS SUPPLIER_CNT\ - \n Sort: count(DISTINCT PARTSUPP.PS_SUPPKEY) DESC NULLS FIRST, PART.P_BRAND ASC NULLS LAST, PART.P_TYPE ASC NULLS LAST, PART.P_SIZE ASC NULLS LAST\ - \n Aggregate: groupBy=[[PART.P_BRAND, PART.P_TYPE, PART.P_SIZE]], aggr=[[count(DISTINCT PARTSUPP.PS_SUPPKEY)]]\ - \n Projection: PART.P_BRAND, PART.P_TYPE, PART.P_SIZE, PARTSUPP.PS_SUPPKEY\ - \n Filter: PART.P_PARTKEY = PARTSUPP.PS_PARTKEY AND PART.P_BRAND != Utf8(\"Brand#45\") AND NOT PART.P_TYPE LIKE CAST(Utf8(\"MEDIUM POLISHED%\") AS Utf8) AND (PART.P_SIZE = Int32(49) OR PART.P_SIZE = Int32(14) OR PART.P_SIZE = Int32(23) OR PART.P_SIZE = Int32(45) OR PART.P_SIZE = Int32(19) OR PART.P_SIZE = Int32(3) OR PART.P_SIZE = Int32(36) OR PART.P_SIZE = Int32(9)) AND NOT 
PARTSUPP.PS_SUPPKEY IN ()\ - \n Subquery:\ - \n Projection: SUPPLIER.S_SUPPKEY\ - \n Filter: SUPPLIER.S_COMMENT LIKE CAST(Utf8(\"%Customer%Complaints%\") AS Utf8)\ - \n TableScan: SUPPLIER\ - \n Cross Join: \ - \n TableScan: PARTSUPP\ - \n TableScan: PART" - ); + assert_snapshot!("tpch_test_16", plan_str); Ok(()) } @@ -333,127 +167,41 @@ mod tests { #[tokio::test] async fn tpch_test_17() -> Result<()> { let plan_str = tpch_plan_to_string(17).await?; - assert_eq!(plan_str, "panics due to out of bounds field access"); + assert_snapshot!("tpch_test_17", plan_str); Ok(()) } #[tokio::test] async fn tpch_test_18() -> Result<()> { let plan_str = tpch_plan_to_string(18).await?; - assert_eq!( - plan_str, - "Projection: CUSTOMER.C_NAME, CUSTOMER.C_CUSTKEY, ORDERS.O_ORDERKEY, ORDERS.O_ORDERDATE, ORDERS.O_TOTALPRICE, sum(LINEITEM.L_QUANTITY) AS EXPR$5\ - \n Limit: skip=0, fetch=100\ - \n Sort: ORDERS.O_TOTALPRICE DESC NULLS FIRST, ORDERS.O_ORDERDATE ASC NULLS LAST\ - \n Aggregate: groupBy=[[CUSTOMER.C_NAME, CUSTOMER.C_CUSTKEY, ORDERS.O_ORDERKEY, ORDERS.O_ORDERDATE, ORDERS.O_TOTALPRICE]], aggr=[[sum(LINEITEM.L_QUANTITY)]]\ - \n Projection: CUSTOMER.C_NAME, CUSTOMER.C_CUSTKEY, ORDERS.O_ORDERKEY, ORDERS.O_ORDERDATE, ORDERS.O_TOTALPRICE, LINEITEM.L_QUANTITY\ - \n Filter: ORDERS.O_ORDERKEY IN () AND CUSTOMER.C_CUSTKEY = ORDERS.O_CUSTKEY AND ORDERS.O_ORDERKEY = LINEITEM.L_ORDERKEY\ - \n Subquery:\ - \n Projection: LINEITEM.L_ORDERKEY\ - \n Filter: sum(LINEITEM.L_QUANTITY) > CAST(Int32(300) AS Decimal128(15, 2))\ - \n Aggregate: groupBy=[[LINEITEM.L_ORDERKEY]], aggr=[[sum(LINEITEM.L_QUANTITY)]]\ - \n Projection: LINEITEM.L_ORDERKEY, LINEITEM.L_QUANTITY\ - \n TableScan: LINEITEM\ - \n Cross Join: \ - \n Cross Join: \ - \n TableScan: CUSTOMER\ - \n TableScan: ORDERS\ - \n TableScan: LINEITEM" - ); + assert_snapshot!("tpch_test_18", plan_str); Ok(()) } #[tokio::test] async fn tpch_test_19() -> Result<()> { let plan_str = tpch_plan_to_string(19).await?; - assert_eq!( - plan_str, - 
"Aggregate: groupBy=[[]], aggr=[[sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS REVENUE]]\ - \n Projection: LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT)\ - \n Filter: PART.P_PARTKEY = LINEITEM.L_PARTKEY AND PART.P_BRAND = Utf8(\"Brand#12\") AND (PART.P_CONTAINER = CAST(Utf8(\"SM CASE\") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8(\"SM BOX\") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8(\"SM PACK\") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8(\"SM PKG\") AS Utf8)) AND LINEITEM.L_QUANTITY >= CAST(Int32(1) AS Decimal128(15, 2)) AND LINEITEM.L_QUANTITY <= CAST(Int32(1) + Int32(10) AS Decimal128(15, 2)) AND PART.P_SIZE >= Int32(1) AND PART.P_SIZE <= Int32(5) AND (LINEITEM.L_SHIPMODE = CAST(Utf8(\"AIR\") AS Utf8) OR LINEITEM.L_SHIPMODE = CAST(Utf8(\"AIR REG\") AS Utf8)) AND LINEITEM.L_SHIPINSTRUCT = Utf8(\"DELIVER IN PERSON\") OR PART.P_PARTKEY = LINEITEM.L_PARTKEY AND PART.P_BRAND = Utf8(\"Brand#23\") AND (PART.P_CONTAINER = CAST(Utf8(\"MED BAG\") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8(\"MED BOX\") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8(\"MED PKG\") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8(\"MED PACK\") AS Utf8)) AND LINEITEM.L_QUANTITY >= CAST(Int32(10) AS Decimal128(15, 2)) AND LINEITEM.L_QUANTITY <= CAST(Int32(10) + Int32(10) AS Decimal128(15, 2)) AND PART.P_SIZE >= Int32(1) AND PART.P_SIZE <= Int32(10) AND (LINEITEM.L_SHIPMODE = CAST(Utf8(\"AIR\") AS Utf8) OR LINEITEM.L_SHIPMODE = CAST(Utf8(\"AIR REG\") AS Utf8)) AND LINEITEM.L_SHIPINSTRUCT = Utf8(\"DELIVER IN PERSON\") OR PART.P_PARTKEY = LINEITEM.L_PARTKEY AND PART.P_BRAND = Utf8(\"Brand#34\") AND (PART.P_CONTAINER = CAST(Utf8(\"LG CASE\") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8(\"LG BOX\") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8(\"LG PACK\") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8(\"LG PKG\") AS Utf8)) AND LINEITEM.L_QUANTITY >= CAST(Int32(20) AS Decimal128(15, 2)) AND LINEITEM.L_QUANTITY <= CAST(Int32(20) + Int32(10) AS Decimal128(15, 2)) AND PART.P_SIZE 
>= Int32(1) AND PART.P_SIZE <= Int32(15) AND (LINEITEM.L_SHIPMODE = CAST(Utf8(\"AIR\") AS Utf8) OR LINEITEM.L_SHIPMODE = CAST(Utf8(\"AIR REG\") AS Utf8)) AND LINEITEM.L_SHIPINSTRUCT = Utf8(\"DELIVER IN PERSON\")\ - \n Cross Join: \ - \n TableScan: LINEITEM\ - \n TableScan: PART" - ); + assert_snapshot!("tpch_test_19", plan_str); Ok(()) } #[tokio::test] async fn tpch_test_20() -> Result<()> { let plan_str = tpch_plan_to_string(20).await?; - assert_eq!( - plan_str, - "Sort: SUPPLIER.S_NAME ASC NULLS LAST\ - \n Projection: SUPPLIER.S_NAME, SUPPLIER.S_ADDRESS\ - \n Filter: SUPPLIER.S_SUPPKEY IN () AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_NAME = Utf8(\"CANADA\")\ - \n Subquery:\ - \n Projection: PARTSUPP.PS_SUPPKEY\ - \n Filter: PARTSUPP.PS_PARTKEY IN () AND CAST(PARTSUPP.PS_AVAILQTY AS Decimal128(19, 0)) > ()\ - \n Subquery:\ - \n Projection: PART.P_PARTKEY\ - \n Filter: PART.P_NAME LIKE CAST(Utf8(\"forest%\") AS Utf8)\ - \n TableScan: PART\ - \n Subquery:\ - \n Projection: Decimal128(Some(5),2,1) * sum(LINEITEM.L_QUANTITY)\ - \n Aggregate: groupBy=[[]], aggr=[[sum(LINEITEM.L_QUANTITY)]]\ - \n Projection: LINEITEM.L_QUANTITY\ - \n Filter: LINEITEM.L_PARTKEY = LINEITEM.L_ORDERKEY AND LINEITEM.L_SUPPKEY = LINEITEM.L_PARTKEY AND LINEITEM.L_SHIPDATE >= CAST(Utf8(\"1994-01-01\") AS Date32) AND LINEITEM.L_SHIPDATE < CAST(Utf8(\"1995-01-01\") AS Date32)\ - \n TableScan: LINEITEM\ - \n TableScan: PARTSUPP\ - \n Cross Join: \ - \n TableScan: SUPPLIER\ - \n TableScan: NATION" - ); + assert_snapshot!("tpch_test_20", plan_str); Ok(()) } #[tokio::test] async fn tpch_test_21() -> Result<()> { let plan_str = tpch_plan_to_string(21).await?; - assert_eq!( - plan_str, - "Projection: SUPPLIER.S_NAME, count(Int64(1)) AS NUMWAIT\ - \n Limit: skip=0, fetch=100\ - \n Sort: count(Int64(1)) DESC NULLS FIRST, SUPPLIER.S_NAME ASC NULLS LAST\ - \n Aggregate: groupBy=[[SUPPLIER.S_NAME]], aggr=[[count(Int64(1))]]\ - \n Projection: SUPPLIER.S_NAME\ - \n Filter: SUPPLIER.S_SUPPKEY = 
LINEITEM.L_SUPPKEY AND ORDERS.O_ORDERKEY = LINEITEM.L_ORDERKEY AND ORDERS.O_ORDERSTATUS = Utf8(\"F\") AND LINEITEM.L_RECEIPTDATE > LINEITEM.L_COMMITDATE AND EXISTS () AND NOT EXISTS () AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_NAME = Utf8(\"SAUDI ARABIA\")\ - \n Subquery:\ - \n Filter: LINEITEM.L_ORDERKEY = LINEITEM.L_TAX AND LINEITEM.L_SUPPKEY != LINEITEM.L_LINESTATUS\ - \n TableScan: LINEITEM\ - \n Subquery:\ - \n Filter: LINEITEM.L_ORDERKEY = LINEITEM.L_TAX AND LINEITEM.L_SUPPKEY != LINEITEM.L_LINESTATUS AND LINEITEM.L_RECEIPTDATE > LINEITEM.L_COMMITDATE\ - \n TableScan: LINEITEM\ - \n Cross Join: \ - \n Cross Join: \ - \n Cross Join: \ - \n TableScan: SUPPLIER\ - \n TableScan: LINEITEM\ - \n TableScan: ORDERS\ - \n TableScan: NATION" - ); + assert_snapshot!("tpch_test_21", plan_str); Ok(()) } #[tokio::test] async fn tpch_test_22() -> Result<()> { let plan_str = tpch_plan_to_string(22).await?; - assert_eq!( - plan_str, - "Projection: substr(CUSTOMER.C_PHONE,Int32(1),Int32(2)) AS CNTRYCODE, count(Int64(1)) AS NUMCUST, sum(CUSTOMER.C_ACCTBAL) AS TOTACCTBAL\ - \n Sort: substr(CUSTOMER.C_PHONE,Int32(1),Int32(2)) ASC NULLS LAST\ - \n Aggregate: groupBy=[[substr(CUSTOMER.C_PHONE,Int32(1),Int32(2))]], aggr=[[count(Int64(1)), sum(CUSTOMER.C_ACCTBAL)]]\ - \n Projection: substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)), CUSTOMER.C_ACCTBAL\ - \n Filter: (substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"13\") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"31\") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"23\") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"29\") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"30\") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"18\") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"17\") AS Utf8)) AND CUSTOMER.C_ACCTBAL > () AND NOT EXISTS ()\ - \n Subquery:\ - \n Aggregate: 
groupBy=[[]], aggr=[[avg(CUSTOMER.C_ACCTBAL)]]\ - \n Projection: CUSTOMER.C_ACCTBAL\ - \n Filter: CUSTOMER.C_ACCTBAL > Decimal128(Some(0),3,2) AND (substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"13\") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"31\") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"23\") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"29\") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"30\") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"18\") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"17\") AS Utf8))\ - \n TableScan: CUSTOMER\ - \n Subquery:\ - \n Filter: ORDERS.O_CUSTKEY = ORDERS.O_ORDERKEY\ - \n TableScan: ORDERS\ - \n TableScan: CUSTOMER" - ); + assert_snapshot!("tpch_test_22", plan_str); Ok(()) } @@ -475,11 +223,7 @@ mod tests { let plan_str = test_plan_to_string("select_count_from_select_1.substrait.json").await?; - assert_eq!( - plan_str, - "Aggregate: groupBy=[[]], aggr=[[count(Int64(1)) AS count(*)]]\ - \n Values: (Int64(0))" - ); + assert_snapshot!("test_select_count_from_select_1", plan_str); Ok(()) } @@ -487,12 +231,7 @@ mod tests { async fn test_select_window_count() -> Result<()> { let plan_str = test_plan_to_string("select_window_count.substrait.json").await?; - assert_eq!( - plan_str, - "Projection: count(Int64(1)) PARTITION BY [DATA.PART] ORDER BY [DATA.ORD ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING AS LEAD_EXPR\ - \n WindowAggr: windowExpr=[[count(Int64(1)) PARTITION BY [DATA.PART] ORDER BY [DATA.ORD ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING]]\ - \n TableScan: DATA" - ); + assert_snapshot!("test_select_window_count", plan_str); Ok(()) } } diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__test_select_count_from_select_1.snap 
b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__test_select_count_from_select_1.snap new file mode 100644 index 000000000000..50307bd38f66 --- /dev/null +++ b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__test_select_count_from_select_1.snap @@ -0,0 +1,6 @@ +--- +source: datafusion/substrait/tests/cases/consumer_integration.rs +expression: plan_str +--- +Aggregate: groupBy=[[]], aggr=[[count(Int64(1)) AS count(*)]] + Values: (Int64(0)) diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__test_select_window_count.snap b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__test_select_window_count.snap new file mode 100644 index 000000000000..d9b4bc4601f3 --- /dev/null +++ b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__test_select_window_count.snap @@ -0,0 +1,7 @@ +--- +source: datafusion/substrait/tests/cases/consumer_integration.rs +expression: plan_str +--- +Projection: count(Int64(1)) PARTITION BY [DATA.PART] ORDER BY [DATA.ORD ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING AS LEAD_EXPR + WindowAggr: windowExpr=[[count(Int64(1)) PARTITION BY [DATA.PART] ORDER BY [DATA.ORD ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING]] + TableScan: DATA diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_01.snap b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_01.snap new file mode 100644 index 000000000000..6aa3926d1517 --- /dev/null +++ b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_01.snap @@ -0,0 +1,10 @@ +--- +source: 
datafusion/substrait/tests/cases/consumer_integration.rs +expression: plan_str +--- +Projection: LINEITEM.L_RETURNFLAG, LINEITEM.L_LINESTATUS, sum(LINEITEM.L_QUANTITY) AS SUM_QTY, sum(LINEITEM.L_EXTENDEDPRICE) AS SUM_BASE_PRICE, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS SUM_DISC_PRICE, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT * Int32(1) + LINEITEM.L_TAX) AS SUM_CHARGE, avg(LINEITEM.L_QUANTITY) AS AVG_QTY, avg(LINEITEM.L_EXTENDEDPRICE) AS AVG_PRICE, avg(LINEITEM.L_DISCOUNT) AS AVG_DISC, count(Int64(1)) AS COUNT_ORDER + Sort: LINEITEM.L_RETURNFLAG ASC NULLS LAST, LINEITEM.L_LINESTATUS ASC NULLS LAST + Aggregate: groupBy=[[LINEITEM.L_RETURNFLAG, LINEITEM.L_LINESTATUS]], aggr=[[sum(LINEITEM.L_QUANTITY), sum(LINEITEM.L_EXTENDEDPRICE), sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT), sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT * Int32(1) + LINEITEM.L_TAX), avg(LINEITEM.L_QUANTITY), avg(LINEITEM.L_EXTENDEDPRICE), avg(LINEITEM.L_DISCOUNT), count(Int64(1))]] + Projection: LINEITEM.L_RETURNFLAG, LINEITEM.L_LINESTATUS, LINEITEM.L_QUANTITY, LINEITEM.L_EXTENDEDPRICE, LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT), LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT) * (CAST(Int32(1) AS Decimal128(15, 2)) + LINEITEM.L_TAX), LINEITEM.L_DISCOUNT + Filter: LINEITEM.L_SHIPDATE <= Date32("1998-12-01") - IntervalDayTime("IntervalDayTime { days: 0, milliseconds: 10368000 }") + TableScan: LINEITEM diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_02.snap b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_02.snap new file mode 100644 index 000000000000..78b0e463939b --- /dev/null +++ b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_02.snap 
@@ -0,0 +1,28 @@ +--- +source: datafusion/substrait/tests/cases/consumer_integration.rs +expression: plan_str +--- +Limit: skip=0, fetch=100 + Sort: SUPPLIER.S_ACCTBAL DESC NULLS FIRST, NATION.N_NAME ASC NULLS LAST, SUPPLIER.S_NAME ASC NULLS LAST, PART.P_PARTKEY ASC NULLS LAST + Projection: SUPPLIER.S_ACCTBAL, SUPPLIER.S_NAME, NATION.N_NAME, PART.P_PARTKEY, PART.P_MFGR, SUPPLIER.S_ADDRESS, SUPPLIER.S_PHONE, SUPPLIER.S_COMMENT + Filter: PART.P_PARTKEY = PARTSUPP.PS_PARTKEY AND SUPPLIER.S_SUPPKEY = PARTSUPP.PS_SUPPKEY AND PART.P_SIZE = Int32(15) AND PART.P_TYPE LIKE CAST(Utf8("%BRASS") AS Utf8) AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_REGIONKEY = REGION.R_REGIONKEY AND REGION.R_NAME = Utf8("EUROPE") AND PARTSUPP.PS_SUPPLYCOST = () + Subquery: + Aggregate: groupBy=[[]], aggr=[[min(PARTSUPP.PS_SUPPLYCOST)]] + Projection: PARTSUPP.PS_SUPPLYCOST + Filter: PARTSUPP.PS_PARTKEY = PARTSUPP.PS_PARTKEY AND SUPPLIER.S_SUPPKEY = PARTSUPP.PS_SUPPKEY AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_REGIONKEY = REGION.R_REGIONKEY AND REGION.R_NAME = Utf8("EUROPE") + Cross Join: + Cross Join: + Cross Join: + TableScan: PARTSUPP + TableScan: SUPPLIER + TableScan: NATION + TableScan: REGION + Cross Join: + Cross Join: + Cross Join: + Cross Join: + TableScan: PART + TableScan: SUPPLIER + TableScan: PARTSUPP + TableScan: NATION + TableScan: REGION diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_03.snap b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_03.snap new file mode 100644 index 000000000000..00939ecb9c37 --- /dev/null +++ b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_03.snap @@ -0,0 +1,16 @@ +--- +source: datafusion/substrait/tests/cases/consumer_integration.rs +expression: plan_str +--- +Projection: LINEITEM.L_ORDERKEY, 
sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS REVENUE, ORDERS.O_ORDERDATE, ORDERS.O_SHIPPRIORITY + Limit: skip=0, fetch=10 + Sort: sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) DESC NULLS FIRST, ORDERS.O_ORDERDATE ASC NULLS LAST + Projection: LINEITEM.L_ORDERKEY, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT), ORDERS.O_ORDERDATE, ORDERS.O_SHIPPRIORITY + Aggregate: groupBy=[[LINEITEM.L_ORDERKEY, ORDERS.O_ORDERDATE, ORDERS.O_SHIPPRIORITY]], aggr=[[sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT)]] + Projection: LINEITEM.L_ORDERKEY, ORDERS.O_ORDERDATE, ORDERS.O_SHIPPRIORITY, LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT) + Filter: CUSTOMER.C_MKTSEGMENT = Utf8("BUILDING") AND CUSTOMER.C_CUSTKEY = ORDERS.O_CUSTKEY AND LINEITEM.L_ORDERKEY = ORDERS.O_ORDERKEY AND ORDERS.O_ORDERDATE < CAST(Utf8("1995-03-15") AS Date32) AND LINEITEM.L_SHIPDATE > CAST(Utf8("1995-03-15") AS Date32) + Cross Join: + Cross Join: + TableScan: LINEITEM + TableScan: CUSTOMER + TableScan: ORDERS diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_04.snap b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_04.snap new file mode 100644 index 000000000000..3c54c1a413f8 --- /dev/null +++ b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_04.snap @@ -0,0 +1,13 @@ +--- +source: datafusion/substrait/tests/cases/consumer_integration.rs +expression: plan_str +--- +Projection: ORDERS.O_ORDERPRIORITY, count(Int64(1)) AS ORDER_COUNT + Sort: ORDERS.O_ORDERPRIORITY ASC NULLS LAST + Aggregate: groupBy=[[ORDERS.O_ORDERPRIORITY]], aggr=[[count(Int64(1))]] + Projection: ORDERS.O_ORDERPRIORITY + Filter: ORDERS.O_ORDERDATE >= CAST(Utf8("1993-07-01") AS Date32) AND ORDERS.O_ORDERDATE < CAST(Utf8("1993-10-01") AS Date32) 
AND EXISTS () + Subquery: + Filter: LINEITEM.L_ORDERKEY = LINEITEM.L_ORDERKEY AND LINEITEM.L_COMMITDATE < LINEITEM.L_RECEIPTDATE + TableScan: LINEITEM + TableScan: ORDERS diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_05.snap b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_05.snap new file mode 100644 index 000000000000..a7f331709d18 --- /dev/null +++ b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_05.snap @@ -0,0 +1,20 @@ +--- +source: datafusion/substrait/tests/cases/consumer_integration.rs +expression: plan_str +--- +Projection: NATION.N_NAME, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS REVENUE + Sort: sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) DESC NULLS FIRST + Aggregate: groupBy=[[NATION.N_NAME]], aggr=[[sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT)]] + Projection: NATION.N_NAME, LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT) + Filter: CUSTOMER.C_CUSTKEY = ORDERS.O_CUSTKEY AND LINEITEM.L_ORDERKEY = ORDERS.O_ORDERKEY AND LINEITEM.L_SUPPKEY = SUPPLIER.S_SUPPKEY AND CUSTOMER.C_NATIONKEY = SUPPLIER.S_NATIONKEY AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_REGIONKEY = REGION.R_REGIONKEY AND REGION.R_NAME = Utf8("ASIA") AND ORDERS.O_ORDERDATE >= CAST(Utf8("1994-01-01") AS Date32) AND ORDERS.O_ORDERDATE < CAST(Utf8("1995-01-01") AS Date32) + Cross Join: + Cross Join: + Cross Join: + Cross Join: + Cross Join: + TableScan: CUSTOMER + TableScan: ORDERS + TableScan: LINEITEM + TableScan: SUPPLIER + TableScan: NATION + TableScan: REGION diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_06.snap 
b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_06.snap new file mode 100644 index 000000000000..e8cf01830ec0 --- /dev/null +++ b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_06.snap @@ -0,0 +1,8 @@ +--- +source: datafusion/substrait/tests/cases/consumer_integration.rs +expression: plan_str +--- +Aggregate: groupBy=[[]], aggr=[[sum(LINEITEM.L_EXTENDEDPRICE * LINEITEM.L_DISCOUNT) AS REVENUE]] + Projection: LINEITEM.L_EXTENDEDPRICE * LINEITEM.L_DISCOUNT + Filter: LINEITEM.L_SHIPDATE >= CAST(Utf8("1994-01-01") AS Date32) AND LINEITEM.L_SHIPDATE < CAST(Utf8("1995-01-01") AS Date32) AND LINEITEM.L_DISCOUNT >= Decimal128(Some(5),3,2) AND LINEITEM.L_DISCOUNT <= Decimal128(Some(7),3,2) AND LINEITEM.L_QUANTITY < CAST(Int32(24) AS Decimal128(15, 2)) + TableScan: LINEITEM diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_10.snap b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_10.snap new file mode 100644 index 000000000000..2ae4a3dd140c --- /dev/null +++ b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_10.snap @@ -0,0 +1,18 @@ +--- +source: datafusion/substrait/tests/cases/consumer_integration.rs +expression: plan_str +--- +Projection: CUSTOMER.C_CUSTKEY, CUSTOMER.C_NAME, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS REVENUE, CUSTOMER.C_ACCTBAL, NATION.N_NAME, CUSTOMER.C_ADDRESS, CUSTOMER.C_PHONE, CUSTOMER.C_COMMENT + Limit: skip=0, fetch=20 + Sort: sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) DESC NULLS FIRST + Projection: CUSTOMER.C_CUSTKEY, CUSTOMER.C_NAME, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT), CUSTOMER.C_ACCTBAL, NATION.N_NAME, CUSTOMER.C_ADDRESS, CUSTOMER.C_PHONE, 
CUSTOMER.C_COMMENT + Aggregate: groupBy=[[CUSTOMER.C_CUSTKEY, CUSTOMER.C_NAME, CUSTOMER.C_ACCTBAL, CUSTOMER.C_PHONE, NATION.N_NAME, CUSTOMER.C_ADDRESS, CUSTOMER.C_COMMENT]], aggr=[[sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT)]] + Projection: CUSTOMER.C_CUSTKEY, CUSTOMER.C_NAME, CUSTOMER.C_ACCTBAL, CUSTOMER.C_PHONE, NATION.N_NAME, CUSTOMER.C_ADDRESS, CUSTOMER.C_COMMENT, LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT) + Filter: CUSTOMER.C_CUSTKEY = ORDERS.O_CUSTKEY AND LINEITEM.L_ORDERKEY = ORDERS.O_ORDERKEY AND ORDERS.O_ORDERDATE >= CAST(Utf8("1993-10-01") AS Date32) AND ORDERS.O_ORDERDATE < CAST(Utf8("1994-01-01") AS Date32) AND LINEITEM.L_RETURNFLAG = Utf8("R") AND CUSTOMER.C_NATIONKEY = NATION.N_NATIONKEY + Cross Join: + Cross Join: + Cross Join: + TableScan: CUSTOMER + TableScan: ORDERS + TableScan: LINEITEM + TableScan: NATION diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_11.snap b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_11.snap new file mode 100644 index 000000000000..a75b5a69496a --- /dev/null +++ b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_11.snap @@ -0,0 +1,25 @@ +--- +source: datafusion/substrait/tests/cases/consumer_integration.rs +expression: plan_str +--- +Projection: PARTSUPP.PS_PARTKEY, sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY) AS value + Sort: sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY) DESC NULLS FIRST + Filter: sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY) > () + Subquery: + Projection: sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY) * Decimal128(Some(1000000),11,10) + Aggregate: groupBy=[[]], aggr=[[sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY)]] + Projection: PARTSUPP.PS_SUPPLYCOST * CAST(PARTSUPP.PS_AVAILQTY AS Decimal128(19, 0)) + Filter: 
PARTSUPP.PS_SUPPKEY = SUPPLIER.S_SUPPKEY AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_NAME = Utf8("JAPAN") + Cross Join: + Cross Join: + TableScan: PARTSUPP + TableScan: SUPPLIER + TableScan: NATION + Aggregate: groupBy=[[PARTSUPP.PS_PARTKEY]], aggr=[[sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY)]] + Projection: PARTSUPP.PS_PARTKEY, PARTSUPP.PS_SUPPLYCOST * CAST(PARTSUPP.PS_AVAILQTY AS Decimal128(19, 0)) + Filter: PARTSUPP.PS_SUPPKEY = SUPPLIER.S_SUPPKEY AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_NAME = Utf8("JAPAN") + Cross Join: + Cross Join: + TableScan: PARTSUPP + TableScan: SUPPLIER + TableScan: NATION diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_12.snap b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_12.snap new file mode 100644 index 000000000000..93716872f001 --- /dev/null +++ b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_12.snap @@ -0,0 +1,12 @@ +--- +source: datafusion/substrait/tests/cases/consumer_integration.rs +expression: plan_str +--- +Projection: LINEITEM.L_SHIPMODE, sum(CASE WHEN ORDERS.O_ORDERPRIORITY = Utf8("1-URGENT") OR ORDERS.O_ORDERPRIORITY = Utf8("2-HIGH") THEN Int32(1) ELSE Int32(0) END) AS HIGH_LINE_COUNT, sum(CASE WHEN ORDERS.O_ORDERPRIORITY != Utf8("1-URGENT") AND ORDERS.O_ORDERPRIORITY != Utf8("2-HIGH") THEN Int32(1) ELSE Int32(0) END) AS LOW_LINE_COUNT + Sort: LINEITEM.L_SHIPMODE ASC NULLS LAST + Aggregate: groupBy=[[LINEITEM.L_SHIPMODE]], aggr=[[sum(CASE WHEN ORDERS.O_ORDERPRIORITY = Utf8("1-URGENT") OR ORDERS.O_ORDERPRIORITY = Utf8("2-HIGH") THEN Int32(1) ELSE Int32(0) END), sum(CASE WHEN ORDERS.O_ORDERPRIORITY != Utf8("1-URGENT") AND ORDERS.O_ORDERPRIORITY != Utf8("2-HIGH") THEN Int32(1) ELSE Int32(0) END)]] + Projection: LINEITEM.L_SHIPMODE, CASE WHEN ORDERS.O_ORDERPRIORITY = 
Utf8("1-URGENT") OR ORDERS.O_ORDERPRIORITY = Utf8("2-HIGH") THEN Int32(1) ELSE Int32(0) END, CASE WHEN ORDERS.O_ORDERPRIORITY != Utf8("1-URGENT") AND ORDERS.O_ORDERPRIORITY != Utf8("2-HIGH") THEN Int32(1) ELSE Int32(0) END + Filter: ORDERS.O_ORDERKEY = LINEITEM.L_ORDERKEY AND (LINEITEM.L_SHIPMODE = CAST(Utf8("MAIL") AS Utf8) OR LINEITEM.L_SHIPMODE = CAST(Utf8("SHIP") AS Utf8)) AND LINEITEM.L_COMMITDATE < LINEITEM.L_RECEIPTDATE AND LINEITEM.L_SHIPDATE < LINEITEM.L_COMMITDATE AND LINEITEM.L_RECEIPTDATE >= CAST(Utf8("1994-01-01") AS Date32) AND LINEITEM.L_RECEIPTDATE < CAST(Utf8("1995-01-01") AS Date32) + Cross Join: + TableScan: ORDERS + TableScan: LINEITEM diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_13.snap b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_13.snap new file mode 100644 index 000000000000..18b44547db60 --- /dev/null +++ b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_13.snap @@ -0,0 +1,14 @@ +--- +source: datafusion/substrait/tests/cases/consumer_integration.rs +expression: plan_str +--- +Projection: count(ORDERS.O_ORDERKEY) AS C_COUNT, count(Int64(1)) AS CUSTDIST + Sort: count(Int64(1)) DESC NULLS FIRST, count(ORDERS.O_ORDERKEY) DESC NULLS FIRST + Projection: count(ORDERS.O_ORDERKEY), count(Int64(1)) + Aggregate: groupBy=[[count(ORDERS.O_ORDERKEY)]], aggr=[[count(Int64(1))]] + Projection: count(ORDERS.O_ORDERKEY) + Aggregate: groupBy=[[CUSTOMER.C_CUSTKEY]], aggr=[[count(ORDERS.O_ORDERKEY)]] + Projection: CUSTOMER.C_CUSTKEY, ORDERS.O_ORDERKEY + Left Join: CUSTOMER.C_CUSTKEY = ORDERS.O_CUSTKEY Filter: NOT ORDERS.O_COMMENT LIKE CAST(Utf8("%special%requests%") AS Utf8) + TableScan: CUSTOMER + TableScan: ORDERS diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_14.snap 
b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_14.snap new file mode 100644 index 000000000000..7172e566862a --- /dev/null +++ b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_14.snap @@ -0,0 +1,11 @@ +--- +source: datafusion/substrait/tests/cases/consumer_integration.rs +expression: plan_str +--- +Projection: Decimal128(Some(10000),5,2) * sum(CASE WHEN PART.P_TYPE LIKE Utf8("PROMO%") THEN LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT ELSE Decimal128(Some(0),19,4) END) / sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS PROMO_REVENUE + Aggregate: groupBy=[[]], aggr=[[sum(CASE WHEN PART.P_TYPE LIKE Utf8("PROMO%") THEN LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT ELSE Decimal128(Some(0),19,4) END), sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT)]] + Projection: CASE WHEN PART.P_TYPE LIKE CAST(Utf8("PROMO%") AS Utf8) THEN LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT) ELSE Decimal128(Some(0),19,4) END, LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT) + Filter: LINEITEM.L_PARTKEY = PART.P_PARTKEY AND LINEITEM.L_SHIPDATE >= Date32("1995-09-01") AND LINEITEM.L_SHIPDATE < CAST(Utf8("1995-10-01") AS Date32) + Cross Join: + TableScan: LINEITEM + TableScan: PART diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_16.snap b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_16.snap new file mode 100644 index 000000000000..1170905e7416 --- /dev/null +++ b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_16.snap @@ -0,0 +1,16 @@ +--- +source: datafusion/substrait/tests/cases/consumer_integration.rs +expression: plan_str 
+--- +Projection: PART.P_BRAND, PART.P_TYPE, PART.P_SIZE, count(DISTINCT PARTSUPP.PS_SUPPKEY) AS SUPPLIER_CNT + Sort: count(DISTINCT PARTSUPP.PS_SUPPKEY) DESC NULLS FIRST, PART.P_BRAND ASC NULLS LAST, PART.P_TYPE ASC NULLS LAST, PART.P_SIZE ASC NULLS LAST + Aggregate: groupBy=[[PART.P_BRAND, PART.P_TYPE, PART.P_SIZE]], aggr=[[count(DISTINCT PARTSUPP.PS_SUPPKEY)]] + Projection: PART.P_BRAND, PART.P_TYPE, PART.P_SIZE, PARTSUPP.PS_SUPPKEY + Filter: PART.P_PARTKEY = PARTSUPP.PS_PARTKEY AND PART.P_BRAND != Utf8("Brand#45") AND NOT PART.P_TYPE LIKE CAST(Utf8("MEDIUM POLISHED%") AS Utf8) AND (PART.P_SIZE = Int32(49) OR PART.P_SIZE = Int32(14) OR PART.P_SIZE = Int32(23) OR PART.P_SIZE = Int32(45) OR PART.P_SIZE = Int32(19) OR PART.P_SIZE = Int32(3) OR PART.P_SIZE = Int32(36) OR PART.P_SIZE = Int32(9)) AND NOT PARTSUPP.PS_SUPPKEY IN () + Subquery: + Projection: SUPPLIER.S_SUPPKEY + Filter: SUPPLIER.S_COMMENT LIKE CAST(Utf8("%Customer%Complaints%") AS Utf8) + TableScan: SUPPLIER + Cross Join: + TableScan: PARTSUPP + TableScan: PART diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_18.snap b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_18.snap new file mode 100644 index 000000000000..f36cd138bd01 --- /dev/null +++ b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_18.snap @@ -0,0 +1,21 @@ +--- +source: datafusion/substrait/tests/cases/consumer_integration.rs +expression: plan_str +--- +Projection: CUSTOMER.C_NAME, CUSTOMER.C_CUSTKEY, ORDERS.O_ORDERKEY, ORDERS.O_ORDERDATE, ORDERS.O_TOTALPRICE, sum(LINEITEM.L_QUANTITY) AS EXPR$5 + Limit: skip=0, fetch=100 + Sort: ORDERS.O_TOTALPRICE DESC NULLS FIRST, ORDERS.O_ORDERDATE ASC NULLS LAST + Aggregate: groupBy=[[CUSTOMER.C_NAME, CUSTOMER.C_CUSTKEY, ORDERS.O_ORDERKEY, ORDERS.O_ORDERDATE, ORDERS.O_TOTALPRICE]], 
aggr=[[sum(LINEITEM.L_QUANTITY)]] + Projection: CUSTOMER.C_NAME, CUSTOMER.C_CUSTKEY, ORDERS.O_ORDERKEY, ORDERS.O_ORDERDATE, ORDERS.O_TOTALPRICE, LINEITEM.L_QUANTITY + Filter: ORDERS.O_ORDERKEY IN () AND CUSTOMER.C_CUSTKEY = ORDERS.O_CUSTKEY AND ORDERS.O_ORDERKEY = LINEITEM.L_ORDERKEY + Subquery: + Projection: LINEITEM.L_ORDERKEY + Filter: sum(LINEITEM.L_QUANTITY) > CAST(Int32(300) AS Decimal128(15, 2)) + Aggregate: groupBy=[[LINEITEM.L_ORDERKEY]], aggr=[[sum(LINEITEM.L_QUANTITY)]] + Projection: LINEITEM.L_ORDERKEY, LINEITEM.L_QUANTITY + TableScan: LINEITEM + Cross Join: + Cross Join: + TableScan: CUSTOMER + TableScan: ORDERS + TableScan: LINEITEM diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_19.snap b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_19.snap new file mode 100644 index 000000000000..f7a3eb44afe4 --- /dev/null +++ b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_19.snap @@ -0,0 +1,10 @@ +--- +source: datafusion/substrait/tests/cases/consumer_integration.rs +expression: plan_str +--- +Aggregate: groupBy=[[]], aggr=[[sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS REVENUE]] + Projection: LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT) + Filter: PART.P_PARTKEY = LINEITEM.L_PARTKEY AND PART.P_BRAND = Utf8("Brand#12") AND (PART.P_CONTAINER = CAST(Utf8("SM CASE") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("SM BOX") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("SM PACK") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("SM PKG") AS Utf8)) AND LINEITEM.L_QUANTITY >= CAST(Int32(1) AS Decimal128(15, 2)) AND LINEITEM.L_QUANTITY <= CAST(Int32(1) + Int32(10) AS Decimal128(15, 2)) AND PART.P_SIZE >= Int32(1) AND PART.P_SIZE <= Int32(5) AND (LINEITEM.L_SHIPMODE = CAST(Utf8("AIR") AS Utf8) OR LINEITEM.L_SHIPMODE = 
CAST(Utf8("AIR REG") AS Utf8)) AND LINEITEM.L_SHIPINSTRUCT = Utf8("DELIVER IN PERSON") OR PART.P_PARTKEY = LINEITEM.L_PARTKEY AND PART.P_BRAND = Utf8("Brand#23") AND (PART.P_CONTAINER = CAST(Utf8("MED BAG") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("MED BOX") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("MED PKG") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("MED PACK") AS Utf8)) AND LINEITEM.L_QUANTITY >= CAST(Int32(10) AS Decimal128(15, 2)) AND LINEITEM.L_QUANTITY <= CAST(Int32(10) + Int32(10) AS Decimal128(15, 2)) AND PART.P_SIZE >= Int32(1) AND PART.P_SIZE <= Int32(10) AND (LINEITEM.L_SHIPMODE = CAST(Utf8("AIR") AS Utf8) OR LINEITEM.L_SHIPMODE = CAST(Utf8("AIR REG") AS Utf8)) AND LINEITEM.L_SHIPINSTRUCT = Utf8("DELIVER IN PERSON") OR PART.P_PARTKEY = LINEITEM.L_PARTKEY AND PART.P_BRAND = Utf8("Brand#34") AND (PART.P_CONTAINER = CAST(Utf8("LG CASE") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("LG BOX") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("LG PACK") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("LG PKG") AS Utf8)) AND LINEITEM.L_QUANTITY >= CAST(Int32(20) AS Decimal128(15, 2)) AND LINEITEM.L_QUANTITY <= CAST(Int32(20) + Int32(10) AS Decimal128(15, 2)) AND PART.P_SIZE >= Int32(1) AND PART.P_SIZE <= Int32(15) AND (LINEITEM.L_SHIPMODE = CAST(Utf8("AIR") AS Utf8) OR LINEITEM.L_SHIPMODE = CAST(Utf8("AIR REG") AS Utf8)) AND LINEITEM.L_SHIPINSTRUCT = Utf8("DELIVER IN PERSON") + Cross Join: + TableScan: LINEITEM + TableScan: PART diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_20.snap b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_20.snap new file mode 100644 index 000000000000..b0a2069b19a0 --- /dev/null +++ b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_20.snap @@ -0,0 +1,24 @@ +--- +source: datafusion/substrait/tests/cases/consumer_integration.rs +expression: plan_str +--- 
+Sort: SUPPLIER.S_NAME ASC NULLS LAST + Projection: SUPPLIER.S_NAME, SUPPLIER.S_ADDRESS + Filter: SUPPLIER.S_SUPPKEY IN () AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_NAME = Utf8("CANADA") + Subquery: + Projection: PARTSUPP.PS_SUPPKEY + Filter: PARTSUPP.PS_PARTKEY IN () AND CAST(PARTSUPP.PS_AVAILQTY AS Decimal128(19, 0)) > () + Subquery: + Projection: PART.P_PARTKEY + Filter: PART.P_NAME LIKE CAST(Utf8("forest%") AS Utf8) + TableScan: PART + Subquery: + Projection: Decimal128(Some(5),2,1) * sum(LINEITEM.L_QUANTITY) + Aggregate: groupBy=[[]], aggr=[[sum(LINEITEM.L_QUANTITY)]] + Projection: LINEITEM.L_QUANTITY + Filter: LINEITEM.L_PARTKEY = LINEITEM.L_ORDERKEY AND LINEITEM.L_SUPPKEY = LINEITEM.L_PARTKEY AND LINEITEM.L_SHIPDATE >= CAST(Utf8("1994-01-01") AS Date32) AND LINEITEM.L_SHIPDATE < CAST(Utf8("1995-01-01") AS Date32) + TableScan: LINEITEM + TableScan: PARTSUPP + Cross Join: + TableScan: SUPPLIER + TableScan: NATION diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_21.snap b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_21.snap new file mode 100644 index 000000000000..9fe1a65c9acd --- /dev/null +++ b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_21.snap @@ -0,0 +1,23 @@ +--- +source: datafusion/substrait/tests/cases/consumer_integration.rs +expression: plan_str +--- +Projection: SUPPLIER.S_NAME, count(Int64(1)) AS NUMWAIT + Limit: skip=0, fetch=100 + Sort: count(Int64(1)) DESC NULLS FIRST, SUPPLIER.S_NAME ASC NULLS LAST + Aggregate: groupBy=[[SUPPLIER.S_NAME]], aggr=[[count(Int64(1))]] + Projection: SUPPLIER.S_NAME + Filter: SUPPLIER.S_SUPPKEY = LINEITEM.L_SUPPKEY AND ORDERS.O_ORDERKEY = LINEITEM.L_ORDERKEY AND ORDERS.O_ORDERSTATUS = Utf8("F") AND LINEITEM.L_RECEIPTDATE > LINEITEM.L_COMMITDATE AND EXISTS () AND NOT EXISTS () AND 
SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_NAME = Utf8("SAUDI ARABIA") + Subquery: + Filter: LINEITEM.L_ORDERKEY = LINEITEM.L_TAX AND LINEITEM.L_SUPPKEY != LINEITEM.L_LINESTATUS + TableScan: LINEITEM + Subquery: + Filter: LINEITEM.L_ORDERKEY = LINEITEM.L_TAX AND LINEITEM.L_SUPPKEY != LINEITEM.L_LINESTATUS AND LINEITEM.L_RECEIPTDATE > LINEITEM.L_COMMITDATE + TableScan: LINEITEM + Cross Join: + Cross Join: + Cross Join: + TableScan: SUPPLIER + TableScan: LINEITEM + TableScan: ORDERS + TableScan: NATION diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_22.snap b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_22.snap new file mode 100644 index 000000000000..f6259132bcd5 --- /dev/null +++ b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_22.snap @@ -0,0 +1,18 @@ +--- +source: datafusion/substrait/tests/cases/consumer_integration.rs +expression: plan_str +--- +Projection: substr(CUSTOMER.C_PHONE,Int32(1),Int32(2)) AS CNTRYCODE, count(Int64(1)) AS NUMCUST, sum(CUSTOMER.C_ACCTBAL) AS TOTACCTBAL + Sort: substr(CUSTOMER.C_PHONE,Int32(1),Int32(2)) ASC NULLS LAST + Aggregate: groupBy=[[substr(CUSTOMER.C_PHONE,Int32(1),Int32(2))]], aggr=[[count(Int64(1)), sum(CUSTOMER.C_ACCTBAL)]] + Projection: substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)), CUSTOMER.C_ACCTBAL + Filter: (substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("13") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("31") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("23") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("29") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("30") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("18") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = 
CAST(Utf8("17") AS Utf8)) AND CUSTOMER.C_ACCTBAL > () AND NOT EXISTS () + Subquery: + Aggregate: groupBy=[[]], aggr=[[avg(CUSTOMER.C_ACCTBAL)]] + Projection: CUSTOMER.C_ACCTBAL + Filter: CUSTOMER.C_ACCTBAL > Decimal128(Some(0),3,2) AND (substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("13") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("31") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("23") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("29") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("30") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("18") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("17") AS Utf8)) + TableScan: CUSTOMER + Subquery: + Filter: ORDERS.O_CUSTKEY = ORDERS.O_ORDERKEY + TableScan: ORDERS + TableScan: CUSTOMER From ed1a0955c4ecb04a17988da6a03ba04f20d3fd26 Mon Sep 17 00:00:00 2001 From: qstommyshu Date: Wed, 26 Mar 2025 18:02:26 -0400 Subject: [PATCH 03/17] Revert "migrate `consumer_intergration.rs` tests to `insta`" This reverts commit c3be2ebfeaeb5afff841810e38eff657f1b86a3f. 
--- .../tests/cases/consumer_integration.rs | 311 ++++++++++++++++-- ...ests__test_select_count_from_select_1.snap | 6 - ...tion__tests__test_select_window_count.snap | 7 - ...umer_integration__tests__tpch_test_01.snap | 10 - ...umer_integration__tests__tpch_test_02.snap | 28 -- ...umer_integration__tests__tpch_test_03.snap | 16 - ...umer_integration__tests__tpch_test_04.snap | 13 - ...umer_integration__tests__tpch_test_05.snap | 20 -- ...umer_integration__tests__tpch_test_06.snap | 8 - ...umer_integration__tests__tpch_test_10.snap | 18 - ...umer_integration__tests__tpch_test_11.snap | 25 -- ...umer_integration__tests__tpch_test_12.snap | 12 - ...umer_integration__tests__tpch_test_13.snap | 14 - ...umer_integration__tests__tpch_test_14.snap | 11 - ...umer_integration__tests__tpch_test_16.snap | 16 - ...umer_integration__tests__tpch_test_18.snap | 21 -- ...umer_integration__tests__tpch_test_19.snap | 10 - ...umer_integration__tests__tpch_test_20.snap | 24 -- ...umer_integration__tests__tpch_test_21.snap | 23 -- ...umer_integration__tests__tpch_test_22.snap | 18 - 20 files changed, 286 insertions(+), 325 deletions(-) delete mode 100644 datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__test_select_count_from_select_1.snap delete mode 100644 datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__test_select_window_count.snap delete mode 100644 datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_01.snap delete mode 100644 datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_02.snap delete mode 100644 datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_03.snap delete mode 100644 datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_04.snap 
delete mode 100644 datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_05.snap delete mode 100644 datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_06.snap delete mode 100644 datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_10.snap delete mode 100644 datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_11.snap delete mode 100644 datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_12.snap delete mode 100644 datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_13.snap delete mode 100644 datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_14.snap delete mode 100644 datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_16.snap delete mode 100644 datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_18.snap delete mode 100644 datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_19.snap delete mode 100644 datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_20.snap delete mode 100644 datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_21.snap delete mode 100644 datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_22.snap diff --git a/datafusion/substrait/tests/cases/consumer_integration.rs b/datafusion/substrait/tests/cases/consumer_integration.rs index 28b41f82cdca..1f1a15abb837 100644 --- 
a/datafusion/substrait/tests/cases/consumer_integration.rs +++ b/datafusion/substrait/tests/cases/consumer_integration.rs @@ -28,7 +28,6 @@ mod tests { use datafusion::common::Result; use datafusion::prelude::SessionContext; use datafusion_substrait::logical_plan::consumer::from_substrait_plan; - use insta::assert_snapshot; use std::fs::File; use std::io::BufReader; use substrait::proto::Plan; @@ -50,42 +49,125 @@ mod tests { #[tokio::test] async fn tpch_test_01() -> Result<()> { let plan_str = tpch_plan_to_string(1).await?; - assert_snapshot!("tpch_test_01", plan_str); + assert_eq!( + plan_str, + "Projection: LINEITEM.L_RETURNFLAG, LINEITEM.L_LINESTATUS, sum(LINEITEM.L_QUANTITY) AS SUM_QTY, sum(LINEITEM.L_EXTENDEDPRICE) AS SUM_BASE_PRICE, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS SUM_DISC_PRICE, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT * Int32(1) + LINEITEM.L_TAX) AS SUM_CHARGE, avg(LINEITEM.L_QUANTITY) AS AVG_QTY, avg(LINEITEM.L_EXTENDEDPRICE) AS AVG_PRICE, avg(LINEITEM.L_DISCOUNT) AS AVG_DISC, count(Int64(1)) AS COUNT_ORDER\ + \n Sort: LINEITEM.L_RETURNFLAG ASC NULLS LAST, LINEITEM.L_LINESTATUS ASC NULLS LAST\ + \n Aggregate: groupBy=[[LINEITEM.L_RETURNFLAG, LINEITEM.L_LINESTATUS]], aggr=[[sum(LINEITEM.L_QUANTITY), sum(LINEITEM.L_EXTENDEDPRICE), sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT), sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT * Int32(1) + LINEITEM.L_TAX), avg(LINEITEM.L_QUANTITY), avg(LINEITEM.L_EXTENDEDPRICE), avg(LINEITEM.L_DISCOUNT), count(Int64(1))]]\ + \n Projection: LINEITEM.L_RETURNFLAG, LINEITEM.L_LINESTATUS, LINEITEM.L_QUANTITY, LINEITEM.L_EXTENDEDPRICE, LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT), LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT) * (CAST(Int32(1) AS Decimal128(15, 2)) + LINEITEM.L_TAX), LINEITEM.L_DISCOUNT\ + \n Filter: LINEITEM.L_SHIPDATE <= Date32(\"1998-12-01\") - 
IntervalDayTime(\"IntervalDayTime { days: 0, milliseconds: 10368000 }\")\ + \n TableScan: LINEITEM" + ); Ok(()) } #[tokio::test] async fn tpch_test_02() -> Result<()> { let plan_str = tpch_plan_to_string(2).await?; - assert_snapshot!("tpch_test_02", plan_str); + assert_eq!( + plan_str, + "Limit: skip=0, fetch=100\ + \n Sort: SUPPLIER.S_ACCTBAL DESC NULLS FIRST, NATION.N_NAME ASC NULLS LAST, SUPPLIER.S_NAME ASC NULLS LAST, PART.P_PARTKEY ASC NULLS LAST\ + \n Projection: SUPPLIER.S_ACCTBAL, SUPPLIER.S_NAME, NATION.N_NAME, PART.P_PARTKEY, PART.P_MFGR, SUPPLIER.S_ADDRESS, SUPPLIER.S_PHONE, SUPPLIER.S_COMMENT\ + \n Filter: PART.P_PARTKEY = PARTSUPP.PS_PARTKEY AND SUPPLIER.S_SUPPKEY = PARTSUPP.PS_SUPPKEY AND PART.P_SIZE = Int32(15) AND PART.P_TYPE LIKE CAST(Utf8(\"%BRASS\") AS Utf8) AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_REGIONKEY = REGION.R_REGIONKEY AND REGION.R_NAME = Utf8(\"EUROPE\") AND PARTSUPP.PS_SUPPLYCOST = ()\ + \n Subquery:\ + \n Aggregate: groupBy=[[]], aggr=[[min(PARTSUPP.PS_SUPPLYCOST)]]\ + \n Projection: PARTSUPP.PS_SUPPLYCOST\ + \n Filter: PARTSUPP.PS_PARTKEY = PARTSUPP.PS_PARTKEY AND SUPPLIER.S_SUPPKEY = PARTSUPP.PS_SUPPKEY AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_REGIONKEY = REGION.R_REGIONKEY AND REGION.R_NAME = Utf8(\"EUROPE\")\ + \n Cross Join: \ + \n Cross Join: \ + \n Cross Join: \ + \n TableScan: PARTSUPP\ + \n TableScan: SUPPLIER\ + \n TableScan: NATION\ + \n TableScan: REGION\ + \n Cross Join: \ + \n Cross Join: \ + \n Cross Join: \ + \n Cross Join: \ + \n TableScan: PART\ + \n TableScan: SUPPLIER\ + \n TableScan: PARTSUPP\ + \n TableScan: NATION\ + \n TableScan: REGION" + ); Ok(()) } #[tokio::test] async fn tpch_test_03() -> Result<()> { let plan_str = tpch_plan_to_string(3).await?; - assert_snapshot!("tpch_test_03", plan_str); + assert_eq!( + plan_str, + "Projection: LINEITEM.L_ORDERKEY, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS REVENUE, ORDERS.O_ORDERDATE, ORDERS.O_SHIPPRIORITY\ + 
\n Limit: skip=0, fetch=10\ + \n Sort: sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) DESC NULLS FIRST, ORDERS.O_ORDERDATE ASC NULLS LAST\ + \n Projection: LINEITEM.L_ORDERKEY, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT), ORDERS.O_ORDERDATE, ORDERS.O_SHIPPRIORITY\ + \n Aggregate: groupBy=[[LINEITEM.L_ORDERKEY, ORDERS.O_ORDERDATE, ORDERS.O_SHIPPRIORITY]], aggr=[[sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT)]]\ + \n Projection: LINEITEM.L_ORDERKEY, ORDERS.O_ORDERDATE, ORDERS.O_SHIPPRIORITY, LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT)\ + \n Filter: CUSTOMER.C_MKTSEGMENT = Utf8(\"BUILDING\") AND CUSTOMER.C_CUSTKEY = ORDERS.O_CUSTKEY AND LINEITEM.L_ORDERKEY = ORDERS.O_ORDERKEY AND ORDERS.O_ORDERDATE < CAST(Utf8(\"1995-03-15\") AS Date32) AND LINEITEM.L_SHIPDATE > CAST(Utf8(\"1995-03-15\") AS Date32)\ + \n Cross Join: \ + \n Cross Join: \ + \n TableScan: LINEITEM\ + \n TableScan: CUSTOMER\ + \n TableScan: ORDERS" + ); Ok(()) } #[tokio::test] async fn tpch_test_04() -> Result<()> { let plan_str = tpch_plan_to_string(4).await?; - assert_snapshot!("tpch_test_04", plan_str); + assert_eq!( + plan_str, + "Projection: ORDERS.O_ORDERPRIORITY, count(Int64(1)) AS ORDER_COUNT\ + \n Sort: ORDERS.O_ORDERPRIORITY ASC NULLS LAST\ + \n Aggregate: groupBy=[[ORDERS.O_ORDERPRIORITY]], aggr=[[count(Int64(1))]]\ + \n Projection: ORDERS.O_ORDERPRIORITY\ + \n Filter: ORDERS.O_ORDERDATE >= CAST(Utf8(\"1993-07-01\") AS Date32) AND ORDERS.O_ORDERDATE < CAST(Utf8(\"1993-10-01\") AS Date32) AND EXISTS ()\ + \n Subquery:\ + \n Filter: LINEITEM.L_ORDERKEY = LINEITEM.L_ORDERKEY AND LINEITEM.L_COMMITDATE < LINEITEM.L_RECEIPTDATE\ + \n TableScan: LINEITEM\ + \n TableScan: ORDERS" + ); Ok(()) } #[tokio::test] async fn tpch_test_05() -> Result<()> { let plan_str = tpch_plan_to_string(5).await?; - assert_snapshot!("tpch_test_05", plan_str); + assert_eq!( + plan_str, + "Projection: NATION.N_NAME, 
sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS REVENUE\ + \n Sort: sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) DESC NULLS FIRST\ + \n Aggregate: groupBy=[[NATION.N_NAME]], aggr=[[sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT)]]\ + \n Projection: NATION.N_NAME, LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT)\ + \n Filter: CUSTOMER.C_CUSTKEY = ORDERS.O_CUSTKEY AND LINEITEM.L_ORDERKEY = ORDERS.O_ORDERKEY AND LINEITEM.L_SUPPKEY = SUPPLIER.S_SUPPKEY AND CUSTOMER.C_NATIONKEY = SUPPLIER.S_NATIONKEY AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_REGIONKEY = REGION.R_REGIONKEY AND REGION.R_NAME = Utf8(\"ASIA\") AND ORDERS.O_ORDERDATE >= CAST(Utf8(\"1994-01-01\") AS Date32) AND ORDERS.O_ORDERDATE < CAST(Utf8(\"1995-01-01\") AS Date32)\ + \n Cross Join: \ + \n Cross Join: \ + \n Cross Join: \ + \n Cross Join: \ + \n Cross Join: \ + \n TableScan: CUSTOMER\ + \n TableScan: ORDERS\ + \n TableScan: LINEITEM\ + \n TableScan: SUPPLIER\ + \n TableScan: NATION\ + \n TableScan: REGION" + ); Ok(()) } #[tokio::test] async fn tpch_test_06() -> Result<()> { let plan_str = tpch_plan_to_string(6).await?; - assert_snapshot!("tpch_test_06", plan_str); + assert_eq!( + plan_str, + "Aggregate: groupBy=[[]], aggr=[[sum(LINEITEM.L_EXTENDEDPRICE * LINEITEM.L_DISCOUNT) AS REVENUE]]\ + \n Projection: LINEITEM.L_EXTENDEDPRICE * LINEITEM.L_DISCOUNT\ + \n Filter: LINEITEM.L_SHIPDATE >= CAST(Utf8(\"1994-01-01\") AS Date32) AND LINEITEM.L_SHIPDATE < CAST(Utf8(\"1995-01-01\") AS Date32) AND LINEITEM.L_DISCOUNT >= Decimal128(Some(5),3,2) AND LINEITEM.L_DISCOUNT <= Decimal128(Some(7),3,2) AND LINEITEM.L_QUANTITY < CAST(Int32(24) AS Decimal128(15, 2))\ + \n TableScan: LINEITEM" + ); Ok(()) } @@ -93,7 +175,7 @@ mod tests { #[tokio::test] async fn tpch_test_07() -> Result<()> { let plan_str = tpch_plan_to_string(7).await?; - assert_snapshot!("tpch_test_07", plan_str); + assert_eq!(plan_str, "Missing support 
for enum function arguments"); Ok(()) } @@ -101,7 +183,7 @@ mod tests { #[tokio::test] async fn tpch_test_08() -> Result<()> { let plan_str = tpch_plan_to_string(8).await?; - assert_snapshot!("tpch_test_08", plan_str); + assert_eq!(plan_str, "Missing support for enum function arguments"); Ok(()) } @@ -109,42 +191,112 @@ mod tests { #[tokio::test] async fn tpch_test_09() -> Result<()> { let plan_str = tpch_plan_to_string(9).await?; - assert_snapshot!("tpch_test_09", plan_str); + assert_eq!(plan_str, "Missing support for enum function arguments"); Ok(()) } #[tokio::test] async fn tpch_test_10() -> Result<()> { let plan_str = tpch_plan_to_string(10).await?; - assert_snapshot!("tpch_test_10", plan_str); + assert_eq!( + plan_str, + "Projection: CUSTOMER.C_CUSTKEY, CUSTOMER.C_NAME, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS REVENUE, CUSTOMER.C_ACCTBAL, NATION.N_NAME, CUSTOMER.C_ADDRESS, CUSTOMER.C_PHONE, CUSTOMER.C_COMMENT\ + \n Limit: skip=0, fetch=20\ + \n Sort: sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) DESC NULLS FIRST\ + \n Projection: CUSTOMER.C_CUSTKEY, CUSTOMER.C_NAME, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT), CUSTOMER.C_ACCTBAL, NATION.N_NAME, CUSTOMER.C_ADDRESS, CUSTOMER.C_PHONE, CUSTOMER.C_COMMENT\ + \n Aggregate: groupBy=[[CUSTOMER.C_CUSTKEY, CUSTOMER.C_NAME, CUSTOMER.C_ACCTBAL, CUSTOMER.C_PHONE, NATION.N_NAME, CUSTOMER.C_ADDRESS, CUSTOMER.C_COMMENT]], aggr=[[sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT)]]\ + \n Projection: CUSTOMER.C_CUSTKEY, CUSTOMER.C_NAME, CUSTOMER.C_ACCTBAL, CUSTOMER.C_PHONE, NATION.N_NAME, CUSTOMER.C_ADDRESS, CUSTOMER.C_COMMENT, LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT)\ + \n Filter: CUSTOMER.C_CUSTKEY = ORDERS.O_CUSTKEY AND LINEITEM.L_ORDERKEY = ORDERS.O_ORDERKEY AND ORDERS.O_ORDERDATE >= CAST(Utf8(\"1993-10-01\") AS Date32) AND ORDERS.O_ORDERDATE < CAST(Utf8(\"1994-01-01\") AS Date32) AND LINEITEM.L_RETURNFLAG 
= Utf8(\"R\") AND CUSTOMER.C_NATIONKEY = NATION.N_NATIONKEY\ + \n Cross Join: \ + \n Cross Join: \ + \n Cross Join: \ + \n TableScan: CUSTOMER\ + \n TableScan: ORDERS\ + \n TableScan: LINEITEM\ + \n TableScan: NATION" + ); Ok(()) } #[tokio::test] async fn tpch_test_11() -> Result<()> { let plan_str = tpch_plan_to_string(11).await?; - assert_snapshot!("tpch_test_11", plan_str); + assert_eq!( + plan_str, + "Projection: PARTSUPP.PS_PARTKEY, sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY) AS value\ + \n Sort: sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY) DESC NULLS FIRST\ + \n Filter: sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY) > ()\ + \n Subquery:\ + \n Projection: sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY) * Decimal128(Some(1000000),11,10)\ + \n Aggregate: groupBy=[[]], aggr=[[sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY)]]\ + \n Projection: PARTSUPP.PS_SUPPLYCOST * CAST(PARTSUPP.PS_AVAILQTY AS Decimal128(19, 0))\ + \n Filter: PARTSUPP.PS_SUPPKEY = SUPPLIER.S_SUPPKEY AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_NAME = Utf8(\"JAPAN\")\ + \n Cross Join: \ + \n Cross Join: \ + \n TableScan: PARTSUPP\ + \n TableScan: SUPPLIER\ + \n TableScan: NATION\ + \n Aggregate: groupBy=[[PARTSUPP.PS_PARTKEY]], aggr=[[sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY)]]\ + \n Projection: PARTSUPP.PS_PARTKEY, PARTSUPP.PS_SUPPLYCOST * CAST(PARTSUPP.PS_AVAILQTY AS Decimal128(19, 0))\ + \n Filter: PARTSUPP.PS_SUPPKEY = SUPPLIER.S_SUPPKEY AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_NAME = Utf8(\"JAPAN\")\ + \n Cross Join: \ + \n Cross Join: \ + \n TableScan: PARTSUPP\ + \n TableScan: SUPPLIER\ + \n TableScan: NATION" + ); Ok(()) } #[tokio::test] async fn tpch_test_12() -> Result<()> { let plan_str = tpch_plan_to_string(12).await?; - assert_snapshot!("tpch_test_12", plan_str); + assert_eq!( + plan_str, + "Projection: LINEITEM.L_SHIPMODE, sum(CASE WHEN ORDERS.O_ORDERPRIORITY = Utf8(\"1-URGENT\") OR ORDERS.O_ORDERPRIORITY = 
Utf8(\"2-HIGH\") THEN Int32(1) ELSE Int32(0) END) AS HIGH_LINE_COUNT, sum(CASE WHEN ORDERS.O_ORDERPRIORITY != Utf8(\"1-URGENT\") AND ORDERS.O_ORDERPRIORITY != Utf8(\"2-HIGH\") THEN Int32(1) ELSE Int32(0) END) AS LOW_LINE_COUNT\ + \n Sort: LINEITEM.L_SHIPMODE ASC NULLS LAST\ + \n Aggregate: groupBy=[[LINEITEM.L_SHIPMODE]], aggr=[[sum(CASE WHEN ORDERS.O_ORDERPRIORITY = Utf8(\"1-URGENT\") OR ORDERS.O_ORDERPRIORITY = Utf8(\"2-HIGH\") THEN Int32(1) ELSE Int32(0) END), sum(CASE WHEN ORDERS.O_ORDERPRIORITY != Utf8(\"1-URGENT\") AND ORDERS.O_ORDERPRIORITY != Utf8(\"2-HIGH\") THEN Int32(1) ELSE Int32(0) END)]]\ + \n Projection: LINEITEM.L_SHIPMODE, CASE WHEN ORDERS.O_ORDERPRIORITY = Utf8(\"1-URGENT\") OR ORDERS.O_ORDERPRIORITY = Utf8(\"2-HIGH\") THEN Int32(1) ELSE Int32(0) END, CASE WHEN ORDERS.O_ORDERPRIORITY != Utf8(\"1-URGENT\") AND ORDERS.O_ORDERPRIORITY != Utf8(\"2-HIGH\") THEN Int32(1) ELSE Int32(0) END\ + \n Filter: ORDERS.O_ORDERKEY = LINEITEM.L_ORDERKEY AND (LINEITEM.L_SHIPMODE = CAST(Utf8(\"MAIL\") AS Utf8) OR LINEITEM.L_SHIPMODE = CAST(Utf8(\"SHIP\") AS Utf8)) AND LINEITEM.L_COMMITDATE < LINEITEM.L_RECEIPTDATE AND LINEITEM.L_SHIPDATE < LINEITEM.L_COMMITDATE AND LINEITEM.L_RECEIPTDATE >= CAST(Utf8(\"1994-01-01\") AS Date32) AND LINEITEM.L_RECEIPTDATE < CAST(Utf8(\"1995-01-01\") AS Date32)\ + \n Cross Join: \ + \n TableScan: ORDERS\ + \n TableScan: LINEITEM" + ); Ok(()) } #[tokio::test] async fn tpch_test_13() -> Result<()> { let plan_str = tpch_plan_to_string(13).await?; - assert_snapshot!("tpch_test_13", plan_str); + assert_eq!( + plan_str, + "Projection: count(ORDERS.O_ORDERKEY) AS C_COUNT, count(Int64(1)) AS CUSTDIST\ + \n Sort: count(Int64(1)) DESC NULLS FIRST, count(ORDERS.O_ORDERKEY) DESC NULLS FIRST\ + \n Projection: count(ORDERS.O_ORDERKEY), count(Int64(1))\ + \n Aggregate: groupBy=[[count(ORDERS.O_ORDERKEY)]], aggr=[[count(Int64(1))]]\ + \n Projection: count(ORDERS.O_ORDERKEY)\ + \n Aggregate: groupBy=[[CUSTOMER.C_CUSTKEY]], 
aggr=[[count(ORDERS.O_ORDERKEY)]]\ + \n Projection: CUSTOMER.C_CUSTKEY, ORDERS.O_ORDERKEY\ + \n Left Join: CUSTOMER.C_CUSTKEY = ORDERS.O_CUSTKEY Filter: NOT ORDERS.O_COMMENT LIKE CAST(Utf8(\"%special%requests%\") AS Utf8)\ + \n TableScan: CUSTOMER\ + \n TableScan: ORDERS" + ); Ok(()) } #[tokio::test] async fn tpch_test_14() -> Result<()> { let plan_str = tpch_plan_to_string(14).await?; - assert_snapshot!("tpch_test_14", plan_str); + assert_eq!( + plan_str, + "Projection: Decimal128(Some(10000),5,2) * sum(CASE WHEN PART.P_TYPE LIKE Utf8(\"PROMO%\") THEN LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT ELSE Decimal128(Some(0),19,4) END) / sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS PROMO_REVENUE\ + \n Aggregate: groupBy=[[]], aggr=[[sum(CASE WHEN PART.P_TYPE LIKE Utf8(\"PROMO%\") THEN LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT ELSE Decimal128(Some(0),19,4) END), sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT)]]\ + \n Projection: CASE WHEN PART.P_TYPE LIKE CAST(Utf8(\"PROMO%\") AS Utf8) THEN LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT) ELSE Decimal128(Some(0),19,4) END, LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT)\ + \n Filter: LINEITEM.L_PARTKEY = PART.P_PARTKEY AND LINEITEM.L_SHIPDATE >= Date32(\"1995-09-01\") AND LINEITEM.L_SHIPDATE < CAST(Utf8(\"1995-10-01\") AS Date32)\ + \n Cross Join: \ + \n TableScan: LINEITEM\ + \n TableScan: PART" + ); Ok(()) } @@ -152,14 +304,28 @@ mod tests { #[tokio::test] async fn tpch_test_15() -> Result<()> { let plan_str = tpch_plan_to_string(15).await?; - assert_snapshot!("tpch_test_15", plan_str); + assert_eq!(plan_str, "Test file is empty"); Ok(()) } #[tokio::test] async fn tpch_test_16() -> Result<()> { let plan_str = tpch_plan_to_string(16).await?; - assert_snapshot!("tpch_test_16", plan_str); + assert_eq!( + plan_str, + "Projection: PART.P_BRAND, PART.P_TYPE, PART.P_SIZE, count(DISTINCT 
PARTSUPP.PS_SUPPKEY) AS SUPPLIER_CNT\ + \n Sort: count(DISTINCT PARTSUPP.PS_SUPPKEY) DESC NULLS FIRST, PART.P_BRAND ASC NULLS LAST, PART.P_TYPE ASC NULLS LAST, PART.P_SIZE ASC NULLS LAST\ + \n Aggregate: groupBy=[[PART.P_BRAND, PART.P_TYPE, PART.P_SIZE]], aggr=[[count(DISTINCT PARTSUPP.PS_SUPPKEY)]]\ + \n Projection: PART.P_BRAND, PART.P_TYPE, PART.P_SIZE, PARTSUPP.PS_SUPPKEY\ + \n Filter: PART.P_PARTKEY = PARTSUPP.PS_PARTKEY AND PART.P_BRAND != Utf8(\"Brand#45\") AND NOT PART.P_TYPE LIKE CAST(Utf8(\"MEDIUM POLISHED%\") AS Utf8) AND (PART.P_SIZE = Int32(49) OR PART.P_SIZE = Int32(14) OR PART.P_SIZE = Int32(23) OR PART.P_SIZE = Int32(45) OR PART.P_SIZE = Int32(19) OR PART.P_SIZE = Int32(3) OR PART.P_SIZE = Int32(36) OR PART.P_SIZE = Int32(9)) AND NOT PARTSUPP.PS_SUPPKEY IN ()\ + \n Subquery:\ + \n Projection: SUPPLIER.S_SUPPKEY\ + \n Filter: SUPPLIER.S_COMMENT LIKE CAST(Utf8(\"%Customer%Complaints%\") AS Utf8)\ + \n TableScan: SUPPLIER\ + \n Cross Join: \ + \n TableScan: PARTSUPP\ + \n TableScan: PART" + ); Ok(()) } @@ -167,41 +333,127 @@ mod tests { #[tokio::test] async fn tpch_test_17() -> Result<()> { let plan_str = tpch_plan_to_string(17).await?; - assert_snapshot!("tpch_test_17", plan_str); + assert_eq!(plan_str, "panics due to out of bounds field access"); Ok(()) } #[tokio::test] async fn tpch_test_18() -> Result<()> { let plan_str = tpch_plan_to_string(18).await?; - assert_snapshot!("tpch_test_18", plan_str); + assert_eq!( + plan_str, + "Projection: CUSTOMER.C_NAME, CUSTOMER.C_CUSTKEY, ORDERS.O_ORDERKEY, ORDERS.O_ORDERDATE, ORDERS.O_TOTALPRICE, sum(LINEITEM.L_QUANTITY) AS EXPR$5\ + \n Limit: skip=0, fetch=100\ + \n Sort: ORDERS.O_TOTALPRICE DESC NULLS FIRST, ORDERS.O_ORDERDATE ASC NULLS LAST\ + \n Aggregate: groupBy=[[CUSTOMER.C_NAME, CUSTOMER.C_CUSTKEY, ORDERS.O_ORDERKEY, ORDERS.O_ORDERDATE, ORDERS.O_TOTALPRICE]], aggr=[[sum(LINEITEM.L_QUANTITY)]]\ + \n Projection: CUSTOMER.C_NAME, CUSTOMER.C_CUSTKEY, ORDERS.O_ORDERKEY, ORDERS.O_ORDERDATE, 
ORDERS.O_TOTALPRICE, LINEITEM.L_QUANTITY\ + \n Filter: ORDERS.O_ORDERKEY IN () AND CUSTOMER.C_CUSTKEY = ORDERS.O_CUSTKEY AND ORDERS.O_ORDERKEY = LINEITEM.L_ORDERKEY\ + \n Subquery:\ + \n Projection: LINEITEM.L_ORDERKEY\ + \n Filter: sum(LINEITEM.L_QUANTITY) > CAST(Int32(300) AS Decimal128(15, 2))\ + \n Aggregate: groupBy=[[LINEITEM.L_ORDERKEY]], aggr=[[sum(LINEITEM.L_QUANTITY)]]\ + \n Projection: LINEITEM.L_ORDERKEY, LINEITEM.L_QUANTITY\ + \n TableScan: LINEITEM\ + \n Cross Join: \ + \n Cross Join: \ + \n TableScan: CUSTOMER\ + \n TableScan: ORDERS\ + \n TableScan: LINEITEM" + ); Ok(()) } #[tokio::test] async fn tpch_test_19() -> Result<()> { let plan_str = tpch_plan_to_string(19).await?; - assert_snapshot!("tpch_test_19", plan_str); + assert_eq!( + plan_str, + "Aggregate: groupBy=[[]], aggr=[[sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS REVENUE]]\ + \n Projection: LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT)\ + \n Filter: PART.P_PARTKEY = LINEITEM.L_PARTKEY AND PART.P_BRAND = Utf8(\"Brand#12\") AND (PART.P_CONTAINER = CAST(Utf8(\"SM CASE\") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8(\"SM BOX\") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8(\"SM PACK\") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8(\"SM PKG\") AS Utf8)) AND LINEITEM.L_QUANTITY >= CAST(Int32(1) AS Decimal128(15, 2)) AND LINEITEM.L_QUANTITY <= CAST(Int32(1) + Int32(10) AS Decimal128(15, 2)) AND PART.P_SIZE >= Int32(1) AND PART.P_SIZE <= Int32(5) AND (LINEITEM.L_SHIPMODE = CAST(Utf8(\"AIR\") AS Utf8) OR LINEITEM.L_SHIPMODE = CAST(Utf8(\"AIR REG\") AS Utf8)) AND LINEITEM.L_SHIPINSTRUCT = Utf8(\"DELIVER IN PERSON\") OR PART.P_PARTKEY = LINEITEM.L_PARTKEY AND PART.P_BRAND = Utf8(\"Brand#23\") AND (PART.P_CONTAINER = CAST(Utf8(\"MED BAG\") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8(\"MED BOX\") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8(\"MED PKG\") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8(\"MED PACK\") AS Utf8)) AND LINEITEM.L_QUANTITY >= CAST(Int32(10) AS 
Decimal128(15, 2)) AND LINEITEM.L_QUANTITY <= CAST(Int32(10) + Int32(10) AS Decimal128(15, 2)) AND PART.P_SIZE >= Int32(1) AND PART.P_SIZE <= Int32(10) AND (LINEITEM.L_SHIPMODE = CAST(Utf8(\"AIR\") AS Utf8) OR LINEITEM.L_SHIPMODE = CAST(Utf8(\"AIR REG\") AS Utf8)) AND LINEITEM.L_SHIPINSTRUCT = Utf8(\"DELIVER IN PERSON\") OR PART.P_PARTKEY = LINEITEM.L_PARTKEY AND PART.P_BRAND = Utf8(\"Brand#34\") AND (PART.P_CONTAINER = CAST(Utf8(\"LG CASE\") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8(\"LG BOX\") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8(\"LG PACK\") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8(\"LG PKG\") AS Utf8)) AND LINEITEM.L_QUANTITY >= CAST(Int32(20) AS Decimal128(15, 2)) AND LINEITEM.L_QUANTITY <= CAST(Int32(20) + Int32(10) AS Decimal128(15, 2)) AND PART.P_SIZE >= Int32(1) AND PART.P_SIZE <= Int32(15) AND (LINEITEM.L_SHIPMODE = CAST(Utf8(\"AIR\") AS Utf8) OR LINEITEM.L_SHIPMODE = CAST(Utf8(\"AIR REG\") AS Utf8)) AND LINEITEM.L_SHIPINSTRUCT = Utf8(\"DELIVER IN PERSON\")\ + \n Cross Join: \ + \n TableScan: LINEITEM\ + \n TableScan: PART" + ); Ok(()) } #[tokio::test] async fn tpch_test_20() -> Result<()> { let plan_str = tpch_plan_to_string(20).await?; - assert_snapshot!("tpch_test_20", plan_str); + assert_eq!( + plan_str, + "Sort: SUPPLIER.S_NAME ASC NULLS LAST\ + \n Projection: SUPPLIER.S_NAME, SUPPLIER.S_ADDRESS\ + \n Filter: SUPPLIER.S_SUPPKEY IN () AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_NAME = Utf8(\"CANADA\")\ + \n Subquery:\ + \n Projection: PARTSUPP.PS_SUPPKEY\ + \n Filter: PARTSUPP.PS_PARTKEY IN () AND CAST(PARTSUPP.PS_AVAILQTY AS Decimal128(19, 0)) > ()\ + \n Subquery:\ + \n Projection: PART.P_PARTKEY\ + \n Filter: PART.P_NAME LIKE CAST(Utf8(\"forest%\") AS Utf8)\ + \n TableScan: PART\ + \n Subquery:\ + \n Projection: Decimal128(Some(5),2,1) * sum(LINEITEM.L_QUANTITY)\ + \n Aggregate: groupBy=[[]], aggr=[[sum(LINEITEM.L_QUANTITY)]]\ + \n Projection: LINEITEM.L_QUANTITY\ + \n Filter: LINEITEM.L_PARTKEY = LINEITEM.L_ORDERKEY AND 
LINEITEM.L_SUPPKEY = LINEITEM.L_PARTKEY AND LINEITEM.L_SHIPDATE >= CAST(Utf8(\"1994-01-01\") AS Date32) AND LINEITEM.L_SHIPDATE < CAST(Utf8(\"1995-01-01\") AS Date32)\ + \n TableScan: LINEITEM\ + \n TableScan: PARTSUPP\ + \n Cross Join: \ + \n TableScan: SUPPLIER\ + \n TableScan: NATION" + ); Ok(()) } #[tokio::test] async fn tpch_test_21() -> Result<()> { let plan_str = tpch_plan_to_string(21).await?; - assert_snapshot!("tpch_test_21", plan_str); + assert_eq!( + plan_str, + "Projection: SUPPLIER.S_NAME, count(Int64(1)) AS NUMWAIT\ + \n Limit: skip=0, fetch=100\ + \n Sort: count(Int64(1)) DESC NULLS FIRST, SUPPLIER.S_NAME ASC NULLS LAST\ + \n Aggregate: groupBy=[[SUPPLIER.S_NAME]], aggr=[[count(Int64(1))]]\ + \n Projection: SUPPLIER.S_NAME\ + \n Filter: SUPPLIER.S_SUPPKEY = LINEITEM.L_SUPPKEY AND ORDERS.O_ORDERKEY = LINEITEM.L_ORDERKEY AND ORDERS.O_ORDERSTATUS = Utf8(\"F\") AND LINEITEM.L_RECEIPTDATE > LINEITEM.L_COMMITDATE AND EXISTS () AND NOT EXISTS () AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_NAME = Utf8(\"SAUDI ARABIA\")\ + \n Subquery:\ + \n Filter: LINEITEM.L_ORDERKEY = LINEITEM.L_TAX AND LINEITEM.L_SUPPKEY != LINEITEM.L_LINESTATUS\ + \n TableScan: LINEITEM\ + \n Subquery:\ + \n Filter: LINEITEM.L_ORDERKEY = LINEITEM.L_TAX AND LINEITEM.L_SUPPKEY != LINEITEM.L_LINESTATUS AND LINEITEM.L_RECEIPTDATE > LINEITEM.L_COMMITDATE\ + \n TableScan: LINEITEM\ + \n Cross Join: \ + \n Cross Join: \ + \n Cross Join: \ + \n TableScan: SUPPLIER\ + \n TableScan: LINEITEM\ + \n TableScan: ORDERS\ + \n TableScan: NATION" + ); Ok(()) } #[tokio::test] async fn tpch_test_22() -> Result<()> { let plan_str = tpch_plan_to_string(22).await?; - assert_snapshot!("tpch_test_22", plan_str); + assert_eq!( + plan_str, + "Projection: substr(CUSTOMER.C_PHONE,Int32(1),Int32(2)) AS CNTRYCODE, count(Int64(1)) AS NUMCUST, sum(CUSTOMER.C_ACCTBAL) AS TOTACCTBAL\ + \n Sort: substr(CUSTOMER.C_PHONE,Int32(1),Int32(2)) ASC NULLS LAST\ + \n Aggregate: 
groupBy=[[substr(CUSTOMER.C_PHONE,Int32(1),Int32(2))]], aggr=[[count(Int64(1)), sum(CUSTOMER.C_ACCTBAL)]]\ + \n Projection: substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)), CUSTOMER.C_ACCTBAL\ + \n Filter: (substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"13\") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"31\") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"23\") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"29\") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"30\") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"18\") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"17\") AS Utf8)) AND CUSTOMER.C_ACCTBAL > () AND NOT EXISTS ()\ + \n Subquery:\ + \n Aggregate: groupBy=[[]], aggr=[[avg(CUSTOMER.C_ACCTBAL)]]\ + \n Projection: CUSTOMER.C_ACCTBAL\ + \n Filter: CUSTOMER.C_ACCTBAL > Decimal128(Some(0),3,2) AND (substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"13\") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"31\") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"23\") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"29\") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"30\") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"18\") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"17\") AS Utf8))\ + \n TableScan: CUSTOMER\ + \n Subquery:\ + \n Filter: ORDERS.O_CUSTKEY = ORDERS.O_ORDERKEY\ + \n TableScan: ORDERS\ + \n TableScan: CUSTOMER" + ); Ok(()) } @@ -223,7 +475,11 @@ mod tests { let plan_str = test_plan_to_string("select_count_from_select_1.substrait.json").await?; - assert_snapshot!("test_select_count_from_select_1", plan_str); + assert_eq!( + plan_str, + "Aggregate: groupBy=[[]], aggr=[[count(Int64(1)) AS count(*)]]\ + \n Values: (Int64(0))" + ); Ok(()) } @@ -231,7 +487,12 
@@ mod tests { async fn test_select_window_count() -> Result<()> { let plan_str = test_plan_to_string("select_window_count.substrait.json").await?; - assert_snapshot!("test_select_window_count", plan_str); + assert_eq!( + plan_str, + "Projection: count(Int64(1)) PARTITION BY [DATA.PART] ORDER BY [DATA.ORD ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING AS LEAD_EXPR\ + \n WindowAggr: windowExpr=[[count(Int64(1)) PARTITION BY [DATA.PART] ORDER BY [DATA.ORD ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING]]\ + \n TableScan: DATA" + ); Ok(()) } } diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__test_select_count_from_select_1.snap b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__test_select_count_from_select_1.snap deleted file mode 100644 index 50307bd38f66..000000000000 --- a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__test_select_count_from_select_1.snap +++ /dev/null @@ -1,6 +0,0 @@ ---- -source: datafusion/substrait/tests/cases/consumer_integration.rs -expression: plan_str ---- -Aggregate: groupBy=[[]], aggr=[[count(Int64(1)) AS count(*)]] - Values: (Int64(0)) diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__test_select_window_count.snap b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__test_select_window_count.snap deleted file mode 100644 index d9b4bc4601f3..000000000000 --- a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__test_select_window_count.snap +++ /dev/null @@ -1,7 +0,0 @@ ---- -source: datafusion/substrait/tests/cases/consumer_integration.rs -expression: plan_str ---- -Projection: count(Int64(1)) PARTITION BY [DATA.PART] ORDER BY [DATA.ORD ASC NULLS LAST] ROWS BETWEEN 1 
PRECEDING AND UNBOUNDED FOLLOWING AS LEAD_EXPR - WindowAggr: windowExpr=[[count(Int64(1)) PARTITION BY [DATA.PART] ORDER BY [DATA.ORD ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING]] - TableScan: DATA diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_01.snap b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_01.snap deleted file mode 100644 index 6aa3926d1517..000000000000 --- a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_01.snap +++ /dev/null @@ -1,10 +0,0 @@ ---- -source: datafusion/substrait/tests/cases/consumer_integration.rs -expression: plan_str ---- -Projection: LINEITEM.L_RETURNFLAG, LINEITEM.L_LINESTATUS, sum(LINEITEM.L_QUANTITY) AS SUM_QTY, sum(LINEITEM.L_EXTENDEDPRICE) AS SUM_BASE_PRICE, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS SUM_DISC_PRICE, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT * Int32(1) + LINEITEM.L_TAX) AS SUM_CHARGE, avg(LINEITEM.L_QUANTITY) AS AVG_QTY, avg(LINEITEM.L_EXTENDEDPRICE) AS AVG_PRICE, avg(LINEITEM.L_DISCOUNT) AS AVG_DISC, count(Int64(1)) AS COUNT_ORDER - Sort: LINEITEM.L_RETURNFLAG ASC NULLS LAST, LINEITEM.L_LINESTATUS ASC NULLS LAST - Aggregate: groupBy=[[LINEITEM.L_RETURNFLAG, LINEITEM.L_LINESTATUS]], aggr=[[sum(LINEITEM.L_QUANTITY), sum(LINEITEM.L_EXTENDEDPRICE), sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT), sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT * Int32(1) + LINEITEM.L_TAX), avg(LINEITEM.L_QUANTITY), avg(LINEITEM.L_EXTENDEDPRICE), avg(LINEITEM.L_DISCOUNT), count(Int64(1))]] - Projection: LINEITEM.L_RETURNFLAG, LINEITEM.L_LINESTATUS, LINEITEM.L_QUANTITY, LINEITEM.L_EXTENDEDPRICE, LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT), LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 
2)) - LINEITEM.L_DISCOUNT) * (CAST(Int32(1) AS Decimal128(15, 2)) + LINEITEM.L_TAX), LINEITEM.L_DISCOUNT - Filter: LINEITEM.L_SHIPDATE <= Date32("1998-12-01") - IntervalDayTime("IntervalDayTime { days: 0, milliseconds: 10368000 }") - TableScan: LINEITEM diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_02.snap b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_02.snap deleted file mode 100644 index 78b0e463939b..000000000000 --- a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_02.snap +++ /dev/null @@ -1,28 +0,0 @@ ---- -source: datafusion/substrait/tests/cases/consumer_integration.rs -expression: plan_str ---- -Limit: skip=0, fetch=100 - Sort: SUPPLIER.S_ACCTBAL DESC NULLS FIRST, NATION.N_NAME ASC NULLS LAST, SUPPLIER.S_NAME ASC NULLS LAST, PART.P_PARTKEY ASC NULLS LAST - Projection: SUPPLIER.S_ACCTBAL, SUPPLIER.S_NAME, NATION.N_NAME, PART.P_PARTKEY, PART.P_MFGR, SUPPLIER.S_ADDRESS, SUPPLIER.S_PHONE, SUPPLIER.S_COMMENT - Filter: PART.P_PARTKEY = PARTSUPP.PS_PARTKEY AND SUPPLIER.S_SUPPKEY = PARTSUPP.PS_SUPPKEY AND PART.P_SIZE = Int32(15) AND PART.P_TYPE LIKE CAST(Utf8("%BRASS") AS Utf8) AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_REGIONKEY = REGION.R_REGIONKEY AND REGION.R_NAME = Utf8("EUROPE") AND PARTSUPP.PS_SUPPLYCOST = () - Subquery: - Aggregate: groupBy=[[]], aggr=[[min(PARTSUPP.PS_SUPPLYCOST)]] - Projection: PARTSUPP.PS_SUPPLYCOST - Filter: PARTSUPP.PS_PARTKEY = PARTSUPP.PS_PARTKEY AND SUPPLIER.S_SUPPKEY = PARTSUPP.PS_SUPPKEY AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_REGIONKEY = REGION.R_REGIONKEY AND REGION.R_NAME = Utf8("EUROPE") - Cross Join: - Cross Join: - Cross Join: - TableScan: PARTSUPP - TableScan: SUPPLIER - TableScan: NATION - TableScan: REGION - Cross Join: - Cross Join: - Cross Join: - Cross Join: - TableScan: PART 
- TableScan: SUPPLIER - TableScan: PARTSUPP - TableScan: NATION - TableScan: REGION diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_03.snap b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_03.snap deleted file mode 100644 index 00939ecb9c37..000000000000 --- a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_03.snap +++ /dev/null @@ -1,16 +0,0 @@ ---- -source: datafusion/substrait/tests/cases/consumer_integration.rs -expression: plan_str ---- -Projection: LINEITEM.L_ORDERKEY, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS REVENUE, ORDERS.O_ORDERDATE, ORDERS.O_SHIPPRIORITY - Limit: skip=0, fetch=10 - Sort: sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) DESC NULLS FIRST, ORDERS.O_ORDERDATE ASC NULLS LAST - Projection: LINEITEM.L_ORDERKEY, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT), ORDERS.O_ORDERDATE, ORDERS.O_SHIPPRIORITY - Aggregate: groupBy=[[LINEITEM.L_ORDERKEY, ORDERS.O_ORDERDATE, ORDERS.O_SHIPPRIORITY]], aggr=[[sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT)]] - Projection: LINEITEM.L_ORDERKEY, ORDERS.O_ORDERDATE, ORDERS.O_SHIPPRIORITY, LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT) - Filter: CUSTOMER.C_MKTSEGMENT = Utf8("BUILDING") AND CUSTOMER.C_CUSTKEY = ORDERS.O_CUSTKEY AND LINEITEM.L_ORDERKEY = ORDERS.O_ORDERKEY AND ORDERS.O_ORDERDATE < CAST(Utf8("1995-03-15") AS Date32) AND LINEITEM.L_SHIPDATE > CAST(Utf8("1995-03-15") AS Date32) - Cross Join: - Cross Join: - TableScan: LINEITEM - TableScan: CUSTOMER - TableScan: ORDERS diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_04.snap 
b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_04.snap deleted file mode 100644 index 3c54c1a413f8..000000000000 --- a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_04.snap +++ /dev/null @@ -1,13 +0,0 @@ ---- -source: datafusion/substrait/tests/cases/consumer_integration.rs -expression: plan_str ---- -Projection: ORDERS.O_ORDERPRIORITY, count(Int64(1)) AS ORDER_COUNT - Sort: ORDERS.O_ORDERPRIORITY ASC NULLS LAST - Aggregate: groupBy=[[ORDERS.O_ORDERPRIORITY]], aggr=[[count(Int64(1))]] - Projection: ORDERS.O_ORDERPRIORITY - Filter: ORDERS.O_ORDERDATE >= CAST(Utf8("1993-07-01") AS Date32) AND ORDERS.O_ORDERDATE < CAST(Utf8("1993-10-01") AS Date32) AND EXISTS () - Subquery: - Filter: LINEITEM.L_ORDERKEY = LINEITEM.L_ORDERKEY AND LINEITEM.L_COMMITDATE < LINEITEM.L_RECEIPTDATE - TableScan: LINEITEM - TableScan: ORDERS diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_05.snap b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_05.snap deleted file mode 100644 index a7f331709d18..000000000000 --- a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_05.snap +++ /dev/null @@ -1,20 +0,0 @@ ---- -source: datafusion/substrait/tests/cases/consumer_integration.rs -expression: plan_str ---- -Projection: NATION.N_NAME, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS REVENUE - Sort: sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) DESC NULLS FIRST - Aggregate: groupBy=[[NATION.N_NAME]], aggr=[[sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT)]] - Projection: NATION.N_NAME, LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT) - Filter: CUSTOMER.C_CUSTKEY = ORDERS.O_CUSTKEY AND 
LINEITEM.L_ORDERKEY = ORDERS.O_ORDERKEY AND LINEITEM.L_SUPPKEY = SUPPLIER.S_SUPPKEY AND CUSTOMER.C_NATIONKEY = SUPPLIER.S_NATIONKEY AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_REGIONKEY = REGION.R_REGIONKEY AND REGION.R_NAME = Utf8("ASIA") AND ORDERS.O_ORDERDATE >= CAST(Utf8("1994-01-01") AS Date32) AND ORDERS.O_ORDERDATE < CAST(Utf8("1995-01-01") AS Date32) - Cross Join: - Cross Join: - Cross Join: - Cross Join: - Cross Join: - TableScan: CUSTOMER - TableScan: ORDERS - TableScan: LINEITEM - TableScan: SUPPLIER - TableScan: NATION - TableScan: REGION diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_06.snap b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_06.snap deleted file mode 100644 index e8cf01830ec0..000000000000 --- a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_06.snap +++ /dev/null @@ -1,8 +0,0 @@ ---- -source: datafusion/substrait/tests/cases/consumer_integration.rs -expression: plan_str ---- -Aggregate: groupBy=[[]], aggr=[[sum(LINEITEM.L_EXTENDEDPRICE * LINEITEM.L_DISCOUNT) AS REVENUE]] - Projection: LINEITEM.L_EXTENDEDPRICE * LINEITEM.L_DISCOUNT - Filter: LINEITEM.L_SHIPDATE >= CAST(Utf8("1994-01-01") AS Date32) AND LINEITEM.L_SHIPDATE < CAST(Utf8("1995-01-01") AS Date32) AND LINEITEM.L_DISCOUNT >= Decimal128(Some(5),3,2) AND LINEITEM.L_DISCOUNT <= Decimal128(Some(7),3,2) AND LINEITEM.L_QUANTITY < CAST(Int32(24) AS Decimal128(15, 2)) - TableScan: LINEITEM diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_10.snap b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_10.snap deleted file mode 100644 index 2ae4a3dd140c..000000000000 --- 
a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_10.snap +++ /dev/null @@ -1,18 +0,0 @@ ---- -source: datafusion/substrait/tests/cases/consumer_integration.rs -expression: plan_str ---- -Projection: CUSTOMER.C_CUSTKEY, CUSTOMER.C_NAME, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS REVENUE, CUSTOMER.C_ACCTBAL, NATION.N_NAME, CUSTOMER.C_ADDRESS, CUSTOMER.C_PHONE, CUSTOMER.C_COMMENT - Limit: skip=0, fetch=20 - Sort: sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) DESC NULLS FIRST - Projection: CUSTOMER.C_CUSTKEY, CUSTOMER.C_NAME, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT), CUSTOMER.C_ACCTBAL, NATION.N_NAME, CUSTOMER.C_ADDRESS, CUSTOMER.C_PHONE, CUSTOMER.C_COMMENT - Aggregate: groupBy=[[CUSTOMER.C_CUSTKEY, CUSTOMER.C_NAME, CUSTOMER.C_ACCTBAL, CUSTOMER.C_PHONE, NATION.N_NAME, CUSTOMER.C_ADDRESS, CUSTOMER.C_COMMENT]], aggr=[[sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT)]] - Projection: CUSTOMER.C_CUSTKEY, CUSTOMER.C_NAME, CUSTOMER.C_ACCTBAL, CUSTOMER.C_PHONE, NATION.N_NAME, CUSTOMER.C_ADDRESS, CUSTOMER.C_COMMENT, LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT) - Filter: CUSTOMER.C_CUSTKEY = ORDERS.O_CUSTKEY AND LINEITEM.L_ORDERKEY = ORDERS.O_ORDERKEY AND ORDERS.O_ORDERDATE >= CAST(Utf8("1993-10-01") AS Date32) AND ORDERS.O_ORDERDATE < CAST(Utf8("1994-01-01") AS Date32) AND LINEITEM.L_RETURNFLAG = Utf8("R") AND CUSTOMER.C_NATIONKEY = NATION.N_NATIONKEY - Cross Join: - Cross Join: - Cross Join: - TableScan: CUSTOMER - TableScan: ORDERS - TableScan: LINEITEM - TableScan: NATION diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_11.snap b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_11.snap deleted file mode 100644 index a75b5a69496a..000000000000 --- 
a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_11.snap +++ /dev/null @@ -1,25 +0,0 @@ ---- -source: datafusion/substrait/tests/cases/consumer_integration.rs -expression: plan_str ---- -Projection: PARTSUPP.PS_PARTKEY, sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY) AS value - Sort: sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY) DESC NULLS FIRST - Filter: sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY) > () - Subquery: - Projection: sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY) * Decimal128(Some(1000000),11,10) - Aggregate: groupBy=[[]], aggr=[[sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY)]] - Projection: PARTSUPP.PS_SUPPLYCOST * CAST(PARTSUPP.PS_AVAILQTY AS Decimal128(19, 0)) - Filter: PARTSUPP.PS_SUPPKEY = SUPPLIER.S_SUPPKEY AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_NAME = Utf8("JAPAN") - Cross Join: - Cross Join: - TableScan: PARTSUPP - TableScan: SUPPLIER - TableScan: NATION - Aggregate: groupBy=[[PARTSUPP.PS_PARTKEY]], aggr=[[sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY)]] - Projection: PARTSUPP.PS_PARTKEY, PARTSUPP.PS_SUPPLYCOST * CAST(PARTSUPP.PS_AVAILQTY AS Decimal128(19, 0)) - Filter: PARTSUPP.PS_SUPPKEY = SUPPLIER.S_SUPPKEY AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_NAME = Utf8("JAPAN") - Cross Join: - Cross Join: - TableScan: PARTSUPP - TableScan: SUPPLIER - TableScan: NATION diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_12.snap b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_12.snap deleted file mode 100644 index 93716872f001..000000000000 --- a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_12.snap +++ /dev/null @@ -1,12 +0,0 @@ ---- -source: datafusion/substrait/tests/cases/consumer_integration.rs -expression: plan_str ---- 
-Projection: LINEITEM.L_SHIPMODE, sum(CASE WHEN ORDERS.O_ORDERPRIORITY = Utf8("1-URGENT") OR ORDERS.O_ORDERPRIORITY = Utf8("2-HIGH") THEN Int32(1) ELSE Int32(0) END) AS HIGH_LINE_COUNT, sum(CASE WHEN ORDERS.O_ORDERPRIORITY != Utf8("1-URGENT") AND ORDERS.O_ORDERPRIORITY != Utf8("2-HIGH") THEN Int32(1) ELSE Int32(0) END) AS LOW_LINE_COUNT - Sort: LINEITEM.L_SHIPMODE ASC NULLS LAST - Aggregate: groupBy=[[LINEITEM.L_SHIPMODE]], aggr=[[sum(CASE WHEN ORDERS.O_ORDERPRIORITY = Utf8("1-URGENT") OR ORDERS.O_ORDERPRIORITY = Utf8("2-HIGH") THEN Int32(1) ELSE Int32(0) END), sum(CASE WHEN ORDERS.O_ORDERPRIORITY != Utf8("1-URGENT") AND ORDERS.O_ORDERPRIORITY != Utf8("2-HIGH") THEN Int32(1) ELSE Int32(0) END)]] - Projection: LINEITEM.L_SHIPMODE, CASE WHEN ORDERS.O_ORDERPRIORITY = Utf8("1-URGENT") OR ORDERS.O_ORDERPRIORITY = Utf8("2-HIGH") THEN Int32(1) ELSE Int32(0) END, CASE WHEN ORDERS.O_ORDERPRIORITY != Utf8("1-URGENT") AND ORDERS.O_ORDERPRIORITY != Utf8("2-HIGH") THEN Int32(1) ELSE Int32(0) END - Filter: ORDERS.O_ORDERKEY = LINEITEM.L_ORDERKEY AND (LINEITEM.L_SHIPMODE = CAST(Utf8("MAIL") AS Utf8) OR LINEITEM.L_SHIPMODE = CAST(Utf8("SHIP") AS Utf8)) AND LINEITEM.L_COMMITDATE < LINEITEM.L_RECEIPTDATE AND LINEITEM.L_SHIPDATE < LINEITEM.L_COMMITDATE AND LINEITEM.L_RECEIPTDATE >= CAST(Utf8("1994-01-01") AS Date32) AND LINEITEM.L_RECEIPTDATE < CAST(Utf8("1995-01-01") AS Date32) - Cross Join: - TableScan: ORDERS - TableScan: LINEITEM diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_13.snap b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_13.snap deleted file mode 100644 index 18b44547db60..000000000000 --- a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_13.snap +++ /dev/null @@ -1,14 +0,0 @@ ---- -source: datafusion/substrait/tests/cases/consumer_integration.rs -expression: 
plan_str ---- -Projection: count(ORDERS.O_ORDERKEY) AS C_COUNT, count(Int64(1)) AS CUSTDIST - Sort: count(Int64(1)) DESC NULLS FIRST, count(ORDERS.O_ORDERKEY) DESC NULLS FIRST - Projection: count(ORDERS.O_ORDERKEY), count(Int64(1)) - Aggregate: groupBy=[[count(ORDERS.O_ORDERKEY)]], aggr=[[count(Int64(1))]] - Projection: count(ORDERS.O_ORDERKEY) - Aggregate: groupBy=[[CUSTOMER.C_CUSTKEY]], aggr=[[count(ORDERS.O_ORDERKEY)]] - Projection: CUSTOMER.C_CUSTKEY, ORDERS.O_ORDERKEY - Left Join: CUSTOMER.C_CUSTKEY = ORDERS.O_CUSTKEY Filter: NOT ORDERS.O_COMMENT LIKE CAST(Utf8("%special%requests%") AS Utf8) - TableScan: CUSTOMER - TableScan: ORDERS diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_14.snap b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_14.snap deleted file mode 100644 index 7172e566862a..000000000000 --- a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_14.snap +++ /dev/null @@ -1,11 +0,0 @@ ---- -source: datafusion/substrait/tests/cases/consumer_integration.rs -expression: plan_str ---- -Projection: Decimal128(Some(10000),5,2) * sum(CASE WHEN PART.P_TYPE LIKE Utf8("PROMO%") THEN LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT ELSE Decimal128(Some(0),19,4) END) / sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS PROMO_REVENUE - Aggregate: groupBy=[[]], aggr=[[sum(CASE WHEN PART.P_TYPE LIKE Utf8("PROMO%") THEN LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT ELSE Decimal128(Some(0),19,4) END), sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT)]] - Projection: CASE WHEN PART.P_TYPE LIKE CAST(Utf8("PROMO%") AS Utf8) THEN LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT) ELSE Decimal128(Some(0),19,4) END, LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - 
LINEITEM.L_DISCOUNT) - Filter: LINEITEM.L_PARTKEY = PART.P_PARTKEY AND LINEITEM.L_SHIPDATE >= Date32("1995-09-01") AND LINEITEM.L_SHIPDATE < CAST(Utf8("1995-10-01") AS Date32) - Cross Join: - TableScan: LINEITEM - TableScan: PART diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_16.snap b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_16.snap deleted file mode 100644 index 1170905e7416..000000000000 --- a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_16.snap +++ /dev/null @@ -1,16 +0,0 @@ ---- -source: datafusion/substrait/tests/cases/consumer_integration.rs -expression: plan_str ---- -Projection: PART.P_BRAND, PART.P_TYPE, PART.P_SIZE, count(DISTINCT PARTSUPP.PS_SUPPKEY) AS SUPPLIER_CNT - Sort: count(DISTINCT PARTSUPP.PS_SUPPKEY) DESC NULLS FIRST, PART.P_BRAND ASC NULLS LAST, PART.P_TYPE ASC NULLS LAST, PART.P_SIZE ASC NULLS LAST - Aggregate: groupBy=[[PART.P_BRAND, PART.P_TYPE, PART.P_SIZE]], aggr=[[count(DISTINCT PARTSUPP.PS_SUPPKEY)]] - Projection: PART.P_BRAND, PART.P_TYPE, PART.P_SIZE, PARTSUPP.PS_SUPPKEY - Filter: PART.P_PARTKEY = PARTSUPP.PS_PARTKEY AND PART.P_BRAND != Utf8("Brand#45") AND NOT PART.P_TYPE LIKE CAST(Utf8("MEDIUM POLISHED%") AS Utf8) AND (PART.P_SIZE = Int32(49) OR PART.P_SIZE = Int32(14) OR PART.P_SIZE = Int32(23) OR PART.P_SIZE = Int32(45) OR PART.P_SIZE = Int32(19) OR PART.P_SIZE = Int32(3) OR PART.P_SIZE = Int32(36) OR PART.P_SIZE = Int32(9)) AND NOT PARTSUPP.PS_SUPPKEY IN () - Subquery: - Projection: SUPPLIER.S_SUPPKEY - Filter: SUPPLIER.S_COMMENT LIKE CAST(Utf8("%Customer%Complaints%") AS Utf8) - TableScan: SUPPLIER - Cross Join: - TableScan: PARTSUPP - TableScan: PART diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_18.snap 
b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_18.snap deleted file mode 100644 index f36cd138bd01..000000000000 --- a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_18.snap +++ /dev/null @@ -1,21 +0,0 @@ ---- -source: datafusion/substrait/tests/cases/consumer_integration.rs -expression: plan_str ---- -Projection: CUSTOMER.C_NAME, CUSTOMER.C_CUSTKEY, ORDERS.O_ORDERKEY, ORDERS.O_ORDERDATE, ORDERS.O_TOTALPRICE, sum(LINEITEM.L_QUANTITY) AS EXPR$5 - Limit: skip=0, fetch=100 - Sort: ORDERS.O_TOTALPRICE DESC NULLS FIRST, ORDERS.O_ORDERDATE ASC NULLS LAST - Aggregate: groupBy=[[CUSTOMER.C_NAME, CUSTOMER.C_CUSTKEY, ORDERS.O_ORDERKEY, ORDERS.O_ORDERDATE, ORDERS.O_TOTALPRICE]], aggr=[[sum(LINEITEM.L_QUANTITY)]] - Projection: CUSTOMER.C_NAME, CUSTOMER.C_CUSTKEY, ORDERS.O_ORDERKEY, ORDERS.O_ORDERDATE, ORDERS.O_TOTALPRICE, LINEITEM.L_QUANTITY - Filter: ORDERS.O_ORDERKEY IN () AND CUSTOMER.C_CUSTKEY = ORDERS.O_CUSTKEY AND ORDERS.O_ORDERKEY = LINEITEM.L_ORDERKEY - Subquery: - Projection: LINEITEM.L_ORDERKEY - Filter: sum(LINEITEM.L_QUANTITY) > CAST(Int32(300) AS Decimal128(15, 2)) - Aggregate: groupBy=[[LINEITEM.L_ORDERKEY]], aggr=[[sum(LINEITEM.L_QUANTITY)]] - Projection: LINEITEM.L_ORDERKEY, LINEITEM.L_QUANTITY - TableScan: LINEITEM - Cross Join: - Cross Join: - TableScan: CUSTOMER - TableScan: ORDERS - TableScan: LINEITEM diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_19.snap b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_19.snap deleted file mode 100644 index f7a3eb44afe4..000000000000 --- a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_19.snap +++ /dev/null @@ -1,10 +0,0 @@ ---- -source: 
datafusion/substrait/tests/cases/consumer_integration.rs -expression: plan_str ---- -Aggregate: groupBy=[[]], aggr=[[sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS REVENUE]] - Projection: LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT) - Filter: PART.P_PARTKEY = LINEITEM.L_PARTKEY AND PART.P_BRAND = Utf8("Brand#12") AND (PART.P_CONTAINER = CAST(Utf8("SM CASE") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("SM BOX") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("SM PACK") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("SM PKG") AS Utf8)) AND LINEITEM.L_QUANTITY >= CAST(Int32(1) AS Decimal128(15, 2)) AND LINEITEM.L_QUANTITY <= CAST(Int32(1) + Int32(10) AS Decimal128(15, 2)) AND PART.P_SIZE >= Int32(1) AND PART.P_SIZE <= Int32(5) AND (LINEITEM.L_SHIPMODE = CAST(Utf8("AIR") AS Utf8) OR LINEITEM.L_SHIPMODE = CAST(Utf8("AIR REG") AS Utf8)) AND LINEITEM.L_SHIPINSTRUCT = Utf8("DELIVER IN PERSON") OR PART.P_PARTKEY = LINEITEM.L_PARTKEY AND PART.P_BRAND = Utf8("Brand#23") AND (PART.P_CONTAINER = CAST(Utf8("MED BAG") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("MED BOX") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("MED PKG") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("MED PACK") AS Utf8)) AND LINEITEM.L_QUANTITY >= CAST(Int32(10) AS Decimal128(15, 2)) AND LINEITEM.L_QUANTITY <= CAST(Int32(10) + Int32(10) AS Decimal128(15, 2)) AND PART.P_SIZE >= Int32(1) AND PART.P_SIZE <= Int32(10) AND (LINEITEM.L_SHIPMODE = CAST(Utf8("AIR") AS Utf8) OR LINEITEM.L_SHIPMODE = CAST(Utf8("AIR REG") AS Utf8)) AND LINEITEM.L_SHIPINSTRUCT = Utf8("DELIVER IN PERSON") OR PART.P_PARTKEY = LINEITEM.L_PARTKEY AND PART.P_BRAND = Utf8("Brand#34") AND (PART.P_CONTAINER = CAST(Utf8("LG CASE") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("LG BOX") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("LG PACK") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("LG PKG") AS Utf8)) AND LINEITEM.L_QUANTITY >= CAST(Int32(20) AS Decimal128(15, 2)) AND LINEITEM.L_QUANTITY <= CAST(Int32(20) + Int32(10) AS 
Decimal128(15, 2)) AND PART.P_SIZE >= Int32(1) AND PART.P_SIZE <= Int32(15) AND (LINEITEM.L_SHIPMODE = CAST(Utf8("AIR") AS Utf8) OR LINEITEM.L_SHIPMODE = CAST(Utf8("AIR REG") AS Utf8)) AND LINEITEM.L_SHIPINSTRUCT = Utf8("DELIVER IN PERSON") - Cross Join: - TableScan: LINEITEM - TableScan: PART diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_20.snap b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_20.snap deleted file mode 100644 index b0a2069b19a0..000000000000 --- a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_20.snap +++ /dev/null @@ -1,24 +0,0 @@ ---- -source: datafusion/substrait/tests/cases/consumer_integration.rs -expression: plan_str ---- -Sort: SUPPLIER.S_NAME ASC NULLS LAST - Projection: SUPPLIER.S_NAME, SUPPLIER.S_ADDRESS - Filter: SUPPLIER.S_SUPPKEY IN () AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_NAME = Utf8("CANADA") - Subquery: - Projection: PARTSUPP.PS_SUPPKEY - Filter: PARTSUPP.PS_PARTKEY IN () AND CAST(PARTSUPP.PS_AVAILQTY AS Decimal128(19, 0)) > () - Subquery: - Projection: PART.P_PARTKEY - Filter: PART.P_NAME LIKE CAST(Utf8("forest%") AS Utf8) - TableScan: PART - Subquery: - Projection: Decimal128(Some(5),2,1) * sum(LINEITEM.L_QUANTITY) - Aggregate: groupBy=[[]], aggr=[[sum(LINEITEM.L_QUANTITY)]] - Projection: LINEITEM.L_QUANTITY - Filter: LINEITEM.L_PARTKEY = LINEITEM.L_ORDERKEY AND LINEITEM.L_SUPPKEY = LINEITEM.L_PARTKEY AND LINEITEM.L_SHIPDATE >= CAST(Utf8("1994-01-01") AS Date32) AND LINEITEM.L_SHIPDATE < CAST(Utf8("1995-01-01") AS Date32) - TableScan: LINEITEM - TableScan: PARTSUPP - Cross Join: - TableScan: SUPPLIER - TableScan: NATION diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_21.snap 
b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_21.snap deleted file mode 100644 index 9fe1a65c9acd..000000000000 --- a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_21.snap +++ /dev/null @@ -1,23 +0,0 @@ ---- -source: datafusion/substrait/tests/cases/consumer_integration.rs -expression: plan_str ---- -Projection: SUPPLIER.S_NAME, count(Int64(1)) AS NUMWAIT - Limit: skip=0, fetch=100 - Sort: count(Int64(1)) DESC NULLS FIRST, SUPPLIER.S_NAME ASC NULLS LAST - Aggregate: groupBy=[[SUPPLIER.S_NAME]], aggr=[[count(Int64(1))]] - Projection: SUPPLIER.S_NAME - Filter: SUPPLIER.S_SUPPKEY = LINEITEM.L_SUPPKEY AND ORDERS.O_ORDERKEY = LINEITEM.L_ORDERKEY AND ORDERS.O_ORDERSTATUS = Utf8("F") AND LINEITEM.L_RECEIPTDATE > LINEITEM.L_COMMITDATE AND EXISTS () AND NOT EXISTS () AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_NAME = Utf8("SAUDI ARABIA") - Subquery: - Filter: LINEITEM.L_ORDERKEY = LINEITEM.L_TAX AND LINEITEM.L_SUPPKEY != LINEITEM.L_LINESTATUS - TableScan: LINEITEM - Subquery: - Filter: LINEITEM.L_ORDERKEY = LINEITEM.L_TAX AND LINEITEM.L_SUPPKEY != LINEITEM.L_LINESTATUS AND LINEITEM.L_RECEIPTDATE > LINEITEM.L_COMMITDATE - TableScan: LINEITEM - Cross Join: - Cross Join: - Cross Join: - TableScan: SUPPLIER - TableScan: LINEITEM - TableScan: ORDERS - TableScan: NATION diff --git a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_22.snap b/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_22.snap deleted file mode 100644 index f6259132bcd5..000000000000 --- a/datafusion/substrait/tests/cases/snapshots/substrait_integration__cases__consumer_integration__tests__tpch_test_22.snap +++ /dev/null @@ -1,18 +0,0 @@ ---- -source: datafusion/substrait/tests/cases/consumer_integration.rs -expression: plan_str ---- 
-Projection: substr(CUSTOMER.C_PHONE,Int32(1),Int32(2)) AS CNTRYCODE, count(Int64(1)) AS NUMCUST, sum(CUSTOMER.C_ACCTBAL) AS TOTACCTBAL - Sort: substr(CUSTOMER.C_PHONE,Int32(1),Int32(2)) ASC NULLS LAST - Aggregate: groupBy=[[substr(CUSTOMER.C_PHONE,Int32(1),Int32(2))]], aggr=[[count(Int64(1)), sum(CUSTOMER.C_ACCTBAL)]] - Projection: substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)), CUSTOMER.C_ACCTBAL - Filter: (substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("13") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("31") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("23") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("29") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("30") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("18") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("17") AS Utf8)) AND CUSTOMER.C_ACCTBAL > () AND NOT EXISTS () - Subquery: - Aggregate: groupBy=[[]], aggr=[[avg(CUSTOMER.C_ACCTBAL)]] - Projection: CUSTOMER.C_ACCTBAL - Filter: CUSTOMER.C_ACCTBAL > Decimal128(Some(0),3,2) AND (substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("13") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("31") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("23") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("29") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("30") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("18") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("17") AS Utf8)) - TableScan: CUSTOMER - Subquery: - Filter: ORDERS.O_CUSTKEY = ORDERS.O_ORDERKEY - TableScan: ORDERS - TableScan: CUSTOMER From 5ba4450fdca541ca74ed35556cad17b0d75071d4 Mon Sep 17 00:00:00 2001 From: qstommyshu Date: Wed, 26 Mar 2025 19:51:16 -0400 Subject: [PATCH 04/17] migrate `consumer_integration.rs` to `insta` inline 
snapshot --- .../tests/cases/consumer_integration.rs | 608 ++++++++++-------- 1 file changed, 323 insertions(+), 285 deletions(-) diff --git a/datafusion/substrait/tests/cases/consumer_integration.rs b/datafusion/substrait/tests/cases/consumer_integration.rs index 1f1a15abb837..af9d92378298 100644 --- a/datafusion/substrait/tests/cases/consumer_integration.rs +++ b/datafusion/substrait/tests/cases/consumer_integration.rs @@ -28,6 +28,7 @@ mod tests { use datafusion::common::Result; use datafusion::prelude::SessionContext; use datafusion_substrait::logical_plan::consumer::from_substrait_plan; + use insta::assert_snapshot; use std::fs::File; use std::io::BufReader; use substrait::proto::Plan; @@ -49,125 +50,137 @@ mod tests { #[tokio::test] async fn tpch_test_01() -> Result<()> { let plan_str = tpch_plan_to_string(1).await?; - assert_eq!( - plan_str, - "Projection: LINEITEM.L_RETURNFLAG, LINEITEM.L_LINESTATUS, sum(LINEITEM.L_QUANTITY) AS SUM_QTY, sum(LINEITEM.L_EXTENDEDPRICE) AS SUM_BASE_PRICE, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS SUM_DISC_PRICE, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT * Int32(1) + LINEITEM.L_TAX) AS SUM_CHARGE, avg(LINEITEM.L_QUANTITY) AS AVG_QTY, avg(LINEITEM.L_EXTENDEDPRICE) AS AVG_PRICE, avg(LINEITEM.L_DISCOUNT) AS AVG_DISC, count(Int64(1)) AS COUNT_ORDER\ - \n Sort: LINEITEM.L_RETURNFLAG ASC NULLS LAST, LINEITEM.L_LINESTATUS ASC NULLS LAST\ - \n Aggregate: groupBy=[[LINEITEM.L_RETURNFLAG, LINEITEM.L_LINESTATUS]], aggr=[[sum(LINEITEM.L_QUANTITY), sum(LINEITEM.L_EXTENDEDPRICE), sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT), sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT * Int32(1) + LINEITEM.L_TAX), avg(LINEITEM.L_QUANTITY), avg(LINEITEM.L_EXTENDEDPRICE), avg(LINEITEM.L_DISCOUNT), count(Int64(1))]]\ - \n Projection: LINEITEM.L_RETURNFLAG, LINEITEM.L_LINESTATUS, LINEITEM.L_QUANTITY, LINEITEM.L_EXTENDEDPRICE, LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 
2)) - LINEITEM.L_DISCOUNT), LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT) * (CAST(Int32(1) AS Decimal128(15, 2)) + LINEITEM.L_TAX), LINEITEM.L_DISCOUNT\ - \n Filter: LINEITEM.L_SHIPDATE <= Date32(\"1998-12-01\") - IntervalDayTime(\"IntervalDayTime { days: 0, milliseconds: 10368000 }\")\ - \n TableScan: LINEITEM" - ); + assert_snapshot!( + plan_str, + @r#" + Projection: LINEITEM.L_RETURNFLAG, LINEITEM.L_LINESTATUS, sum(LINEITEM.L_QUANTITY) AS SUM_QTY, sum(LINEITEM.L_EXTENDEDPRICE) AS SUM_BASE_PRICE, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS SUM_DISC_PRICE, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT * Int32(1) + LINEITEM.L_TAX) AS SUM_CHARGE, avg(LINEITEM.L_QUANTITY) AS AVG_QTY, avg(LINEITEM.L_EXTENDEDPRICE) AS AVG_PRICE, avg(LINEITEM.L_DISCOUNT) AS AVG_DISC, count(Int64(1)) AS COUNT_ORDER + Sort: LINEITEM.L_RETURNFLAG ASC NULLS LAST, LINEITEM.L_LINESTATUS ASC NULLS LAST + Aggregate: groupBy=[[LINEITEM.L_RETURNFLAG, LINEITEM.L_LINESTATUS]], aggr=[[sum(LINEITEM.L_QUANTITY), sum(LINEITEM.L_EXTENDEDPRICE), sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT), sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT * Int32(1) + LINEITEM.L_TAX), avg(LINEITEM.L_QUANTITY), avg(LINEITEM.L_EXTENDEDPRICE), avg(LINEITEM.L_DISCOUNT), count(Int64(1))]] + Projection: LINEITEM.L_RETURNFLAG, LINEITEM.L_LINESTATUS, LINEITEM.L_QUANTITY, LINEITEM.L_EXTENDEDPRICE, LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT), LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT) * (CAST(Int32(1) AS Decimal128(15, 2)) + LINEITEM.L_TAX), LINEITEM.L_DISCOUNT + Filter: LINEITEM.L_SHIPDATE <= Date32("1998-12-01") - IntervalDayTime("IntervalDayTime { days: 0, milliseconds: 10368000 }") + TableScan: LINEITEM + "# + ); Ok(()) } #[tokio::test] async fn tpch_test_02() -> Result<()> { let plan_str = tpch_plan_to_string(2).await?; - 
assert_eq!( - plan_str, - "Limit: skip=0, fetch=100\ - \n Sort: SUPPLIER.S_ACCTBAL DESC NULLS FIRST, NATION.N_NAME ASC NULLS LAST, SUPPLIER.S_NAME ASC NULLS LAST, PART.P_PARTKEY ASC NULLS LAST\ - \n Projection: SUPPLIER.S_ACCTBAL, SUPPLIER.S_NAME, NATION.N_NAME, PART.P_PARTKEY, PART.P_MFGR, SUPPLIER.S_ADDRESS, SUPPLIER.S_PHONE, SUPPLIER.S_COMMENT\ - \n Filter: PART.P_PARTKEY = PARTSUPP.PS_PARTKEY AND SUPPLIER.S_SUPPKEY = PARTSUPP.PS_SUPPKEY AND PART.P_SIZE = Int32(15) AND PART.P_TYPE LIKE CAST(Utf8(\"%BRASS\") AS Utf8) AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_REGIONKEY = REGION.R_REGIONKEY AND REGION.R_NAME = Utf8(\"EUROPE\") AND PARTSUPP.PS_SUPPLYCOST = ()\ - \n Subquery:\ - \n Aggregate: groupBy=[[]], aggr=[[min(PARTSUPP.PS_SUPPLYCOST)]]\ - \n Projection: PARTSUPP.PS_SUPPLYCOST\ - \n Filter: PARTSUPP.PS_PARTKEY = PARTSUPP.PS_PARTKEY AND SUPPLIER.S_SUPPKEY = PARTSUPP.PS_SUPPKEY AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_REGIONKEY = REGION.R_REGIONKEY AND REGION.R_NAME = Utf8(\"EUROPE\")\ - \n Cross Join: \ - \n Cross Join: \ - \n Cross Join: \ - \n TableScan: PARTSUPP\ - \n TableScan: SUPPLIER\ - \n TableScan: NATION\ - \n TableScan: REGION\ - \n Cross Join: \ - \n Cross Join: \ - \n Cross Join: \ - \n Cross Join: \ - \n TableScan: PART\ - \n TableScan: SUPPLIER\ - \n TableScan: PARTSUPP\ - \n TableScan: NATION\ - \n TableScan: REGION" - ); + assert_snapshot!( + plan_str, + @r#" + Limit: skip=0, fetch=100 + Sort: SUPPLIER.S_ACCTBAL DESC NULLS FIRST, NATION.N_NAME ASC NULLS LAST, SUPPLIER.S_NAME ASC NULLS LAST, PART.P_PARTKEY ASC NULLS LAST + Projection: SUPPLIER.S_ACCTBAL, SUPPLIER.S_NAME, NATION.N_NAME, PART.P_PARTKEY, PART.P_MFGR, SUPPLIER.S_ADDRESS, SUPPLIER.S_PHONE, SUPPLIER.S_COMMENT + Filter: PART.P_PARTKEY = PARTSUPP.PS_PARTKEY AND SUPPLIER.S_SUPPKEY = PARTSUPP.PS_SUPPKEY AND PART.P_SIZE = Int32(15) AND PART.P_TYPE LIKE CAST(Utf8("%BRASS") AS Utf8) AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_REGIONKEY = 
REGION.R_REGIONKEY AND REGION.R_NAME = Utf8("EUROPE") AND PARTSUPP.PS_SUPPLYCOST = () + Subquery: + Aggregate: groupBy=[[]], aggr=[[min(PARTSUPP.PS_SUPPLYCOST)]] + Projection: PARTSUPP.PS_SUPPLYCOST + Filter: PARTSUPP.PS_PARTKEY = PARTSUPP.PS_PARTKEY AND SUPPLIER.S_SUPPKEY = PARTSUPP.PS_SUPPKEY AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_REGIONKEY = REGION.R_REGIONKEY AND REGION.R_NAME = Utf8("EUROPE") + Cross Join: + Cross Join: + Cross Join: + TableScan: PARTSUPP + TableScan: SUPPLIER + TableScan: NATION + TableScan: REGION + Cross Join: + Cross Join: + Cross Join: + Cross Join: + TableScan: PART + TableScan: SUPPLIER + TableScan: PARTSUPP + TableScan: NATION + TableScan: REGION + "# + ); Ok(()) } #[tokio::test] async fn tpch_test_03() -> Result<()> { let plan_str = tpch_plan_to_string(3).await?; - assert_eq!( - plan_str, - "Projection: LINEITEM.L_ORDERKEY, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS REVENUE, ORDERS.O_ORDERDATE, ORDERS.O_SHIPPRIORITY\ - \n Limit: skip=0, fetch=10\ - \n Sort: sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) DESC NULLS FIRST, ORDERS.O_ORDERDATE ASC NULLS LAST\ - \n Projection: LINEITEM.L_ORDERKEY, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT), ORDERS.O_ORDERDATE, ORDERS.O_SHIPPRIORITY\ - \n Aggregate: groupBy=[[LINEITEM.L_ORDERKEY, ORDERS.O_ORDERDATE, ORDERS.O_SHIPPRIORITY]], aggr=[[sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT)]]\ - \n Projection: LINEITEM.L_ORDERKEY, ORDERS.O_ORDERDATE, ORDERS.O_SHIPPRIORITY, LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT)\ - \n Filter: CUSTOMER.C_MKTSEGMENT = Utf8(\"BUILDING\") AND CUSTOMER.C_CUSTKEY = ORDERS.O_CUSTKEY AND LINEITEM.L_ORDERKEY = ORDERS.O_ORDERKEY AND ORDERS.O_ORDERDATE < CAST(Utf8(\"1995-03-15\") AS Date32) AND LINEITEM.L_SHIPDATE > CAST(Utf8(\"1995-03-15\") AS Date32)\ - \n Cross Join: \ - \n Cross Join: \ - \n TableScan: LINEITEM\ - \n TableScan: CUSTOMER\ 
- \n TableScan: ORDERS" - ); + assert_snapshot!( + plan_str, + @r#" + Projection: LINEITEM.L_ORDERKEY, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS REVENUE, ORDERS.O_ORDERDATE, ORDERS.O_SHIPPRIORITY + Limit: skip=0, fetch=10 + Sort: sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) DESC NULLS FIRST, ORDERS.O_ORDERDATE ASC NULLS LAST + Projection: LINEITEM.L_ORDERKEY, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT), ORDERS.O_ORDERDATE, ORDERS.O_SHIPPRIORITY + Aggregate: groupBy=[[LINEITEM.L_ORDERKEY, ORDERS.O_ORDERDATE, ORDERS.O_SHIPPRIORITY]], aggr=[[sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT)]] + Projection: LINEITEM.L_ORDERKEY, ORDERS.O_ORDERDATE, ORDERS.O_SHIPPRIORITY, LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT) + Filter: CUSTOMER.C_MKTSEGMENT = Utf8("BUILDING") AND CUSTOMER.C_CUSTKEY = ORDERS.O_CUSTKEY AND LINEITEM.L_ORDERKEY = ORDERS.O_ORDERKEY AND ORDERS.O_ORDERDATE < CAST(Utf8("1995-03-15") AS Date32) AND LINEITEM.L_SHIPDATE > CAST(Utf8("1995-03-15") AS Date32) + Cross Join: + Cross Join: + TableScan: LINEITEM + TableScan: CUSTOMER + TableScan: ORDERS + "# + ); Ok(()) } #[tokio::test] async fn tpch_test_04() -> Result<()> { let plan_str = tpch_plan_to_string(4).await?; - assert_eq!( - plan_str, - "Projection: ORDERS.O_ORDERPRIORITY, count(Int64(1)) AS ORDER_COUNT\ - \n Sort: ORDERS.O_ORDERPRIORITY ASC NULLS LAST\ - \n Aggregate: groupBy=[[ORDERS.O_ORDERPRIORITY]], aggr=[[count(Int64(1))]]\ - \n Projection: ORDERS.O_ORDERPRIORITY\ - \n Filter: ORDERS.O_ORDERDATE >= CAST(Utf8(\"1993-07-01\") AS Date32) AND ORDERS.O_ORDERDATE < CAST(Utf8(\"1993-10-01\") AS Date32) AND EXISTS ()\ - \n Subquery:\ - \n Filter: LINEITEM.L_ORDERKEY = LINEITEM.L_ORDERKEY AND LINEITEM.L_COMMITDATE < LINEITEM.L_RECEIPTDATE\ - \n TableScan: LINEITEM\ - \n TableScan: ORDERS" - ); + assert_snapshot!( + plan_str, + @r#" + Projection: ORDERS.O_ORDERPRIORITY, count(Int64(1)) AS 
ORDER_COUNT + Sort: ORDERS.O_ORDERPRIORITY ASC NULLS LAST + Aggregate: groupBy=[[ORDERS.O_ORDERPRIORITY]], aggr=[[count(Int64(1))]] + Projection: ORDERS.O_ORDERPRIORITY + Filter: ORDERS.O_ORDERDATE >= CAST(Utf8("1993-07-01") AS Date32) AND ORDERS.O_ORDERDATE < CAST(Utf8("1993-10-01") AS Date32) AND EXISTS () + Subquery: + Filter: LINEITEM.L_ORDERKEY = LINEITEM.L_ORDERKEY AND LINEITEM.L_COMMITDATE < LINEITEM.L_RECEIPTDATE + TableScan: LINEITEM + TableScan: ORDERS + "# + ); Ok(()) } #[tokio::test] async fn tpch_test_05() -> Result<()> { let plan_str = tpch_plan_to_string(5).await?; - assert_eq!( - plan_str, - "Projection: NATION.N_NAME, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS REVENUE\ - \n Sort: sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) DESC NULLS FIRST\ - \n Aggregate: groupBy=[[NATION.N_NAME]], aggr=[[sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT)]]\ - \n Projection: NATION.N_NAME, LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT)\ - \n Filter: CUSTOMER.C_CUSTKEY = ORDERS.O_CUSTKEY AND LINEITEM.L_ORDERKEY = ORDERS.O_ORDERKEY AND LINEITEM.L_SUPPKEY = SUPPLIER.S_SUPPKEY AND CUSTOMER.C_NATIONKEY = SUPPLIER.S_NATIONKEY AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_REGIONKEY = REGION.R_REGIONKEY AND REGION.R_NAME = Utf8(\"ASIA\") AND ORDERS.O_ORDERDATE >= CAST(Utf8(\"1994-01-01\") AS Date32) AND ORDERS.O_ORDERDATE < CAST(Utf8(\"1995-01-01\") AS Date32)\ - \n Cross Join: \ - \n Cross Join: \ - \n Cross Join: \ - \n Cross Join: \ - \n Cross Join: \ - \n TableScan: CUSTOMER\ - \n TableScan: ORDERS\ - \n TableScan: LINEITEM\ - \n TableScan: SUPPLIER\ - \n TableScan: NATION\ - \n TableScan: REGION" - ); + assert_snapshot!( + plan_str, + @r#" + Projection: NATION.N_NAME, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS REVENUE + Sort: sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) DESC NULLS FIRST + Aggregate: 
groupBy=[[NATION.N_NAME]], aggr=[[sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT)]] + Projection: NATION.N_NAME, LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT) + Filter: CUSTOMER.C_CUSTKEY = ORDERS.O_CUSTKEY AND LINEITEM.L_ORDERKEY = ORDERS.O_ORDERKEY AND LINEITEM.L_SUPPKEY = SUPPLIER.S_SUPPKEY AND CUSTOMER.C_NATIONKEY = SUPPLIER.S_NATIONKEY AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_REGIONKEY = REGION.R_REGIONKEY AND REGION.R_NAME = Utf8("ASIA") AND ORDERS.O_ORDERDATE >= CAST(Utf8("1994-01-01") AS Date32) AND ORDERS.O_ORDERDATE < CAST(Utf8("1995-01-01") AS Date32) + Cross Join: + Cross Join: + Cross Join: + Cross Join: + Cross Join: + TableScan: CUSTOMER + TableScan: ORDERS + TableScan: LINEITEM + TableScan: SUPPLIER + TableScan: NATION + TableScan: REGION + "# + ); Ok(()) } #[tokio::test] async fn tpch_test_06() -> Result<()> { let plan_str = tpch_plan_to_string(6).await?; - assert_eq!( - plan_str, - "Aggregate: groupBy=[[]], aggr=[[sum(LINEITEM.L_EXTENDEDPRICE * LINEITEM.L_DISCOUNT) AS REVENUE]]\ - \n Projection: LINEITEM.L_EXTENDEDPRICE * LINEITEM.L_DISCOUNT\ - \n Filter: LINEITEM.L_SHIPDATE >= CAST(Utf8(\"1994-01-01\") AS Date32) AND LINEITEM.L_SHIPDATE < CAST(Utf8(\"1995-01-01\") AS Date32) AND LINEITEM.L_DISCOUNT >= Decimal128(Some(5),3,2) AND LINEITEM.L_DISCOUNT <= Decimal128(Some(7),3,2) AND LINEITEM.L_QUANTITY < CAST(Int32(24) AS Decimal128(15, 2))\ - \n TableScan: LINEITEM" - ); + assert_snapshot!( + plan_str, + @r#" + Aggregate: groupBy=[[]], aggr=[[sum(LINEITEM.L_EXTENDEDPRICE * LINEITEM.L_DISCOUNT) AS REVENUE]] + Projection: LINEITEM.L_EXTENDEDPRICE * LINEITEM.L_DISCOUNT + Filter: LINEITEM.L_SHIPDATE >= CAST(Utf8("1994-01-01") AS Date32) AND LINEITEM.L_SHIPDATE < CAST(Utf8("1995-01-01") AS Date32) AND LINEITEM.L_DISCOUNT >= Decimal128(Some(5),3,2) AND LINEITEM.L_DISCOUNT <= Decimal128(Some(7),3,2) AND LINEITEM.L_QUANTITY < CAST(Int32(24) AS Decimal128(15, 2)) + TableScan: LINEITEM 
+ "# + ); Ok(()) } @@ -175,7 +188,7 @@ mod tests { #[tokio::test] async fn tpch_test_07() -> Result<()> { let plan_str = tpch_plan_to_string(7).await?; - assert_eq!(plan_str, "Missing support for enum function arguments"); + assert_snapshot!(plan_str, "Missing support for enum function arguments"); Ok(()) } @@ -183,7 +196,7 @@ mod tests { #[tokio::test] async fn tpch_test_08() -> Result<()> { let plan_str = tpch_plan_to_string(8).await?; - assert_eq!(plan_str, "Missing support for enum function arguments"); + assert_snapshot!(plan_str, "Missing support for enum function arguments"); Ok(()) } @@ -191,112 +204,121 @@ mod tests { #[tokio::test] async fn tpch_test_09() -> Result<()> { let plan_str = tpch_plan_to_string(9).await?; - assert_eq!(plan_str, "Missing support for enum function arguments"); + assert_snapshot!(plan_str, "Missing support for enum function arguments"); Ok(()) } #[tokio::test] async fn tpch_test_10() -> Result<()> { let plan_str = tpch_plan_to_string(10).await?; - assert_eq!( - plan_str, - "Projection: CUSTOMER.C_CUSTKEY, CUSTOMER.C_NAME, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS REVENUE, CUSTOMER.C_ACCTBAL, NATION.N_NAME, CUSTOMER.C_ADDRESS, CUSTOMER.C_PHONE, CUSTOMER.C_COMMENT\ - \n Limit: skip=0, fetch=20\ - \n Sort: sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) DESC NULLS FIRST\ - \n Projection: CUSTOMER.C_CUSTKEY, CUSTOMER.C_NAME, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT), CUSTOMER.C_ACCTBAL, NATION.N_NAME, CUSTOMER.C_ADDRESS, CUSTOMER.C_PHONE, CUSTOMER.C_COMMENT\ - \n Aggregate: groupBy=[[CUSTOMER.C_CUSTKEY, CUSTOMER.C_NAME, CUSTOMER.C_ACCTBAL, CUSTOMER.C_PHONE, NATION.N_NAME, CUSTOMER.C_ADDRESS, CUSTOMER.C_COMMENT]], aggr=[[sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT)]]\ - \n Projection: CUSTOMER.C_CUSTKEY, CUSTOMER.C_NAME, CUSTOMER.C_ACCTBAL, CUSTOMER.C_PHONE, NATION.N_NAME, CUSTOMER.C_ADDRESS, CUSTOMER.C_COMMENT, LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS 
Decimal128(15, 2)) - LINEITEM.L_DISCOUNT)\ - \n Filter: CUSTOMER.C_CUSTKEY = ORDERS.O_CUSTKEY AND LINEITEM.L_ORDERKEY = ORDERS.O_ORDERKEY AND ORDERS.O_ORDERDATE >= CAST(Utf8(\"1993-10-01\") AS Date32) AND ORDERS.O_ORDERDATE < CAST(Utf8(\"1994-01-01\") AS Date32) AND LINEITEM.L_RETURNFLAG = Utf8(\"R\") AND CUSTOMER.C_NATIONKEY = NATION.N_NATIONKEY\ - \n Cross Join: \ - \n Cross Join: \ - \n Cross Join: \ - \n TableScan: CUSTOMER\ - \n TableScan: ORDERS\ - \n TableScan: LINEITEM\ - \n TableScan: NATION" - ); + assert_snapshot!( + plan_str, + @r#" + Projection: CUSTOMER.C_CUSTKEY, CUSTOMER.C_NAME, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS REVENUE, CUSTOMER.C_ACCTBAL, NATION.N_NAME, CUSTOMER.C_ADDRESS, CUSTOMER.C_PHONE, CUSTOMER.C_COMMENT + Limit: skip=0, fetch=20 + Sort: sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) DESC NULLS FIRST + Projection: CUSTOMER.C_CUSTKEY, CUSTOMER.C_NAME, sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT), CUSTOMER.C_ACCTBAL, NATION.N_NAME, CUSTOMER.C_ADDRESS, CUSTOMER.C_PHONE, CUSTOMER.C_COMMENT + Aggregate: groupBy=[[CUSTOMER.C_CUSTKEY, CUSTOMER.C_NAME, CUSTOMER.C_ACCTBAL, CUSTOMER.C_PHONE, NATION.N_NAME, CUSTOMER.C_ADDRESS, CUSTOMER.C_COMMENT]], aggr=[[sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT)]] + Projection: CUSTOMER.C_CUSTKEY, CUSTOMER.C_NAME, CUSTOMER.C_ACCTBAL, CUSTOMER.C_PHONE, NATION.N_NAME, CUSTOMER.C_ADDRESS, CUSTOMER.C_COMMENT, LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT) + Filter: CUSTOMER.C_CUSTKEY = ORDERS.O_CUSTKEY AND LINEITEM.L_ORDERKEY = ORDERS.O_ORDERKEY AND ORDERS.O_ORDERDATE >= CAST(Utf8("1993-10-01") AS Date32) AND ORDERS.O_ORDERDATE < CAST(Utf8("1994-01-01") AS Date32) AND LINEITEM.L_RETURNFLAG = Utf8("R") AND CUSTOMER.C_NATIONKEY = NATION.N_NATIONKEY + Cross Join: + Cross Join: + Cross Join: + TableScan: CUSTOMER + TableScan: ORDERS + TableScan: LINEITEM + TableScan: NATION + "# + ); Ok(()) } 
#[tokio::test] async fn tpch_test_11() -> Result<()> { let plan_str = tpch_plan_to_string(11).await?; - assert_eq!( - plan_str, - "Projection: PARTSUPP.PS_PARTKEY, sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY) AS value\ - \n Sort: sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY) DESC NULLS FIRST\ - \n Filter: sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY) > ()\ - \n Subquery:\ - \n Projection: sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY) * Decimal128(Some(1000000),11,10)\ - \n Aggregate: groupBy=[[]], aggr=[[sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY)]]\ - \n Projection: PARTSUPP.PS_SUPPLYCOST * CAST(PARTSUPP.PS_AVAILQTY AS Decimal128(19, 0))\ - \n Filter: PARTSUPP.PS_SUPPKEY = SUPPLIER.S_SUPPKEY AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_NAME = Utf8(\"JAPAN\")\ - \n Cross Join: \ - \n Cross Join: \ - \n TableScan: PARTSUPP\ - \n TableScan: SUPPLIER\ - \n TableScan: NATION\ - \n Aggregate: groupBy=[[PARTSUPP.PS_PARTKEY]], aggr=[[sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY)]]\ - \n Projection: PARTSUPP.PS_PARTKEY, PARTSUPP.PS_SUPPLYCOST * CAST(PARTSUPP.PS_AVAILQTY AS Decimal128(19, 0))\ - \n Filter: PARTSUPP.PS_SUPPKEY = SUPPLIER.S_SUPPKEY AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_NAME = Utf8(\"JAPAN\")\ - \n Cross Join: \ - \n Cross Join: \ - \n TableScan: PARTSUPP\ - \n TableScan: SUPPLIER\ - \n TableScan: NATION" - ); + assert_snapshot!( + plan_str, + @r#" + Projection: PARTSUPP.PS_PARTKEY, sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY) AS value + Sort: sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY) DESC NULLS FIRST + Filter: sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY) > () + Subquery: + Projection: sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY) * Decimal128(Some(1000000),11,10) + Aggregate: groupBy=[[]], aggr=[[sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY)]] + Projection: PARTSUPP.PS_SUPPLYCOST * CAST(PARTSUPP.PS_AVAILQTY AS Decimal128(19, 0)) + Filter: PARTSUPP.PS_SUPPKEY = 
SUPPLIER.S_SUPPKEY AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_NAME = Utf8("JAPAN") + Cross Join: + Cross Join: + TableScan: PARTSUPP + TableScan: SUPPLIER + TableScan: NATION + Aggregate: groupBy=[[PARTSUPP.PS_PARTKEY]], aggr=[[sum(PARTSUPP.PS_SUPPLYCOST * PARTSUPP.PS_AVAILQTY)]] + Projection: PARTSUPP.PS_PARTKEY, PARTSUPP.PS_SUPPLYCOST * CAST(PARTSUPP.PS_AVAILQTY AS Decimal128(19, 0)) + Filter: PARTSUPP.PS_SUPPKEY = SUPPLIER.S_SUPPKEY AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_NAME = Utf8("JAPAN") + Cross Join: + Cross Join: + TableScan: PARTSUPP + TableScan: SUPPLIER + TableScan: NATION + "# + ); Ok(()) } #[tokio::test] async fn tpch_test_12() -> Result<()> { let plan_str = tpch_plan_to_string(12).await?; - assert_eq!( - plan_str, - "Projection: LINEITEM.L_SHIPMODE, sum(CASE WHEN ORDERS.O_ORDERPRIORITY = Utf8(\"1-URGENT\") OR ORDERS.O_ORDERPRIORITY = Utf8(\"2-HIGH\") THEN Int32(1) ELSE Int32(0) END) AS HIGH_LINE_COUNT, sum(CASE WHEN ORDERS.O_ORDERPRIORITY != Utf8(\"1-URGENT\") AND ORDERS.O_ORDERPRIORITY != Utf8(\"2-HIGH\") THEN Int32(1) ELSE Int32(0) END) AS LOW_LINE_COUNT\ - \n Sort: LINEITEM.L_SHIPMODE ASC NULLS LAST\ - \n Aggregate: groupBy=[[LINEITEM.L_SHIPMODE]], aggr=[[sum(CASE WHEN ORDERS.O_ORDERPRIORITY = Utf8(\"1-URGENT\") OR ORDERS.O_ORDERPRIORITY = Utf8(\"2-HIGH\") THEN Int32(1) ELSE Int32(0) END), sum(CASE WHEN ORDERS.O_ORDERPRIORITY != Utf8(\"1-URGENT\") AND ORDERS.O_ORDERPRIORITY != Utf8(\"2-HIGH\") THEN Int32(1) ELSE Int32(0) END)]]\ - \n Projection: LINEITEM.L_SHIPMODE, CASE WHEN ORDERS.O_ORDERPRIORITY = Utf8(\"1-URGENT\") OR ORDERS.O_ORDERPRIORITY = Utf8(\"2-HIGH\") THEN Int32(1) ELSE Int32(0) END, CASE WHEN ORDERS.O_ORDERPRIORITY != Utf8(\"1-URGENT\") AND ORDERS.O_ORDERPRIORITY != Utf8(\"2-HIGH\") THEN Int32(1) ELSE Int32(0) END\ - \n Filter: ORDERS.O_ORDERKEY = LINEITEM.L_ORDERKEY AND (LINEITEM.L_SHIPMODE = CAST(Utf8(\"MAIL\") AS Utf8) OR LINEITEM.L_SHIPMODE = CAST(Utf8(\"SHIP\") AS Utf8)) AND 
LINEITEM.L_COMMITDATE < LINEITEM.L_RECEIPTDATE AND LINEITEM.L_SHIPDATE < LINEITEM.L_COMMITDATE AND LINEITEM.L_RECEIPTDATE >= CAST(Utf8(\"1994-01-01\") AS Date32) AND LINEITEM.L_RECEIPTDATE < CAST(Utf8(\"1995-01-01\") AS Date32)\ - \n Cross Join: \ - \n TableScan: ORDERS\ - \n TableScan: LINEITEM" - ); + assert_snapshot!( + plan_str, + @r#" + Projection: LINEITEM.L_SHIPMODE, sum(CASE WHEN ORDERS.O_ORDERPRIORITY = Utf8("1-URGENT") OR ORDERS.O_ORDERPRIORITY = Utf8("2-HIGH") THEN Int32(1) ELSE Int32(0) END) AS HIGH_LINE_COUNT, sum(CASE WHEN ORDERS.O_ORDERPRIORITY != Utf8("1-URGENT") AND ORDERS.O_ORDERPRIORITY != Utf8("2-HIGH") THEN Int32(1) ELSE Int32(0) END) AS LOW_LINE_COUNT + Sort: LINEITEM.L_SHIPMODE ASC NULLS LAST + Aggregate: groupBy=[[LINEITEM.L_SHIPMODE]], aggr=[[sum(CASE WHEN ORDERS.O_ORDERPRIORITY = Utf8("1-URGENT") OR ORDERS.O_ORDERPRIORITY = Utf8("2-HIGH") THEN Int32(1) ELSE Int32(0) END), sum(CASE WHEN ORDERS.O_ORDERPRIORITY != Utf8("1-URGENT") AND ORDERS.O_ORDERPRIORITY != Utf8("2-HIGH") THEN Int32(1) ELSE Int32(0) END)]] + Projection: LINEITEM.L_SHIPMODE, CASE WHEN ORDERS.O_ORDERPRIORITY = Utf8("1-URGENT") OR ORDERS.O_ORDERPRIORITY = Utf8("2-HIGH") THEN Int32(1) ELSE Int32(0) END, CASE WHEN ORDERS.O_ORDERPRIORITY != Utf8("1-URGENT") AND ORDERS.O_ORDERPRIORITY != Utf8("2-HIGH") THEN Int32(1) ELSE Int32(0) END + Filter: ORDERS.O_ORDERKEY = LINEITEM.L_ORDERKEY AND (LINEITEM.L_SHIPMODE = CAST(Utf8("MAIL") AS Utf8) OR LINEITEM.L_SHIPMODE = CAST(Utf8("SHIP") AS Utf8)) AND LINEITEM.L_COMMITDATE < LINEITEM.L_RECEIPTDATE AND LINEITEM.L_SHIPDATE < LINEITEM.L_COMMITDATE AND LINEITEM.L_RECEIPTDATE >= CAST(Utf8("1994-01-01") AS Date32) AND LINEITEM.L_RECEIPTDATE < CAST(Utf8("1995-01-01") AS Date32) + Cross Join: + TableScan: ORDERS + TableScan: LINEITEM + "# + ); Ok(()) } #[tokio::test] async fn tpch_test_13() -> Result<()> { let plan_str = tpch_plan_to_string(13).await?; - assert_eq!( + assert_snapshot!( plan_str, - "Projection: count(ORDERS.O_ORDERKEY) AS C_COUNT, 
count(Int64(1)) AS CUSTDIST\ - \n Sort: count(Int64(1)) DESC NULLS FIRST, count(ORDERS.O_ORDERKEY) DESC NULLS FIRST\ - \n Projection: count(ORDERS.O_ORDERKEY), count(Int64(1))\ - \n Aggregate: groupBy=[[count(ORDERS.O_ORDERKEY)]], aggr=[[count(Int64(1))]]\ - \n Projection: count(ORDERS.O_ORDERKEY)\ - \n Aggregate: groupBy=[[CUSTOMER.C_CUSTKEY]], aggr=[[count(ORDERS.O_ORDERKEY)]]\ - \n Projection: CUSTOMER.C_CUSTKEY, ORDERS.O_ORDERKEY\ - \n Left Join: CUSTOMER.C_CUSTKEY = ORDERS.O_CUSTKEY Filter: NOT ORDERS.O_COMMENT LIKE CAST(Utf8(\"%special%requests%\") AS Utf8)\ - \n TableScan: CUSTOMER\ - \n TableScan: ORDERS" - ); + @r#" + Projection: count(ORDERS.O_ORDERKEY) AS C_COUNT, count(Int64(1)) AS CUSTDIST + Sort: count(Int64(1)) DESC NULLS FIRST, count(ORDERS.O_ORDERKEY) DESC NULLS FIRST + Projection: count(ORDERS.O_ORDERKEY), count(Int64(1)) + Aggregate: groupBy=[[count(ORDERS.O_ORDERKEY)]], aggr=[[count(Int64(1))]] + Projection: count(ORDERS.O_ORDERKEY) + Aggregate: groupBy=[[CUSTOMER.C_CUSTKEY]], aggr=[[count(ORDERS.O_ORDERKEY)]] + Projection: CUSTOMER.C_CUSTKEY, ORDERS.O_ORDERKEY + Left Join: CUSTOMER.C_CUSTKEY = ORDERS.O_CUSTKEY Filter: NOT ORDERS.O_COMMENT LIKE CAST(Utf8("%special%requests%") AS Utf8) + TableScan: CUSTOMER + TableScan: ORDERS + "# ); Ok(()) } #[tokio::test] async fn tpch_test_14() -> Result<()> { let plan_str = tpch_plan_to_string(14).await?; - assert_eq!( - plan_str, - "Projection: Decimal128(Some(10000),5,2) * sum(CASE WHEN PART.P_TYPE LIKE Utf8(\"PROMO%\") THEN LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT ELSE Decimal128(Some(0),19,4) END) / sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS PROMO_REVENUE\ - \n Aggregate: groupBy=[[]], aggr=[[sum(CASE WHEN PART.P_TYPE LIKE Utf8(\"PROMO%\") THEN LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT ELSE Decimal128(Some(0),19,4) END), sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT)]]\ - \n Projection: CASE WHEN PART.P_TYPE LIKE 
CAST(Utf8(\"PROMO%\") AS Utf8) THEN LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT) ELSE Decimal128(Some(0),19,4) END, LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT)\ - \n Filter: LINEITEM.L_PARTKEY = PART.P_PARTKEY AND LINEITEM.L_SHIPDATE >= Date32(\"1995-09-01\") AND LINEITEM.L_SHIPDATE < CAST(Utf8(\"1995-10-01\") AS Date32)\ - \n Cross Join: \ - \n TableScan: LINEITEM\ - \n TableScan: PART" - ); + assert_snapshot!( + plan_str, + @r#" + Projection: Decimal128(Some(10000),5,2) * sum(CASE WHEN PART.P_TYPE LIKE Utf8("PROMO%") THEN LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT ELSE Decimal128(Some(0),19,4) END) / sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS PROMO_REVENUE + Aggregate: groupBy=[[]], aggr=[[sum(CASE WHEN PART.P_TYPE LIKE Utf8("PROMO%") THEN LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT ELSE Decimal128(Some(0),19,4) END), sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT)]] + Projection: CASE WHEN PART.P_TYPE LIKE CAST(Utf8("PROMO%") AS Utf8) THEN LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT) ELSE Decimal128(Some(0),19,4) END, LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT) + Filter: LINEITEM.L_PARTKEY = PART.P_PARTKEY AND LINEITEM.L_SHIPDATE >= Date32("1995-09-01") AND LINEITEM.L_SHIPDATE < CAST(Utf8("1995-10-01") AS Date32) + Cross Join: + TableScan: LINEITEM + TableScan: PART + "# + ); Ok(()) } @@ -304,28 +326,30 @@ mod tests { #[tokio::test] async fn tpch_test_15() -> Result<()> { let plan_str = tpch_plan_to_string(15).await?; - assert_eq!(plan_str, "Test file is empty"); + assert_snapshot!(plan_str, "Test file is empty"); Ok(()) } #[tokio::test] async fn tpch_test_16() -> Result<()> { let plan_str = tpch_plan_to_string(16).await?; - assert_eq!( - plan_str, - "Projection: PART.P_BRAND, PART.P_TYPE, PART.P_SIZE, count(DISTINCT 
PARTSUPP.PS_SUPPKEY) AS SUPPLIER_CNT\ - \n Sort: count(DISTINCT PARTSUPP.PS_SUPPKEY) DESC NULLS FIRST, PART.P_BRAND ASC NULLS LAST, PART.P_TYPE ASC NULLS LAST, PART.P_SIZE ASC NULLS LAST\ - \n Aggregate: groupBy=[[PART.P_BRAND, PART.P_TYPE, PART.P_SIZE]], aggr=[[count(DISTINCT PARTSUPP.PS_SUPPKEY)]]\ - \n Projection: PART.P_BRAND, PART.P_TYPE, PART.P_SIZE, PARTSUPP.PS_SUPPKEY\ - \n Filter: PART.P_PARTKEY = PARTSUPP.PS_PARTKEY AND PART.P_BRAND != Utf8(\"Brand#45\") AND NOT PART.P_TYPE LIKE CAST(Utf8(\"MEDIUM POLISHED%\") AS Utf8) AND (PART.P_SIZE = Int32(49) OR PART.P_SIZE = Int32(14) OR PART.P_SIZE = Int32(23) OR PART.P_SIZE = Int32(45) OR PART.P_SIZE = Int32(19) OR PART.P_SIZE = Int32(3) OR PART.P_SIZE = Int32(36) OR PART.P_SIZE = Int32(9)) AND NOT PARTSUPP.PS_SUPPKEY IN ()\ - \n Subquery:\ - \n Projection: SUPPLIER.S_SUPPKEY\ - \n Filter: SUPPLIER.S_COMMENT LIKE CAST(Utf8(\"%Customer%Complaints%\") AS Utf8)\ - \n TableScan: SUPPLIER\ - \n Cross Join: \ - \n TableScan: PARTSUPP\ - \n TableScan: PART" - ); + assert_snapshot!( + plan_str, + @r#" + Projection: PART.P_BRAND, PART.P_TYPE, PART.P_SIZE, count(DISTINCT PARTSUPP.PS_SUPPKEY) AS SUPPLIER_CNT + Sort: count(DISTINCT PARTSUPP.PS_SUPPKEY) DESC NULLS FIRST, PART.P_BRAND ASC NULLS LAST, PART.P_TYPE ASC NULLS LAST, PART.P_SIZE ASC NULLS LAST + Aggregate: groupBy=[[PART.P_BRAND, PART.P_TYPE, PART.P_SIZE]], aggr=[[count(DISTINCT PARTSUPP.PS_SUPPKEY)]] + Projection: PART.P_BRAND, PART.P_TYPE, PART.P_SIZE, PARTSUPP.PS_SUPPKEY + Filter: PART.P_PARTKEY = PARTSUPP.PS_PARTKEY AND PART.P_BRAND != Utf8("Brand#45") AND NOT PART.P_TYPE LIKE CAST(Utf8("MEDIUM POLISHED%") AS Utf8) AND (PART.P_SIZE = Int32(49) OR PART.P_SIZE = Int32(14) OR PART.P_SIZE = Int32(23) OR PART.P_SIZE = Int32(45) OR PART.P_SIZE = Int32(19) OR PART.P_SIZE = Int32(3) OR PART.P_SIZE = Int32(36) OR PART.P_SIZE = Int32(9)) AND NOT PARTSUPP.PS_SUPPKEY IN () + Subquery: + Projection: SUPPLIER.S_SUPPKEY + Filter: SUPPLIER.S_COMMENT LIKE 
CAST(Utf8("%Customer%Complaints%") AS Utf8) + TableScan: SUPPLIER + Cross Join: + TableScan: PARTSUPP + TableScan: PART + "# + ); Ok(()) } @@ -333,127 +357,137 @@ mod tests { #[tokio::test] async fn tpch_test_17() -> Result<()> { let plan_str = tpch_plan_to_string(17).await?; - assert_eq!(plan_str, "panics due to out of bounds field access"); + assert_snapshot!(plan_str, "panics due to out of bounds field access"); Ok(()) } #[tokio::test] async fn tpch_test_18() -> Result<()> { let plan_str = tpch_plan_to_string(18).await?; - assert_eq!( - plan_str, - "Projection: CUSTOMER.C_NAME, CUSTOMER.C_CUSTKEY, ORDERS.O_ORDERKEY, ORDERS.O_ORDERDATE, ORDERS.O_TOTALPRICE, sum(LINEITEM.L_QUANTITY) AS EXPR$5\ - \n Limit: skip=0, fetch=100\ - \n Sort: ORDERS.O_TOTALPRICE DESC NULLS FIRST, ORDERS.O_ORDERDATE ASC NULLS LAST\ - \n Aggregate: groupBy=[[CUSTOMER.C_NAME, CUSTOMER.C_CUSTKEY, ORDERS.O_ORDERKEY, ORDERS.O_ORDERDATE, ORDERS.O_TOTALPRICE]], aggr=[[sum(LINEITEM.L_QUANTITY)]]\ - \n Projection: CUSTOMER.C_NAME, CUSTOMER.C_CUSTKEY, ORDERS.O_ORDERKEY, ORDERS.O_ORDERDATE, ORDERS.O_TOTALPRICE, LINEITEM.L_QUANTITY\ - \n Filter: ORDERS.O_ORDERKEY IN () AND CUSTOMER.C_CUSTKEY = ORDERS.O_CUSTKEY AND ORDERS.O_ORDERKEY = LINEITEM.L_ORDERKEY\ - \n Subquery:\ - \n Projection: LINEITEM.L_ORDERKEY\ - \n Filter: sum(LINEITEM.L_QUANTITY) > CAST(Int32(300) AS Decimal128(15, 2))\ - \n Aggregate: groupBy=[[LINEITEM.L_ORDERKEY]], aggr=[[sum(LINEITEM.L_QUANTITY)]]\ - \n Projection: LINEITEM.L_ORDERKEY, LINEITEM.L_QUANTITY\ - \n TableScan: LINEITEM\ - \n Cross Join: \ - \n Cross Join: \ - \n TableScan: CUSTOMER\ - \n TableScan: ORDERS\ - \n TableScan: LINEITEM" - ); + assert_snapshot!( + plan_str, + @r#" + Projection: CUSTOMER.C_NAME, CUSTOMER.C_CUSTKEY, ORDERS.O_ORDERKEY, ORDERS.O_ORDERDATE, ORDERS.O_TOTALPRICE, sum(LINEITEM.L_QUANTITY) AS EXPR$5 + Limit: skip=0, fetch=100 + Sort: ORDERS.O_TOTALPRICE DESC NULLS FIRST, ORDERS.O_ORDERDATE ASC NULLS LAST + Aggregate: groupBy=[[CUSTOMER.C_NAME, 
CUSTOMER.C_CUSTKEY, ORDERS.O_ORDERKEY, ORDERS.O_ORDERDATE, ORDERS.O_TOTALPRICE]], aggr=[[sum(LINEITEM.L_QUANTITY)]] + Projection: CUSTOMER.C_NAME, CUSTOMER.C_CUSTKEY, ORDERS.O_ORDERKEY, ORDERS.O_ORDERDATE, ORDERS.O_TOTALPRICE, LINEITEM.L_QUANTITY + Filter: ORDERS.O_ORDERKEY IN () AND CUSTOMER.C_CUSTKEY = ORDERS.O_CUSTKEY AND ORDERS.O_ORDERKEY = LINEITEM.L_ORDERKEY + Subquery: + Projection: LINEITEM.L_ORDERKEY + Filter: sum(LINEITEM.L_QUANTITY) > CAST(Int32(300) AS Decimal128(15, 2)) + Aggregate: groupBy=[[LINEITEM.L_ORDERKEY]], aggr=[[sum(LINEITEM.L_QUANTITY)]] + Projection: LINEITEM.L_ORDERKEY, LINEITEM.L_QUANTITY + TableScan: LINEITEM + Cross Join: + Cross Join: + TableScan: CUSTOMER + TableScan: ORDERS + TableScan: LINEITEM + "# + ); Ok(()) } #[tokio::test] async fn tpch_test_19() -> Result<()> { let plan_str = tpch_plan_to_string(19).await?; - assert_eq!( - plan_str, - "Aggregate: groupBy=[[]], aggr=[[sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS REVENUE]]\ - \n Projection: LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT)\ - \n Filter: PART.P_PARTKEY = LINEITEM.L_PARTKEY AND PART.P_BRAND = Utf8(\"Brand#12\") AND (PART.P_CONTAINER = CAST(Utf8(\"SM CASE\") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8(\"SM BOX\") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8(\"SM PACK\") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8(\"SM PKG\") AS Utf8)) AND LINEITEM.L_QUANTITY >= CAST(Int32(1) AS Decimal128(15, 2)) AND LINEITEM.L_QUANTITY <= CAST(Int32(1) + Int32(10) AS Decimal128(15, 2)) AND PART.P_SIZE >= Int32(1) AND PART.P_SIZE <= Int32(5) AND (LINEITEM.L_SHIPMODE = CAST(Utf8(\"AIR\") AS Utf8) OR LINEITEM.L_SHIPMODE = CAST(Utf8(\"AIR REG\") AS Utf8)) AND LINEITEM.L_SHIPINSTRUCT = Utf8(\"DELIVER IN PERSON\") OR PART.P_PARTKEY = LINEITEM.L_PARTKEY AND PART.P_BRAND = Utf8(\"Brand#23\") AND (PART.P_CONTAINER = CAST(Utf8(\"MED BAG\") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8(\"MED BOX\") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8(\"MED 
PKG\") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8(\"MED PACK\") AS Utf8)) AND LINEITEM.L_QUANTITY >= CAST(Int32(10) AS Decimal128(15, 2)) AND LINEITEM.L_QUANTITY <= CAST(Int32(10) + Int32(10) AS Decimal128(15, 2)) AND PART.P_SIZE >= Int32(1) AND PART.P_SIZE <= Int32(10) AND (LINEITEM.L_SHIPMODE = CAST(Utf8(\"AIR\") AS Utf8) OR LINEITEM.L_SHIPMODE = CAST(Utf8(\"AIR REG\") AS Utf8)) AND LINEITEM.L_SHIPINSTRUCT = Utf8(\"DELIVER IN PERSON\") OR PART.P_PARTKEY = LINEITEM.L_PARTKEY AND PART.P_BRAND = Utf8(\"Brand#34\") AND (PART.P_CONTAINER = CAST(Utf8(\"LG CASE\") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8(\"LG BOX\") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8(\"LG PACK\") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8(\"LG PKG\") AS Utf8)) AND LINEITEM.L_QUANTITY >= CAST(Int32(20) AS Decimal128(15, 2)) AND LINEITEM.L_QUANTITY <= CAST(Int32(20) + Int32(10) AS Decimal128(15, 2)) AND PART.P_SIZE >= Int32(1) AND PART.P_SIZE <= Int32(15) AND (LINEITEM.L_SHIPMODE = CAST(Utf8(\"AIR\") AS Utf8) OR LINEITEM.L_SHIPMODE = CAST(Utf8(\"AIR REG\") AS Utf8)) AND LINEITEM.L_SHIPINSTRUCT = Utf8(\"DELIVER IN PERSON\")\ - \n Cross Join: \ - \n TableScan: LINEITEM\ - \n TableScan: PART" - ); + assert_snapshot!( + plan_str, + @r#" + Aggregate: groupBy=[[]], aggr=[[sum(LINEITEM.L_EXTENDEDPRICE * Int32(1) - LINEITEM.L_DISCOUNT) AS REVENUE]] + Projection: LINEITEM.L_EXTENDEDPRICE * (CAST(Int32(1) AS Decimal128(15, 2)) - LINEITEM.L_DISCOUNT) + Filter: PART.P_PARTKEY = LINEITEM.L_PARTKEY AND PART.P_BRAND = Utf8("Brand#12") AND (PART.P_CONTAINER = CAST(Utf8("SM CASE") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("SM BOX") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("SM PACK") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("SM PKG") AS Utf8)) AND LINEITEM.L_QUANTITY >= CAST(Int32(1) AS Decimal128(15, 2)) AND LINEITEM.L_QUANTITY <= CAST(Int32(1) + Int32(10) AS Decimal128(15, 2)) AND PART.P_SIZE >= Int32(1) AND PART.P_SIZE <= Int32(5) AND (LINEITEM.L_SHIPMODE = CAST(Utf8("AIR") AS Utf8) OR LINEITEM.L_SHIPMODE = 
CAST(Utf8("AIR REG") AS Utf8)) AND LINEITEM.L_SHIPINSTRUCT = Utf8("DELIVER IN PERSON") OR PART.P_PARTKEY = LINEITEM.L_PARTKEY AND PART.P_BRAND = Utf8("Brand#23") AND (PART.P_CONTAINER = CAST(Utf8("MED BAG") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("MED BOX") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("MED PKG") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("MED PACK") AS Utf8)) AND LINEITEM.L_QUANTITY >= CAST(Int32(10) AS Decimal128(15, 2)) AND LINEITEM.L_QUANTITY <= CAST(Int32(10) + Int32(10) AS Decimal128(15, 2)) AND PART.P_SIZE >= Int32(1) AND PART.P_SIZE <= Int32(10) AND (LINEITEM.L_SHIPMODE = CAST(Utf8("AIR") AS Utf8) OR LINEITEM.L_SHIPMODE = CAST(Utf8("AIR REG") AS Utf8)) AND LINEITEM.L_SHIPINSTRUCT = Utf8("DELIVER IN PERSON") OR PART.P_PARTKEY = LINEITEM.L_PARTKEY AND PART.P_BRAND = Utf8("Brand#34") AND (PART.P_CONTAINER = CAST(Utf8("LG CASE") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("LG BOX") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("LG PACK") AS Utf8) OR PART.P_CONTAINER = CAST(Utf8("LG PKG") AS Utf8)) AND LINEITEM.L_QUANTITY >= CAST(Int32(20) AS Decimal128(15, 2)) AND LINEITEM.L_QUANTITY <= CAST(Int32(20) + Int32(10) AS Decimal128(15, 2)) AND PART.P_SIZE >= Int32(1) AND PART.P_SIZE <= Int32(15) AND (LINEITEM.L_SHIPMODE = CAST(Utf8("AIR") AS Utf8) OR LINEITEM.L_SHIPMODE = CAST(Utf8("AIR REG") AS Utf8)) AND LINEITEM.L_SHIPINSTRUCT = Utf8("DELIVER IN PERSON") + Cross Join: + TableScan: LINEITEM + TableScan: PART + "# + ); Ok(()) } #[tokio::test] async fn tpch_test_20() -> Result<()> { let plan_str = tpch_plan_to_string(20).await?; - assert_eq!( - plan_str, - "Sort: SUPPLIER.S_NAME ASC NULLS LAST\ - \n Projection: SUPPLIER.S_NAME, SUPPLIER.S_ADDRESS\ - \n Filter: SUPPLIER.S_SUPPKEY IN () AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_NAME = Utf8(\"CANADA\")\ - \n Subquery:\ - \n Projection: PARTSUPP.PS_SUPPKEY\ - \n Filter: PARTSUPP.PS_PARTKEY IN () AND CAST(PARTSUPP.PS_AVAILQTY AS Decimal128(19, 0)) > ()\ - \n Subquery:\ - \n Projection: PART.P_PARTKEY\ 
- \n Filter: PART.P_NAME LIKE CAST(Utf8(\"forest%\") AS Utf8)\ - \n TableScan: PART\ - \n Subquery:\ - \n Projection: Decimal128(Some(5),2,1) * sum(LINEITEM.L_QUANTITY)\ - \n Aggregate: groupBy=[[]], aggr=[[sum(LINEITEM.L_QUANTITY)]]\ - \n Projection: LINEITEM.L_QUANTITY\ - \n Filter: LINEITEM.L_PARTKEY = LINEITEM.L_ORDERKEY AND LINEITEM.L_SUPPKEY = LINEITEM.L_PARTKEY AND LINEITEM.L_SHIPDATE >= CAST(Utf8(\"1994-01-01\") AS Date32) AND LINEITEM.L_SHIPDATE < CAST(Utf8(\"1995-01-01\") AS Date32)\ - \n TableScan: LINEITEM\ - \n TableScan: PARTSUPP\ - \n Cross Join: \ - \n TableScan: SUPPLIER\ - \n TableScan: NATION" - ); + assert_snapshot!( + plan_str, + @r#" + Sort: SUPPLIER.S_NAME ASC NULLS LAST + Projection: SUPPLIER.S_NAME, SUPPLIER.S_ADDRESS + Filter: SUPPLIER.S_SUPPKEY IN () AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_NAME = Utf8("CANADA") + Subquery: + Projection: PARTSUPP.PS_SUPPKEY + Filter: PARTSUPP.PS_PARTKEY IN () AND CAST(PARTSUPP.PS_AVAILQTY AS Decimal128(19, 0)) > () + Subquery: + Projection: PART.P_PARTKEY + Filter: PART.P_NAME LIKE CAST(Utf8("forest%") AS Utf8) + TableScan: PART + Subquery: + Projection: Decimal128(Some(5),2,1) * sum(LINEITEM.L_QUANTITY) + Aggregate: groupBy=[[]], aggr=[[sum(LINEITEM.L_QUANTITY)]] + Projection: LINEITEM.L_QUANTITY + Filter: LINEITEM.L_PARTKEY = LINEITEM.L_ORDERKEY AND LINEITEM.L_SUPPKEY = LINEITEM.L_PARTKEY AND LINEITEM.L_SHIPDATE >= CAST(Utf8("1994-01-01") AS Date32) AND LINEITEM.L_SHIPDATE < CAST(Utf8("1995-01-01") AS Date32) + TableScan: LINEITEM + TableScan: PARTSUPP + Cross Join: + TableScan: SUPPLIER + TableScan: NATION + "# + ); Ok(()) } #[tokio::test] async fn tpch_test_21() -> Result<()> { let plan_str = tpch_plan_to_string(21).await?; - assert_eq!( - plan_str, - "Projection: SUPPLIER.S_NAME, count(Int64(1)) AS NUMWAIT\ - \n Limit: skip=0, fetch=100\ - \n Sort: count(Int64(1)) DESC NULLS FIRST, SUPPLIER.S_NAME ASC NULLS LAST\ - \n Aggregate: groupBy=[[SUPPLIER.S_NAME]], aggr=[[count(Int64(1))]]\ 
- \n Projection: SUPPLIER.S_NAME\ - \n Filter: SUPPLIER.S_SUPPKEY = LINEITEM.L_SUPPKEY AND ORDERS.O_ORDERKEY = LINEITEM.L_ORDERKEY AND ORDERS.O_ORDERSTATUS = Utf8(\"F\") AND LINEITEM.L_RECEIPTDATE > LINEITEM.L_COMMITDATE AND EXISTS () AND NOT EXISTS () AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_NAME = Utf8(\"SAUDI ARABIA\")\ - \n Subquery:\ - \n Filter: LINEITEM.L_ORDERKEY = LINEITEM.L_TAX AND LINEITEM.L_SUPPKEY != LINEITEM.L_LINESTATUS\ - \n TableScan: LINEITEM\ - \n Subquery:\ - \n Filter: LINEITEM.L_ORDERKEY = LINEITEM.L_TAX AND LINEITEM.L_SUPPKEY != LINEITEM.L_LINESTATUS AND LINEITEM.L_RECEIPTDATE > LINEITEM.L_COMMITDATE\ - \n TableScan: LINEITEM\ - \n Cross Join: \ - \n Cross Join: \ - \n Cross Join: \ - \n TableScan: SUPPLIER\ - \n TableScan: LINEITEM\ - \n TableScan: ORDERS\ - \n TableScan: NATION" - ); + assert_snapshot!( + plan_str, + @r#" + Projection: SUPPLIER.S_NAME, count(Int64(1)) AS NUMWAIT + Limit: skip=0, fetch=100 + Sort: count(Int64(1)) DESC NULLS FIRST, SUPPLIER.S_NAME ASC NULLS LAST + Aggregate: groupBy=[[SUPPLIER.S_NAME]], aggr=[[count(Int64(1))]] + Projection: SUPPLIER.S_NAME + Filter: SUPPLIER.S_SUPPKEY = LINEITEM.L_SUPPKEY AND ORDERS.O_ORDERKEY = LINEITEM.L_ORDERKEY AND ORDERS.O_ORDERSTATUS = Utf8("F") AND LINEITEM.L_RECEIPTDATE > LINEITEM.L_COMMITDATE AND EXISTS () AND NOT EXISTS () AND SUPPLIER.S_NATIONKEY = NATION.N_NATIONKEY AND NATION.N_NAME = Utf8("SAUDI ARABIA") + Subquery: + Filter: LINEITEM.L_ORDERKEY = LINEITEM.L_TAX AND LINEITEM.L_SUPPKEY != LINEITEM.L_LINESTATUS + TableScan: LINEITEM + Subquery: + Filter: LINEITEM.L_ORDERKEY = LINEITEM.L_TAX AND LINEITEM.L_SUPPKEY != LINEITEM.L_LINESTATUS AND LINEITEM.L_RECEIPTDATE > LINEITEM.L_COMMITDATE + TableScan: LINEITEM + Cross Join: + Cross Join: + Cross Join: + TableScan: SUPPLIER + TableScan: LINEITEM + TableScan: ORDERS + TableScan: NATION + "# + ); Ok(()) } #[tokio::test] async fn tpch_test_22() -> Result<()> { let plan_str = tpch_plan_to_string(22).await?; - 
assert_eq!( - plan_str, - "Projection: substr(CUSTOMER.C_PHONE,Int32(1),Int32(2)) AS CNTRYCODE, count(Int64(1)) AS NUMCUST, sum(CUSTOMER.C_ACCTBAL) AS TOTACCTBAL\ - \n Sort: substr(CUSTOMER.C_PHONE,Int32(1),Int32(2)) ASC NULLS LAST\ - \n Aggregate: groupBy=[[substr(CUSTOMER.C_PHONE,Int32(1),Int32(2))]], aggr=[[count(Int64(1)), sum(CUSTOMER.C_ACCTBAL)]]\ - \n Projection: substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)), CUSTOMER.C_ACCTBAL\ - \n Filter: (substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"13\") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"31\") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"23\") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"29\") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"30\") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"18\") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"17\") AS Utf8)) AND CUSTOMER.C_ACCTBAL > () AND NOT EXISTS ()\ - \n Subquery:\ - \n Aggregate: groupBy=[[]], aggr=[[avg(CUSTOMER.C_ACCTBAL)]]\ - \n Projection: CUSTOMER.C_ACCTBAL\ - \n Filter: CUSTOMER.C_ACCTBAL > Decimal128(Some(0),3,2) AND (substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"13\") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"31\") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"23\") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"29\") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"30\") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"18\") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8(\"17\") AS Utf8))\ - \n TableScan: CUSTOMER\ - \n Subquery:\ - \n Filter: ORDERS.O_CUSTKEY = ORDERS.O_ORDERKEY\ - \n TableScan: ORDERS\ - \n TableScan: CUSTOMER" - ); + assert_snapshot!( + plan_str, + @r#" + Projection: substr(CUSTOMER.C_PHONE,Int32(1),Int32(2)) 
AS CNTRYCODE, count(Int64(1)) AS NUMCUST, sum(CUSTOMER.C_ACCTBAL) AS TOTACCTBAL + Sort: substr(CUSTOMER.C_PHONE,Int32(1),Int32(2)) ASC NULLS LAST + Aggregate: groupBy=[[substr(CUSTOMER.C_PHONE,Int32(1),Int32(2))]], aggr=[[count(Int64(1)), sum(CUSTOMER.C_ACCTBAL)]] + Projection: substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)), CUSTOMER.C_ACCTBAL + Filter: (substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("13") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("31") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("23") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("29") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("30") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("18") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("17") AS Utf8)) AND CUSTOMER.C_ACCTBAL > () AND NOT EXISTS () + Subquery: + Aggregate: groupBy=[[]], aggr=[[avg(CUSTOMER.C_ACCTBAL)]] + Projection: CUSTOMER.C_ACCTBAL + Filter: CUSTOMER.C_ACCTBAL > Decimal128(Some(0),3,2) AND (substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("13") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("31") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("23") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("29") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("30") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("18") AS Utf8) OR substr(CUSTOMER.C_PHONE, Int32(1), Int32(2)) = CAST(Utf8("17") AS Utf8)) + TableScan: CUSTOMER + Subquery: + Filter: ORDERS.O_CUSTKEY = ORDERS.O_ORDERKEY + TableScan: ORDERS + TableScan: CUSTOMER + "# + ); Ok(()) } @@ -475,11 +509,13 @@ mod tests { let plan_str = test_plan_to_string("select_count_from_select_1.substrait.json").await?; - assert_eq!( - plan_str, - "Aggregate: groupBy=[[]], aggr=[[count(Int64(1)) AS count(*)]]\ - \n Values: (Int64(0))" - ); + 
assert_snapshot!( + plan_str, + @r#" + Aggregate: groupBy=[[]], aggr=[[count(Int64(1)) AS count(*)]] + Values: (Int64(0)) + "# + ); Ok(()) } @@ -487,12 +523,14 @@ mod tests { async fn test_select_window_count() -> Result<()> { let plan_str = test_plan_to_string("select_window_count.substrait.json").await?; - assert_eq!( - plan_str, - "Projection: count(Int64(1)) PARTITION BY [DATA.PART] ORDER BY [DATA.ORD ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING AS LEAD_EXPR\ - \n WindowAggr: windowExpr=[[count(Int64(1)) PARTITION BY [DATA.PART] ORDER BY [DATA.ORD ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING]]\ - \n TableScan: DATA" - ); + assert_snapshot!( + plan_str, + @r#" + Projection: count(Int64(1)) PARTITION BY [DATA.PART] ORDER BY [DATA.ORD ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING AS LEAD_EXPR + WindowAggr: windowExpr=[[count(Int64(1)) PARTITION BY [DATA.PART] ORDER BY [DATA.ORD ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING]] + TableScan: DATA + "# + ); Ok(()) } } From 32bbca633abeab2d67bd07ff8188d04603a9403f Mon Sep 17 00:00:00 2001 From: qstommyshu Date: Wed, 26 Mar 2025 20:25:10 -0400 Subject: [PATCH 05/17] migrate logical plans tests to use `insta` snapshots --- .../substrait/tests/cases/logical_plans.rs | 80 +++++++++++-------- 1 file changed, 48 insertions(+), 32 deletions(-) diff --git a/datafusion/substrait/tests/cases/logical_plans.rs b/datafusion/substrait/tests/cases/logical_plans.rs index 579e3535f16d..8589edec3d05 100644 --- a/datafusion/substrait/tests/cases/logical_plans.rs +++ b/datafusion/substrait/tests/cases/logical_plans.rs @@ -24,6 +24,7 @@ mod tests { use datafusion::dataframe::DataFrame; use datafusion::prelude::SessionContext; use datafusion_substrait::logical_plan::consumer::from_substrait_plan; + use insta::assert_snapshot; #[tokio::test] async fn scalar_function_compound_signature() -> Result<()> { @@ -40,11 +41,13 @@ mod tests { let ctx = 
add_plan_schemas_to_ctx(SessionContext::new(), &proto_plan)?; let plan = from_substrait_plan(&ctx.state(), &proto_plan).await?; - assert_eq!( - format!("{}", plan), - "Projection: NOT DATA.D AS EXPR$0\ - \n TableScan: DATA" - ); + assert_snapshot!( + format!("{}", plan), + @r#" + Projection: NOT DATA.D AS EXPR$0 + TableScan: DATA + "# + ); // Trigger execution to ensure plan validity DataFrame::new(ctx.state(), plan).show().await?; @@ -69,12 +72,14 @@ mod tests { let ctx = add_plan_schemas_to_ctx(SessionContext::new(), &proto_plan)?; let plan = from_substrait_plan(&ctx.state(), &proto_plan).await?; - assert_eq!( - format!("{}", plan), - "Projection: sum(DATA.D) PARTITION BY [DATA.PART] ORDER BY [DATA.ORD ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING AS LEAD_EXPR\ - \n WindowAggr: windowExpr=[[sum(DATA.D) PARTITION BY [DATA.PART] ORDER BY [DATA.ORD ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING]]\ - \n TableScan: DATA" - ); + assert_snapshot!( + format!("{}", plan), + @r#" + Projection: sum(DATA.D) PARTITION BY [DATA.PART] ORDER BY [DATA.ORD ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING AS LEAD_EXPR + WindowAggr: windowExpr=[[sum(DATA.D) PARTITION BY [DATA.PART] ORDER BY [DATA.ORD ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING]] + TableScan: DATA + "# + ); // Trigger execution to ensure plan validity DataFrame::new(ctx.state(), plan).show().await?; @@ -94,12 +99,14 @@ mod tests { let ctx = add_plan_schemas_to_ctx(SessionContext::new(), &proto_plan)?; let plan = from_substrait_plan(&ctx.state(), &proto_plan).await?; - assert_eq!( - format!("{}", plan), - "Projection: row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS EXPR$0, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW__temp__0 AS ALIASED\ - \n WindowAggr: windowExpr=[[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ - \n TableScan: 
DATA" - ); + assert_snapshot!( + format!("{}", plan), + @r#" + Projection: row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS EXPR$0, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW__temp__0 AS ALIASED + WindowAggr: windowExpr=[[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] + TableScan: DATA + "# + ); // Trigger execution to ensure plan validity DataFrame::new(ctx.state(), plan).show().await?; @@ -121,13 +128,15 @@ mod tests { let ctx = add_plan_schemas_to_ctx(SessionContext::new(), &proto_plan)?; let plan = from_substrait_plan(&ctx.state(), &proto_plan).await?; - assert_eq!( - format!("{}", plan), - "Projection: row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS EXPR$0, row_number() PARTITION BY [DATA.A] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS EXPR$1\ - \n WindowAggr: windowExpr=[[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ - \n WindowAggr: windowExpr=[[row_number() PARTITION BY [DATA.A] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\ - \n TableScan: DATA" - ); + assert_snapshot!( + format!("{}", plan), + @r#" + Projection: row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS EXPR$0, row_number() PARTITION BY [DATA.A] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS EXPR$1 + WindowAggr: windowExpr=[[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] + WindowAggr: windowExpr=[[row_number() PARTITION BY [DATA.A] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] + TableScan: DATA + "# + ); // Trigger execution to ensure plan validity DataFrame::new(ctx.state(), plan).show().await?; @@ -145,7 +154,12 @@ mod tests { let ctx = add_plan_schemas_to_ctx(SessionContext::new(), &proto_plan)?; let plan = from_substrait_plan(&ctx.state(), &proto_plan).await?; - assert_eq!(format!("{}", &plan), "Values: (List([1, 2]))"); + assert_snapshot!( + format!("{}", &plan), + @r#" + Values: (List([1, 
2])) + "# + ); // Trigger execution to ensure plan validity DataFrame::new(ctx.state(), plan).show().await?; @@ -160,13 +174,15 @@ mod tests { let ctx = add_plan_schemas_to_ctx(SessionContext::new(), &proto_plan)?; let plan = from_substrait_plan(&ctx.state(), &proto_plan).await?; - assert_eq!( - format!("{}", plan), - "Projection: lower(sales.product) AS lower(product), sum(count(sales.product)) AS product_count\ - \n Aggregate: groupBy=[[sales.product]], aggr=[[sum(count(sales.product))]]\ - \n Aggregate: groupBy=[[sales.product]], aggr=[[count(sales.product)]]\ - \n TableScan: sales" - ); + assert_snapshot!( + format!("{}", plan), + @r#" + Projection: lower(sales.product) AS lower(product), sum(count(sales.product)) AS product_count + Aggregate: groupBy=[[sales.product]], aggr=[[sum(count(sales.product))]] + Aggregate: groupBy=[[sales.product]], aggr=[[count(sales.product)]] + TableScan: sales + "# + ); // Trigger execution to ensure plan validity DataFrame::new(ctx.state(), plan).show().await?; From 9ba73deac26adb8743d4e2ee9c72053029bc817b Mon Sep 17 00:00:00 2001 From: qstommyshu Date: Thu, 27 Mar 2025 09:33:04 -0400 Subject: [PATCH 06/17] migrate emit_kind_tests to use `insta` snapshots --- .../substrait/tests/cases/emit_kind_tests.rs | 74 ++++++++++++------- 1 file changed, 46 insertions(+), 28 deletions(-) diff --git a/datafusion/substrait/tests/cases/emit_kind_tests.rs b/datafusion/substrait/tests/cases/emit_kind_tests.rs index 08537d0d110f..9e64f27e870e 100644 --- a/datafusion/substrait/tests/cases/emit_kind_tests.rs +++ b/datafusion/substrait/tests/cases/emit_kind_tests.rs @@ -26,6 +26,7 @@ mod tests { use datafusion::prelude::{CsvReadOptions, SessionConfig, SessionContext}; use datafusion_substrait::logical_plan::consumer::from_substrait_plan; use datafusion_substrait::logical_plan::producer::to_substrait_plan; + use insta::assert_snapshot; #[tokio::test] async fn project_respects_direct_emit_kind() -> Result<()> { @@ -37,11 +38,13 @@ mod tests { let 
plan_str = format!("{}", plan); - assert_eq!( - plan_str, - "Projection: DATA.A AS a, DATA.B AS b, DATA.A + Int64(1) AS add1\ - \n TableScan: DATA" - ); + assert_snapshot!( + plan_str, + @r#" + Projection: DATA.A AS a, DATA.B AS b, DATA.A + Int64(1) AS add1 + TableScan: DATA + "# + ); Ok(()) } @@ -55,13 +58,15 @@ mod tests { let plan_str = format!("{}", plan); - assert_eq!( - plan_str, - // Note that duplicate references in the remap are aliased - "Projection: DATA.B, DATA.A AS A1, DATA.A AS DATA.A__temp__0 AS A2\ - \n Filter: DATA.B = Int64(2)\ - \n TableScan: DATA" - ); + assert_snapshot!( + plan_str, + // Note that duplicate references in the remap are aliased + @r#" + Projection: DATA.B, DATA.A AS A1, DATA.A AS DATA.A__temp__0 AS A2 + Filter: DATA.B = Int64(2) + TableScan: DATA + "# + ); Ok(()) } @@ -85,21 +90,24 @@ mod tests { .await?; let plan = df.into_unoptimized_plan(); - assert_eq!( + assert_snapshot!( format!("{}", plan), - "Projection: random() AS c1, data.a + Int64(1) AS c2\ - \n TableScan: data" - ); + @r#" + Projection: random() AS c1, data.a + Int64(1) AS c2 + TableScan: data + "# ); let proto = to_substrait_plan(&plan, &ctx.state())?; let plan2 = from_substrait_plan(&ctx.state(), &proto).await?; // note how the Projections are not flattened - assert_eq!( - format!("{}", plan2), - "Projection: random() AS c1, data.a + Int64(1) AS c2\ - \n Projection: data.a, data.b, data.c, data.d, data.e, data.f, random(), data.a + Int64(1)\ - \n TableScan: data" - ); + assert_snapshot!( + format!("{}", plan2), + @r#" + Projection: random() AS c1, data.a + Int64(1) AS c2 + Projection: data.a, data.b, data.c, data.d, data.e, data.f, random(), data.a + Int64(1) + TableScan: data + "# + ); Ok(()) } @@ -109,18 +117,28 @@ mod tests { let df = ctx.sql("SELECT a + 1, b + 2 FROM data").await?; let plan = df.into_unoptimized_plan(); - assert_eq!( - format!("{}", plan), - "Projection: data.a + Int64(1), data.b + Int64(2)\ - \n TableScan: data" - ); + assert_snapshot!( + 
format!("{}", plan), + @r#" + Projection: data.a + Int64(1), data.b + Int64(2) + TableScan: data + "# + ); let proto = to_substrait_plan(&plan, &ctx.state())?; let plan2 = from_substrait_plan(&ctx.state(), &proto).await?; let plan1str = format!("{plan}"); let plan2str = format!("{plan2}"); - assert_eq!(plan1str, plan2str); + println!("{}", plan1str); + println!("{}", plan2str); + assert_snapshot!( + plan1str, + @r#" + Projection: data.a + Int64(1), data.b + Int64(2) + TableScan: data + "# + ); Ok(()) } From dc0c76c868b693cd6d8fd225560eaff4f81e16cb Mon Sep 17 00:00:00 2001 From: qstommyshu Date: Thu, 27 Mar 2025 09:33:50 -0400 Subject: [PATCH 07/17] migrate function_test to use `insta` snapshots for assertions --- datafusion/substrait/tests/cases/function_test.rs | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/datafusion/substrait/tests/cases/function_test.rs b/datafusion/substrait/tests/cases/function_test.rs index 043808456176..7fc06ad22e08 100644 --- a/datafusion/substrait/tests/cases/function_test.rs +++ b/datafusion/substrait/tests/cases/function_test.rs @@ -24,6 +24,7 @@ mod tests { use datafusion::common::Result; use datafusion::prelude::SessionContext; use datafusion_substrait::logical_plan::consumer::from_substrait_plan; + use insta::assert_snapshot; #[tokio::test] async fn contains_function_test() -> Result<()> { @@ -33,12 +34,14 @@ mod tests { let plan_str = format!("{}", plan); - assert_eq!( - plan_str, - "Projection: nation.n_name\ - \n Filter: contains(nation.n_name, Utf8(\"IA\"))\ - \n TableScan: nation" - ); + assert_snapshot!( + plan_str, + @r#" + Projection: nation.n_name + Filter: contains(nation.n_name, Utf8("IA")) + TableScan: nation + "# + ); Ok(()) } } From 9deebb0229ab67b1c0169e3705aa0bbef26eda2b Mon Sep 17 00:00:00 2001 From: qstommyshu Date: Thu, 27 Mar 2025 09:41:05 -0400 Subject: [PATCH 08/17] migrate substrait_validations tests to use insta snapshots, missing `insta` mapping to `assert!` --- 
.../tests/cases/substrait_validations.rs | 37 +++++++++++-------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/datafusion/substrait/tests/cases/substrait_validations.rs b/datafusion/substrait/tests/cases/substrait_validations.rs index a7d4d4aa82fc..620e0d8c5b7c 100644 --- a/datafusion/substrait/tests/cases/substrait_validations.rs +++ b/datafusion/substrait/tests/cases/substrait_validations.rs @@ -26,6 +26,7 @@ mod tests { use datafusion::datasource::empty::EmptyTable; use datafusion::prelude::SessionContext; use datafusion_substrait::logical_plan::consumer::from_substrait_plan; + use insta::assert_snapshot; use std::collections::HashMap; use std::sync::Arc; @@ -66,11 +67,13 @@ mod tests { let ctx = generate_context_with_table("DATA", df_schema)?; let plan = from_substrait_plan(&ctx.state(), &proto_plan).await?; - assert_eq!( - format!("{}", plan), - "Projection: DATA.a, DATA.b\ - \n TableScan: DATA" - ); + assert_snapshot!( + format!("{}", plan), + @r#" + Projection: DATA.a, DATA.b + TableScan: DATA + "# + ); Ok(()) } @@ -87,11 +90,13 @@ mod tests { let ctx = generate_context_with_table("DATA", df_schema)?; let plan = from_substrait_plan(&ctx.state(), &proto_plan).await?; - assert_eq!( - format!("{}", plan), - "Projection: DATA.a, DATA.b\ - \n TableScan: DATA projection=[a, b]" - ); + assert_snapshot!( + format!("{}", plan), + @r#" + Projection: DATA.a, DATA.b + TableScan: DATA projection=[a, b] + "# + ); Ok(()) } @@ -110,11 +115,13 @@ mod tests { let ctx = generate_context_with_table("DATA", df_schema)?; let plan = from_substrait_plan(&ctx.state(), &proto_plan).await?; - assert_eq!( - format!("{}", plan), - "Projection: DATA.a, DATA.b\ - \n TableScan: DATA projection=[a, b]" - ); + assert_snapshot!( + format!("{}", plan), + @r#" + Projection: DATA.a, DATA.b + TableScan: DATA projection=[a, b] + "# + ); Ok(()) } From 18f8106be30336f2f60a2e42b6c4935e3d6cde67 Mon Sep 17 00:00:00 2001 From: qstommyshu Date: Thu, 27 Mar 2025 21:22:32 -0400 Subject: 
[PATCH 09/17] revert `handle_emit_as_project_without_volatile_exprs` back to `assert_eq!` and remove `format!` for `assert_snapshot!` --- .../substrait/tests/cases/emit_kind_tests.rs | 22 +++++-------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/datafusion/substrait/tests/cases/emit_kind_tests.rs b/datafusion/substrait/tests/cases/emit_kind_tests.rs index 9e64f27e870e..88db2bc34d7f 100644 --- a/datafusion/substrait/tests/cases/emit_kind_tests.rs +++ b/datafusion/substrait/tests/cases/emit_kind_tests.rs @@ -36,10 +36,8 @@ mod tests { let ctx = add_plan_schemas_to_ctx(SessionContext::new(), &proto_plan)?; let plan = from_substrait_plan(&ctx.state(), &proto_plan).await?; - let plan_str = format!("{}", plan); - assert_snapshot!( - plan_str, + plan, @r#" Projection: DATA.A AS a, DATA.B AS b, DATA.A + Int64(1) AS add1 TableScan: DATA @@ -56,10 +54,8 @@ mod tests { let ctx = add_plan_schemas_to_ctx(SessionContext::new(), &proto_plan)?; let plan = from_substrait_plan(&ctx.state(), &proto_plan).await?; - let plan_str = format!("{}", plan); - assert_snapshot!( - plan_str, + plan, // Note that duplicate references in the remap are aliased @r#" Projection: DATA.B, DATA.A AS A1, DATA.A AS DATA.A__temp__0 AS A2 @@ -91,7 +87,7 @@ mod tests { let plan = df.into_unoptimized_plan(); assert_snapshot!( - format!("{}", plan), + plan, @r#" Projection: random() AS c1, data.a + Int64(1) AS c2 TableScan: data @@ -101,7 +97,7 @@ mod tests { let plan2 = from_substrait_plan(&ctx.state(), &proto).await?; // note how the Projections are not flattened assert_snapshot!( - format!("{}", plan2), + plan2, @r#" Projection: random() AS c1, data.a + Int64(1) AS c2 Projection: data.a, data.b, data.c, data.d, data.e, data.f, random(), data.a + Int64(1) @@ -118,7 +114,7 @@ mod tests { let plan = df.into_unoptimized_plan(); assert_snapshot!( - format!("{}", plan), + plan, @r#" Projection: data.a + Int64(1), data.b + Int64(2) TableScan: data @@ -132,13 +128,7 @@ mod tests { let 
plan2str = format!("{plan2}"); println!("{}", plan1str); println!("{}", plan2str); - assert_snapshot!( - plan1str, - @r#" - Projection: data.a + Int64(1), data.b + Int64(2) - TableScan: data - "# - ); + assert_eq!(plan1str, plan2str); Ok(()) } From ab3fd5bf701a1053124ea14c6ec6795263c52f5d Mon Sep 17 00:00:00 2001 From: qstommyshu Date: Thu, 27 Mar 2025 21:25:14 -0400 Subject: [PATCH 10/17] migrate function and validation tests to use plan directly in assert_snapshot! --- datafusion/substrait/tests/cases/function_test.rs | 4 +--- datafusion/substrait/tests/cases/logical_plans.rs | 10 +++++----- .../substrait/tests/cases/substrait_validations.rs | 6 +++--- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/datafusion/substrait/tests/cases/function_test.rs b/datafusion/substrait/tests/cases/function_test.rs index 7fc06ad22e08..1816c64d3921 100644 --- a/datafusion/substrait/tests/cases/function_test.rs +++ b/datafusion/substrait/tests/cases/function_test.rs @@ -32,10 +32,8 @@ mod tests { let ctx = add_plan_schemas_to_ctx(SessionContext::new(), &proto_plan)?; let plan = from_substrait_plan(&ctx.state(), &proto_plan).await?; - let plan_str = format!("{}", plan); - assert_snapshot!( - plan_str, + plan, @r#" Projection: nation.n_name Filter: contains(nation.n_name, Utf8("IA")) diff --git a/datafusion/substrait/tests/cases/logical_plans.rs b/datafusion/substrait/tests/cases/logical_plans.rs index 8589edec3d05..fecae9ca6f71 100644 --- a/datafusion/substrait/tests/cases/logical_plans.rs +++ b/datafusion/substrait/tests/cases/logical_plans.rs @@ -42,7 +42,7 @@ mod tests { let plan = from_substrait_plan(&ctx.state(), &proto_plan).await?; assert_snapshot!( - format!("{}", plan), + plan, @r#" Projection: NOT DATA.D AS EXPR$0 TableScan: DATA @@ -73,7 +73,7 @@ mod tests { let plan = from_substrait_plan(&ctx.state(), &proto_plan).await?; assert_snapshot!( - format!("{}", plan), + plan, @r#" Projection: sum(DATA.D) PARTITION BY [DATA.PART] ORDER BY [DATA.ORD ASC NULLS 
LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING AS LEAD_EXPR WindowAggr: windowExpr=[[sum(DATA.D) PARTITION BY [DATA.PART] ORDER BY [DATA.ORD ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING]] @@ -100,7 +100,7 @@ mod tests { let plan = from_substrait_plan(&ctx.state(), &proto_plan).await?; assert_snapshot!( - format!("{}", plan), + plan, @r#" Projection: row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS EXPR$0, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW__temp__0 AS ALIASED WindowAggr: windowExpr=[[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] @@ -129,7 +129,7 @@ mod tests { let plan = from_substrait_plan(&ctx.state(), &proto_plan).await?; assert_snapshot!( - format!("{}", plan), + plan, @r#" Projection: row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS EXPR$0, row_number() PARTITION BY [DATA.A] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS EXPR$1 WindowAggr: windowExpr=[[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] @@ -175,7 +175,7 @@ mod tests { let plan = from_substrait_plan(&ctx.state(), &proto_plan).await?; assert_snapshot!( - format!("{}", plan), + plan, @r#" Projection: lower(sales.product) AS lower(product), sum(count(sales.product)) AS product_count Aggregate: groupBy=[[sales.product]], aggr=[[sum(count(sales.product))]] diff --git a/datafusion/substrait/tests/cases/substrait_validations.rs b/datafusion/substrait/tests/cases/substrait_validations.rs index 620e0d8c5b7c..a31b3ca385e9 100644 --- a/datafusion/substrait/tests/cases/substrait_validations.rs +++ b/datafusion/substrait/tests/cases/substrait_validations.rs @@ -68,7 +68,7 @@ mod tests { let plan = from_substrait_plan(&ctx.state(), &proto_plan).await?; assert_snapshot!( - format!("{}", plan), + plan, @r#" Projection: DATA.a, DATA.b TableScan: DATA @@ -91,7 +91,7 @@ mod tests { let plan = from_substrait_plan(&ctx.state(), 
&proto_plan).await?; assert_snapshot!( - format!("{}", plan), + plan, @r#" Projection: DATA.a, DATA.b TableScan: DATA projection=[a, b] @@ -116,7 +116,7 @@ mod tests { let plan = from_substrait_plan(&ctx.state(), &proto_plan).await?; assert_snapshot!( - format!("{}", plan), + plan, @r#" Projection: DATA.a, DATA.b TableScan: DATA projection=[a, b] From e1fa3b86db96ecd67f71d8bbea9f3ad2639ddc60 Mon Sep 17 00:00:00 2001 From: qstommyshu Date: Thu, 27 Mar 2025 21:31:54 -0400 Subject: [PATCH 11/17] migrate serialize tests to use insta snapshots for assertions --- datafusion/substrait/tests/cases/serialize.rs | 29 ++++++++++++------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/datafusion/substrait/tests/cases/serialize.rs b/datafusion/substrait/tests/cases/serialize.rs index 02089b9fa92d..39c0622e3ba3 100644 --- a/datafusion/substrait/tests/cases/serialize.rs +++ b/datafusion/substrait/tests/cases/serialize.rs @@ -27,6 +27,7 @@ mod tests { use datafusion::error::Result; use datafusion::prelude::*; + use insta::assert_snapshot; use std::fs; use substrait::proto::plan_rel::RelType; use substrait::proto::rel_common::{Emit, EmitKind}; @@ -92,11 +93,14 @@ mod tests { let df = ctx.sql("SELECT b, a + a, a FROM data").await?; let datafusion_plan = df.into_optimized_plan()?; - assert_eq!( - format!("{}", datafusion_plan), - "Projection: data.b, data.a + data.a, data.a\ - \n TableScan: data projection=[a, b]", - ); + assert_snapshot!( + format!("{}", datafusion_plan), + @r#" +Projection: data.b, data.a + data.a, data.a + TableScan: data projection=[a, b] +"# + , + ); let plan = to_substrait_plan(&datafusion_plan, &ctx.state())? 
.as_ref() @@ -136,12 +140,15 @@ mod tests { .sql("SELECT b, RANK() OVER (PARTITION BY a), c FROM data;") .await?; let datafusion_plan = df.into_optimized_plan()?; - assert_eq!( - format!("{}", datafusion_plan), - "Projection: data.b, rank() PARTITION BY [data.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, data.c\ - \n WindowAggr: windowExpr=[[rank() PARTITION BY [data.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]\ - \n TableScan: data projection=[a, b, c]", - ); + assert_snapshot!( + datafusion_plan, + @r#" +Projection: data.b, rank() PARTITION BY [data.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, data.c + WindowAggr: windowExpr=[[rank() PARTITION BY [data.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] + TableScan: data projection=[a, b, c] +"# + , + ); let plan = to_substrait_plan(&datafusion_plan, &ctx.state())? .as_ref() From 805e351e8242ee2b095751cd7d8ed327fd2d012c Mon Sep 17 00:00:00 2001 From: qstommyshu Date: Thu, 27 Mar 2025 21:40:58 -0400 Subject: [PATCH 12/17] migrate logical_plans test to use insta snapshots for assertions --- datafusion/substrait/tests/cases/logical_plans.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/substrait/tests/cases/logical_plans.rs b/datafusion/substrait/tests/cases/logical_plans.rs index fecae9ca6f71..4dd97193034b 100644 --- a/datafusion/substrait/tests/cases/logical_plans.rs +++ b/datafusion/substrait/tests/cases/logical_plans.rs @@ -155,7 +155,7 @@ mod tests { let plan = from_substrait_plan(&ctx.state(), &proto_plan).await?; assert_snapshot!( - format!("{}", &plan), + &plan, @r#" Values: (List([1, 2])) "# From 5ac2cdfafe07d19e848a998f8d6f701f82afc59f Mon Sep 17 00:00:00 2001 From: qstommyshu Date: Fri, 28 Mar 2025 10:56:18 -0400 Subject: [PATCH 13/17] WIP --- .../tests/cases/roundtrip_logical_plan.rs | 261 ++++++++++++------ 1 file changed, 183 insertions(+), 78 deletions(-) diff --git 
a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs index f989d05c80dd..3b4db9e3ec81 100644 --- a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs @@ -37,6 +37,7 @@ use datafusion::logical_expr::{ }; use datafusion::optimizer::simplify_expressions::expr_simplifier::THRESHOLD_INLINE_INLIST; use datafusion::prelude::*; +use insta::assert_snapshot; use std::hash::Hash; use std::sync::Arc; use substrait::proto::extensions::simple_extension_declaration::MappingType; @@ -188,13 +189,15 @@ async fn simple_select() -> Result<()> { #[tokio::test] async fn wildcard_select() -> Result<()> { - assert_expected_plan_unoptimized( - "SELECT * FROM data", - "Projection: data.a, data.b, data.c, data.d, data.e, data.f\ - \n TableScan: data", - true, - ) - .await + let plan = assert_and_generate_plan("SELECT * FROM data", true, false).await?; + + assert_snapshot!( + plan, + @r#" + Projection: data.a, data.b, data.c, data.d, data.e, data.f + TableScan: data + "#); + Ok(()) } #[tokio::test] @@ -299,24 +302,42 @@ async fn aggregate_grouping_sets() -> Result<()> { #[tokio::test] async fn aggregate_grouping_rollup() -> Result<()> { - assert_expected_plan( + let plan = assert_and_generate_plan( "SELECT a, c, e, avg(b) FROM data GROUP BY ROLLUP (a, c, e)", - "Projection: data.a, data.c, data.e, avg(data.b)\ - \n Aggregate: groupBy=[[GROUPING SETS ((data.a, data.c, data.e), (data.a, data.c), (data.a), ())]], aggr=[[avg(data.b)]]\ - \n TableScan: data projection=[a, b, c, e]", - true - ).await + true, + true, + ) + .await?; + + assert_snapshot!( + plan, + @r#" + Projection: data.a, data.c, data.e, avg(data.b) + Aggregate: groupBy=[[GROUPING SETS ((data.a, data.c, data.e), (data.a, data.c), (data.a), ())]], aggr=[[avg(data.b)]] + TableScan: data projection=[a, b, c, e] + "# + ); + Ok(()) } #[tokio::test] async fn multilayer_aggregate() -> Result<()> { 
- assert_expected_plan( + let plan = assert_and_generate_plan( "SELECT a, sum(partial_count_b) FROM (SELECT a, count(b) as partial_count_b FROM data GROUP BY a) GROUP BY a", - "Aggregate: groupBy=[[data.a]], aggr=[[sum(count(data.b)) AS sum(partial_count_b)]]\ - \n Aggregate: groupBy=[[data.a]], aggr=[[count(data.b)]]\ - \n TableScan: data projection=[a, b]", - true - ).await + true, + true, + ) + .await?; + + assert_snapshot!( + plan, + @r#" + Aggregate: groupBy=[[data.a]], aggr=[[sum(count(data.b)) AS sum(partial_count_b)]] + Aggregate: groupBy=[[data.a]], aggr=[[count(data.b)]] + TableScan: data projection=[a, b] + "# + ); + Ok(()) } #[tokio::test] @@ -454,13 +475,21 @@ async fn try_cast_decimal_to_string() -> Result<()> { #[tokio::test] async fn aggregate_case() -> Result<()> { - assert_expected_plan( + let plan = assert_and_generate_plan( "SELECT sum(CASE WHEN a > 0 THEN 1 ELSE NULL END) FROM data", - "Aggregate: groupBy=[[]], aggr=[[sum(CASE WHEN data.a > Int64(0) THEN Int64(1) ELSE Int64(NULL) END) AS sum(CASE WHEN data.a > Int64(0) THEN Int64(1) ELSE NULL END)]]\ - \n TableScan: data projection=[a]", - true + true, + true, ) - .await + .await?; + + assert_snapshot!( + plan, + @r#" + Aggregate: groupBy=[[]], aggr=[[sum(CASE WHEN data.a > Int64(0) THEN Int64(1) ELSE Int64(NULL) END) AS sum(CASE WHEN data.a > Int64(0) THEN Int64(1) ELSE NULL END)]] + TableScan: data projection=[a] + "# + ); + Ok(()) } #[tokio::test] @@ -494,17 +523,26 @@ async fn roundtrip_inlist_4() -> Result<()> { async fn roundtrip_inlist_5() -> Result<()> { // on roundtrip there is an additional projection during TableScan which includes all column of the table, // using assert_expected_plan here as a workaround - assert_expected_plan( + let plan = assert_and_generate_plan( "SELECT a, f FROM data WHERE (f IN ('a', 'b', 'c') OR a in (SELECT data2.a FROM data2 WHERE f IN ('b', 'c', 'd')))", + true, + true, + ) + .await?; - "Projection: data.a, data.f\ - \n Filter: data.f = Utf8(\"a\") OR 
data.f = Utf8(\"b\") OR data.f = Utf8(\"c\") OR data2.mark\ - \n LeftMark Join: data.a = data2.a\ - \n TableScan: data projection=[a, f]\ - \n Projection: data2.a\ - \n Filter: data2.f = Utf8(\"b\") OR data2.f = Utf8(\"c\") OR data2.f = Utf8(\"d\")\ - \n TableScan: data2 projection=[a, f], partial_filters=[data2.f = Utf8(\"b\") OR data2.f = Utf8(\"c\") OR data2.f = Utf8(\"d\")]", - true).await + assert_snapshot!( + plan, + @r#" + Projection: data.a, data.f + Filter: data.f = Utf8("a") OR data.f = Utf8("b") OR data.f = Utf8("c") OR data2.mark + LeftMark Join: data.a = data2.a + TableScan: data projection=[a, f] + Projection: data2.a + Filter: data2.f = Utf8("b") OR data2.f = Utf8("c") OR data2.f = Utf8("d") + TableScan: data2 projection=[a, f], partial_filters=[data2.f = Utf8("b") OR data2.f = Utf8("c") OR data2.f = Utf8("d")] + "# + ); + Ok(()) } #[tokio::test] @@ -535,27 +573,44 @@ async fn roundtrip_non_equi_join() -> Result<()> { #[tokio::test] async fn roundtrip_exists_filter() -> Result<()> { - assert_expected_plan( + let plan = assert_and_generate_plan( "SELECT b FROM data d1 WHERE EXISTS (SELECT * FROM data2 d2 WHERE d2.a = d1.a AND d2.e != d1.e)", - "Projection: data.b\ - \n LeftSemi Join: data.a = data2.a Filter: data2.e != CAST(data.e AS Int64)\ - \n TableScan: data projection=[a, b, e]\ - \n TableScan: data2 projection=[a, e]", - false // "d1" vs "data" field qualifier - ).await + false, + true, + ) + .await?; + + assert_snapshot!( + plan, + @r#" + Projection: data.b + LeftSemi Join: data.a = data2.a Filter: data2.e != CAST(data.e AS Int64) + TableScan: data projection=[a, b, e] + TableScan: data2 projection=[a, e] + "# + ); + Ok(()) } #[tokio::test] async fn inner_join() -> Result<()> { - assert_expected_plan( + let plan = assert_and_generate_plan( "SELECT data.a FROM data JOIN data2 ON data.a = data2.a", - "Projection: data.a\ - \n Inner Join: data.a = data2.a\ - \n TableScan: data projection=[a]\ - \n TableScan: data2 projection=[a]", + true, true, ) 
- .await + .await?; + + assert_snapshot!( + plan, + @r#" + Projection: data.a + Inner Join: data.a = data2.a + TableScan: data projection=[a] + TableScan: data2 projection=[a] + "# + ); + Ok(()) } #[tokio::test] @@ -592,17 +647,25 @@ async fn roundtrip_self_implicit_cross_join() -> Result<()> { #[tokio::test] async fn self_join_introduces_aliases() -> Result<()> { - assert_expected_plan( + let plan = assert_and_generate_plan( "SELECT d1.b, d2.c FROM data d1 JOIN data d2 ON d1.b = d2.b", - "Projection: left.b, right.c\ - \n Inner Join: left.b = right.b\ - \n SubqueryAlias: left\ - \n TableScan: data projection=[b]\ - \n SubqueryAlias: right\ - \n TableScan: data projection=[b, c]", false, + true, ) - .await + .await?; + + assert_snapshot!( + plan, + @r#" + Projection: left.b, right.c + Inner Join: left.b = right.b + SubqueryAlias: left + TableScan: data projection=[b] + SubqueryAlias: right + TableScan: data projection=[b, c] + "# + ); + Ok(()) } #[tokio::test] @@ -986,13 +1049,21 @@ async fn roundtrip_literal_list() -> Result<()> { #[tokio::test] async fn roundtrip_literal_struct() -> Result<()> { - assert_expected_plan( + let plan = assert_and_generate_plan( "SELECT STRUCT(1, true, CAST(NULL AS STRING)) FROM data", - "Projection: Struct({c0:1,c1:true,c2:}) AS struct(Int64(1),Boolean(true),NULL)\ - \n TableScan: data projection=[]", - false, // "Struct(..)" vs "struct(..)" + false, + true, ) - .await + .await?; + + assert_snapshot!( + plan, + @r#" + Projection: Struct({c0:1,c1:true,c2:}) AS struct(Int64(1),Boolean(true),NULL) + TableScan: data projection=[] + "# + ); + Ok(()) } #[tokio::test] @@ -1020,6 +1091,30 @@ async fn roundtrip_values() -> Result<()> { ), \ (Int64(NULL), Utf8(NULL), List(), LargeList(), Struct({c0:,int_field:,c2:}), List())", true).await + + // let plan = assert_and_generate_plan( + // "VALUES \ + // (\ + // 1, \ + // 'a', \ + // [[-213.1, NULL, 5.5, 2.0, 1.0], []], \ + // arrow_cast([1,2,3], 'LargeList(Int64)'), \ + // STRUCT(true, 1 AS 
int_field, CAST(NULL AS STRING)), \ + // [STRUCT(STRUCT('a' AS string_field) AS struct_field), STRUCT(STRUCT('b' AS string_field) AS struct_field)]\ + // ), \ + // (NULL, NULL, NULL, NULL, NULL, NULL)", + // true, + // true, + // ) + // .await?; + + // assert_snapshot!( + // plan, + // @r#" + // Values: (Int64(1), Utf8("a"), List([[-213.1, , 5.5, 2.0, 1.0], []]), LargeList([1, 2, 3]), Struct({c0:true,int_field:1,c2:}), List([{struct_field: {string_field: a}}, {struct_field: {string_field: b}}]) ), (Int64(NULL), Utf8(NULL), List(), LargeList(), Struct({c0:,int_field:,c2:}), List()) + // "# + // ); + // Ok(()) } #[tokio::test] @@ -1061,14 +1156,22 @@ async fn duplicate_column() -> Result<()> { // only. DataFusion however, is strict about not having duplicate column names appear in the plan. // This test confirms that we generate aliases for columns in the plan which would otherwise have // colliding names. - assert_expected_plan( + let plan = assert_and_generate_plan( "SELECT a + 1 as sum_a, a + 1 as sum_a_2 FROM data", - "Projection: data.a + Int64(1) AS sum_a, data.a + Int64(1) AS data.a + Int64(1)__temp__0 AS sum_a_2\ - \n Projection: data.a + Int64(1)\ - \n TableScan: data projection=[a]", + true, true, ) - .await + .await?; + + assert_snapshot!( + plan, + @r#" + Projection: data.a + Int64(1) AS sum_a, data.a + Int64(1) AS data.a + Int64(1)__temp__0 AS sum_a_2 + Projection: data.a + Int64(1) + TableScan: data projection=[a] + "# + ); + Ok(()) } /// Construct a plan that cast columns. Only those SQL types are supported for now. 
@@ -1374,30 +1477,32 @@ async fn assert_read_filter_count( Ok(()) } -async fn assert_expected_plan_unoptimized( +async fn assert_and_generate_plan( sql: &str, - expected_plan_str: &str, assert_schema: bool, -) -> Result<()> { + optimized: bool, +) -> Result { let ctx = create_context().await?; let df = ctx.sql(sql).await?; - let plan = df.into_unoptimized_plan(); - let proto = to_substrait_plan(&plan, &ctx.state())?; - let plan2 = from_substrait_plan(&ctx.state(), &proto).await?; - println!("{plan}"); - println!("{plan2}"); - - println!("{proto:?}"); + let plan = if optimized { + df.into_optimized_plan()? + } else { + df.into_unoptimized_plan() + }; + let proto = to_substrait_plan(&plan, &ctx.state())?; + let plan2 = if optimized { + let temp = from_substrait_plan(&ctx.state(), &proto).await?; + ctx.state().optimize(&temp)? + } else { + from_substrait_plan(&ctx.state(), &proto).await? + }; if assert_schema { assert_eq!(plan.schema(), plan2.schema()); } - let plan2str = format!("{plan2}"); - assert_eq!(expected_plan_str, &plan2str); - - Ok(()) + Ok(format!("{}", plan2)) } async fn assert_expected_plan( From 39ff6b4c1ffe0f806fcce914ea83658515cbeae3 Mon Sep 17 00:00:00 2001 From: qstommyshu Date: Fri, 28 Mar 2025 11:08:58 -0400 Subject: [PATCH 14/17] migrate `assert_expected_plan_substrait` --- .../tests/cases/roundtrip_logical_plan.rs | 55 ++++++++++--------- 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs index 3b4db9e3ec81..c820fbe7c79b 100644 --- a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs @@ -810,12 +810,15 @@ async fn aggregate_wo_projection_consume() -> Result<()> { let proto_plan = read_json("tests/testdata/test_plans/aggregate_no_project.substrait.json"); - assert_expected_plan_substrait( - proto_plan, - "Aggregate: groupBy=[[data.a]], 
aggr=[[count(data.a) AS countA]]\ - \n TableScan: data projection=[a]", - ) - .await + let plan = generate_plan_from_substrait(proto_plan).await?; + assert_snapshot!( + plan, + @r#" + Aggregate: groupBy=[[data.a]], aggr=[[count(data.a) AS countA]] + TableScan: data projection=[a] + "# + ); + Ok(()) } #[tokio::test] @@ -823,12 +826,15 @@ async fn aggregate_wo_projection_group_expression_ref_consume() -> Result<()> { let proto_plan = read_json("tests/testdata/test_plans/aggregate_no_project_group_expression_ref.substrait.json"); - assert_expected_plan_substrait( - proto_plan, - "Aggregate: groupBy=[[data.a]], aggr=[[count(data.a) AS countA]]\ - \n TableScan: data projection=[a]", - ) - .await + let plan = generate_plan_from_substrait(proto_plan).await?; + assert_snapshot!( + plan, + @r#" + Aggregate: groupBy=[[data.a]], aggr=[[count(data.a) AS countA]] + TableScan: data projection=[a] + "# + ); + Ok(()) } #[tokio::test] @@ -836,12 +842,15 @@ async fn aggregate_wo_projection_sorted_consume() -> Result<()> { let proto_plan = read_json("tests/testdata/test_plans/aggregate_sorted_no_project.substrait.json"); - assert_expected_plan_substrait( - proto_plan, - "Aggregate: groupBy=[[data.a]], aggr=[[count(data.a) ORDER BY [data.a DESC NULLS FIRST] AS countA]]\ - \n TableScan: data projection=[a]", - ) - .await + let plan = generate_plan_from_substrait(proto_plan).await?; + assert_snapshot!( + plan, + @r#" + Aggregate: groupBy=[[data.a]], aggr=[[count(data.a) ORDER BY [data.a DESC NULLS FIRST] AS countA]] + TableScan: data projection=[a] + "# + ); + Ok(()) } #[tokio::test] @@ -1532,20 +1541,14 @@ async fn assert_expected_plan( Ok(()) } -async fn assert_expected_plan_substrait( - substrait_plan: Plan, - expected_plan_str: &str, -) -> Result<()> { +async fn generate_plan_from_substrait(substrait_plan: Plan) -> Result { let ctx = create_context().await?; let plan = from_substrait_plan(&ctx.state(), &substrait_plan).await?; let plan = ctx.state().optimize(&plan)?; - let planstr = 
format!("{plan}"); - assert_eq!(planstr, expected_plan_str); - - Ok(()) + Ok(plan) } async fn assert_substrait_sql(substrait_plan: Plan, sql: &str) -> Result<()> { From d372d124be9f31eef9fe3312839326b831ff2c09 Mon Sep 17 00:00:00 2001 From: qstommyshu Date: Fri, 28 Mar 2025 11:24:25 -0400 Subject: [PATCH 15/17] refactor tests to use assert_and_generate_plan and assert_snapshot! for improved clarity and consistency --- .../tests/cases/roundtrip_logical_plan.rs | 58 ++++++------------- 1 file changed, 18 insertions(+), 40 deletions(-) diff --git a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs index c820fbe7c79b..358195032724 100644 --- a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs @@ -192,11 +192,12 @@ async fn wildcard_select() -> Result<()> { let plan = assert_and_generate_plan("SELECT * FROM data", true, false).await?; assert_snapshot!( - plan, - @r#" + plan, + @r#" Projection: data.a, data.b, data.c, data.d, data.e, data.f TableScan: data - "#); + "# + ); Ok(()) } @@ -522,7 +523,7 @@ async fn roundtrip_inlist_4() -> Result<()> { #[tokio::test] async fn roundtrip_inlist_5() -> Result<()> { // on roundtrip there is an additional projection during TableScan which includes all column of the table, - // using assert_expected_plan here as a workaround + // using assert_and_generate_plan and assert_snapshot! 
here as a workaround let plan = assert_and_generate_plan( "SELECT a, f FROM data WHERE (f IN ('a', 'b', 'c') OR a in (SELECT data2.a FROM data2 WHERE f IN ('b', 'c', 'd')))", true, @@ -1078,7 +1079,7 @@ async fn roundtrip_literal_struct() -> Result<()> { #[tokio::test] async fn roundtrip_values() -> Result<()> { // TODO: would be nice to have a struct inside the LargeList, but arrow_cast doesn't support that currently - assert_expected_plan( + let plan = assert_and_generate_plan( "VALUES \ (\ 1, \ @@ -1089,41 +1090,18 @@ async fn roundtrip_values() -> Result<()> { [STRUCT(STRUCT('a' AS string_field) AS struct_field), STRUCT(STRUCT('b' AS string_field) AS struct_field)]\ ), \ (NULL, NULL, NULL, NULL, NULL, NULL)", - "Values: \ - (\ - Int64(1), \ - Utf8(\"a\"), \ - List([[-213.1, , 5.5, 2.0, 1.0], []]), \ - LargeList([1, 2, 3]), \ - Struct({c0:true,int_field:1,c2:}), \ - List([{struct_field: {string_field: a}}, {struct_field: {string_field: b}}])\ - ), \ - (Int64(NULL), Utf8(NULL), List(), LargeList(), Struct({c0:,int_field:,c2:}), List())", - true).await - - // let plan = assert_and_generate_plan( - // "VALUES \ - // (\ - // 1, \ - // 'a', \ - // [[-213.1, NULL, 5.5, 2.0, 1.0], []], \ - // arrow_cast([1,2,3], 'LargeList(Int64)'), \ - // STRUCT(true, 1 AS int_field, CAST(NULL AS STRING)), \ - // [STRUCT(STRUCT('a' AS string_field) AS struct_field), STRUCT(STRUCT('b' AS string_field) AS struct_field)]\ - // ), \ - // (NULL, NULL, NULL, NULL, NULL, NULL)", - // true, - // true, - // ) - // .await?; - - // assert_snapshot!( - // plan, - // @r#" - // Values: (Int64(1), Utf8("a"), List([[-213.1, , 5.5, 2.0, 1.0], []]), LargeList([1, 2, 3]), Struct({c0:true,int_field:1,c2:}), List([{struct_field: {string_field: a}}, {struct_field: {string_field: b}}]) ), (Int64(NULL), Utf8(NULL), List(), LargeList(), Struct({c0:,int_field:,c2:}), List()) - // "# - // ); - // Ok(()) + true, + true, + ) + .await?; + + assert_snapshot!( + plan, + @r#" + Values: (Int64(1), Utf8("a"), 
List([[-213.1, , 5.5, 2.0, 1.0], []]), LargeList([1, 2, 3]), Struct({c0:true,int_field:1,c2:}), List([{struct_field: {string_field: a}}, {struct_field: {string_field: b}}])), (Int64(NULL), Utf8(NULL), List(), LargeList(), Struct({c0:,int_field:,c2:}), List()) + "# + ); + Ok(()) } #[tokio::test] From 2294a3f3e6c67db1cbbc41dfb6d66c421eb32c9d Mon Sep 17 00:00:00 2001 From: qstommyshu Date: Fri, 28 Mar 2025 13:57:59 -0400 Subject: [PATCH 16/17] remove println! --- datafusion/substrait/tests/cases/emit_kind_tests.rs | 2 -- .../substrait/tests/cases/roundtrip_logical_plan.rs | 13 ------------- 2 files changed, 15 deletions(-) diff --git a/datafusion/substrait/tests/cases/emit_kind_tests.rs b/datafusion/substrait/tests/cases/emit_kind_tests.rs index 88db2bc34d7f..e48d6fc04693 100644 --- a/datafusion/substrait/tests/cases/emit_kind_tests.rs +++ b/datafusion/substrait/tests/cases/emit_kind_tests.rs @@ -126,8 +126,6 @@ mod tests { let plan1str = format!("{plan}"); let plan2str = format!("{plan2}"); - println!("{}", plan1str); - println!("{}", plan2str); assert_eq!(plan1str, plan2str); Ok(()) diff --git a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs index 358195032724..8ef272e92a77 100644 --- a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs @@ -1504,11 +1504,6 @@ async fn assert_expected_plan( let plan2 = from_substrait_plan(&ctx.state(), &proto).await?; let plan2 = ctx.state().optimize(&plan2)?; - println!("{plan}"); - println!("{plan2}"); - - println!("{proto:?}"); - if assert_schema { assert_eq!(plan.schema(), plan2.schema()); } @@ -1577,9 +1572,6 @@ async fn test_alias(sql_with_alias: &str, sql_no_alias: &str) -> Result<()> { let proto = to_substrait_plan(&df.into_optimized_plan()?, &ctx.state())?; let plan = from_substrait_plan(&ctx.state(), &proto).await?; - println!("{plan_with_alias}"); - println!("{plan}"); - let 
plan1str = format!("{plan_with_alias}"); let plan2str = format!("{plan}"); assert_eq!(plan1str, plan2str); @@ -1596,11 +1588,6 @@ async fn roundtrip_logical_plan_with_ctx( let plan2 = from_substrait_plan(&ctx.state(), &proto).await?; let plan2 = ctx.state().optimize(&plan2)?; - println!("{plan}"); - println!("{plan2}"); - - println!("{proto:?}"); - let plan1str = format!("{plan}"); let plan2str = format!("{plan2}"); assert_eq!(plan1str, plan2str); From 945405e53af3b1de58cbf958464628503a320697 Mon Sep 17 00:00:00 2001 From: qstommyshu Date: Sat, 29 Mar 2025 19:51:37 -0400 Subject: [PATCH 17/17] migrate tests to use generate_plan_from_sql for improved clarity --- .../tests/cases/roundtrip_logical_plan.rs | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs index 8ef272e92a77..36ee78fe5d9a 100644 --- a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs @@ -189,7 +189,7 @@ async fn simple_select() -> Result<()> { #[tokio::test] async fn wildcard_select() -> Result<()> { - let plan = assert_and_generate_plan("SELECT * FROM data", true, false).await?; + let plan = generate_plan_from_sql("SELECT * FROM data", true, false).await?; assert_snapshot!( plan, @@ -303,7 +303,7 @@ async fn aggregate_grouping_sets() -> Result<()> { #[tokio::test] async fn aggregate_grouping_rollup() -> Result<()> { - let plan = assert_and_generate_plan( + let plan = generate_plan_from_sql( "SELECT a, c, e, avg(b) FROM data GROUP BY ROLLUP (a, c, e)", true, true, @@ -323,7 +323,7 @@ async fn aggregate_grouping_rollup() -> Result<()> { #[tokio::test] async fn multilayer_aggregate() -> Result<()> { - let plan = assert_and_generate_plan( + let plan = generate_plan_from_sql( "SELECT a, sum(partial_count_b) FROM (SELECT a, count(b) as partial_count_b FROM data GROUP BY a) GROUP 
BY a", true, true, @@ -476,7 +476,7 @@ async fn try_cast_decimal_to_string() -> Result<()> { #[tokio::test] async fn aggregate_case() -> Result<()> { - let plan = assert_and_generate_plan( + let plan = generate_plan_from_sql( "SELECT sum(CASE WHEN a > 0 THEN 1 ELSE NULL END) FROM data", true, true, @@ -524,7 +524,7 @@ async fn roundtrip_inlist_4() -> Result<()> { async fn roundtrip_inlist_5() -> Result<()> { // on roundtrip there is an additional projection during TableScan which includes all column of the table, // using assert_and_generate_plan and assert_snapshot! here as a workaround - let plan = assert_and_generate_plan( + let plan = generate_plan_from_sql( "SELECT a, f FROM data WHERE (f IN ('a', 'b', 'c') OR a in (SELECT data2.a FROM data2 WHERE f IN ('b', 'c', 'd')))", true, true, @@ -574,7 +574,7 @@ async fn roundtrip_non_equi_join() -> Result<()> { #[tokio::test] async fn roundtrip_exists_filter() -> Result<()> { - let plan = assert_and_generate_plan( + let plan = generate_plan_from_sql( "SELECT b FROM data d1 WHERE EXISTS (SELECT * FROM data2 d2 WHERE d2.a = d1.a AND d2.e != d1.e)", false, true, @@ -595,7 +595,7 @@ async fn roundtrip_exists_filter() -> Result<()> { #[tokio::test] async fn inner_join() -> Result<()> { - let plan = assert_and_generate_plan( + let plan = generate_plan_from_sql( "SELECT data.a FROM data JOIN data2 ON data.a = data2.a", true, true, @@ -648,7 +648,7 @@ async fn roundtrip_self_implicit_cross_join() -> Result<()> { #[tokio::test] async fn self_join_introduces_aliases() -> Result<()> { - let plan = assert_and_generate_plan( + let plan = generate_plan_from_sql( "SELECT d1.b, d2.c FROM data d1 JOIN data d2 ON d1.b = d2.b", false, true, @@ -1059,7 +1059,7 @@ async fn roundtrip_literal_list() -> Result<()> { #[tokio::test] async fn roundtrip_literal_struct() -> Result<()> { - let plan = assert_and_generate_plan( + let plan = generate_plan_from_sql( "SELECT STRUCT(1, true, CAST(NULL AS STRING)) FROM data", false, true, @@ -1079,7 
+1079,7 @@ async fn roundtrip_literal_struct() -> Result<()> { #[tokio::test] async fn roundtrip_values() -> Result<()> { // TODO: would be nice to have a struct inside the LargeList, but arrow_cast doesn't support that currently - let plan = assert_and_generate_plan( + let plan = generate_plan_from_sql( "VALUES \ (\ 1, \ @@ -1143,7 +1143,7 @@ async fn duplicate_column() -> Result<()> { // only. DataFusion however, is strict about not having duplicate column names appear in the plan. // This test confirms that we generate aliases for columns in the plan which would otherwise have // colliding names. - let plan = assert_and_generate_plan( + let plan = generate_plan_from_sql( "SELECT a + 1 as sum_a, a + 1 as sum_a_2 FROM data", true, true, @@ -1464,13 +1464,13 @@ async fn assert_read_filter_count( Ok(()) } -async fn assert_and_generate_plan( +async fn generate_plan_from_sql( sql: &str, assert_schema: bool, optimized: bool, -) -> Result { +) -> Result { let ctx = create_context().await?; - let df = ctx.sql(sql).await?; + let df: DataFrame = ctx.sql(sql).await?; let plan = if optimized { df.into_optimized_plan()? @@ -1489,7 +1489,7 @@ async fn assert_and_generate_plan( assert_eq!(plan.schema(), plan2.schema()); } - Ok(format!("{}", plan2)) + Ok(plan2) } async fn assert_expected_plan(