From 442c61fa6123b0fb1b8e770f14ef3dcf4e749c3c Mon Sep 17 00:00:00 2001 From: lifan-ake Date: Sun, 25 May 2025 11:06:09 +0800 Subject: [PATCH 1/8] migrate `logical_plan` tests to insta --- Cargo.lock | 1 + datafusion/expr/Cargo.toml | 1 + datafusion/expr/src/logical_plan/builder.rs | 244 +++++++++--------- datafusion/expr/src/logical_plan/display.rs | 8 +- datafusion/expr/src/logical_plan/plan.rs | 268 +++++++++----------- 5 files changed, 252 insertions(+), 270 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dba06cd4b653..01afb6cb72fa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2214,6 +2214,7 @@ dependencies = [ "datafusion-physical-expr-common", "env_logger", "indexmap 2.9.0", + "insta", "paste", "recursive", "serde_json", diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml index 37e1ed1936fb..d77c59ff64e1 100644 --- a/datafusion/expr/Cargo.toml +++ b/datafusion/expr/Cargo.toml @@ -58,3 +58,4 @@ sqlparser = { workspace = true } [dev-dependencies] ctor = { workspace = true } env_logger = { workspace = true } +insta = { workspace = true } diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index 538cdbc646e2..54a5115fbe85 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -2259,7 +2259,8 @@ mod tests { use crate::{col, expr, expr_fn::exists, in_subquery, lit, scalar_subquery}; use crate::test::function_stub::sum; - use datafusion_common::{Constraint, RecursionUnnestOption, SchemaError}; + use datafusion_common::{Constraint, RecursionUnnestOption}; + use insta::assert_snapshot; #[test] fn plan_builder_simple() -> Result<()> { @@ -2269,11 +2270,11 @@ mod tests { .project(vec![col("id")])? .build()?; - let expected = "Projection: employee_csv.id\ - \n Filter: employee_csv.state = Utf8(\"CO\")\ - \n TableScan: employee_csv projection=[id, state]"; - - assert_eq!(expected, format!("{plan}")); + assert_snapshot!(format!("{plan}"), @r#" + Projection: employee_csv.id + Filter: employee_csv.state = Utf8("CO") + TableScan: employee_csv projection=[id, state] + "#); Ok(()) } @@ -2285,12 +2286,7 @@ mod tests { let plan = LogicalPlanBuilder::scan("employee_csv", table_source(&schema), projection) .unwrap(); - let expected = DFSchema::try_from_qualified_schema( - TableReference::bare("employee_csv"), - &schema, - ) - .unwrap(); - assert_eq!(&expected, plan.schema().as_ref()); + assert_snapshot!(plan.schema().as_ref(), @"fields:[employee_csv.id, employee_csv.first_name, employee_csv.last_name, employee_csv.state, employee_csv.salary], metadata:{}"); // Note scan of "EMPLOYEE_CSV" is treated as a SQL identifier // (and thus normalized to "employee"csv") as well @@ -2298,7 +2294,7 @@ mod tests { let plan = LogicalPlanBuilder::scan("EMPLOYEE_CSV", table_source(&schema), projection) .unwrap(); - assert_eq!(&expected, plan.schema().as_ref()); + assert_snapshot!(plan.schema().as_ref(), @"fields:[employee_csv.id, employee_csv.first_name, employee_csv.last_name, employee_csv.state, employee_csv.salary], metadata:{}"); } #[test] @@ -2307,9 +2303,9 @@ mod tests { let projection = None; let err = LogicalPlanBuilder::scan("", table_source(&schema), projection).unwrap_err(); - assert_eq!( + assert_snapshot!( err.strip_backtrace(), - "Error during planning: table_name cannot be empty" + @"Error during planning: table_name cannot be empty" ); } @@ -2323,10 +2319,10 @@ mod tests { ])? .build()?; - let expected = "Sort: employee_csv.state ASC NULLS FIRST, employee_csv.salary DESC NULLS LAST\ - \n TableScan: employee_csv projection=[state, salary]"; - - assert_eq!(expected, format!("{plan}")); + assert_snapshot!(format!("{plan}"), @r" + Sort: employee_csv.state ASC NULLS FIRST, employee_csv.salary DESC NULLS LAST + TableScan: employee_csv projection=[state, salary] + "); Ok(()) } @@ -2343,15 +2339,15 @@ mod tests { .union(plan.build()?)? .build()?; - let expected = "Union\ - \n Union\ - \n Union\ - \n TableScan: employee_csv projection=[state, salary]\ - \n TableScan: employee_csv projection=[state, salary]\ - \n TableScan: employee_csv projection=[state, salary]\ - \n TableScan: employee_csv projection=[state, salary]"; - - assert_eq!(expected, format!("{plan}")); + assert_snapshot!(format!("{plan}"), @r" + Union + Union + Union + TableScan: employee_csv projection=[state, salary] + TableScan: employee_csv projection=[state, salary] + TableScan: employee_csv projection=[state, salary] + TableScan: employee_csv projection=[state, salary] + "); Ok(()) } @@ -2368,19 +2364,18 @@ mod tests { .union_distinct(plan.build()?)? .build()?; - let expected = "\ - Distinct:\ - \n Union\ - \n Distinct:\ - \n Union\ - \n Distinct:\ - \n Union\ - \n TableScan: employee_csv projection=[state, salary]\ - \n TableScan: employee_csv projection=[state, salary]\ - \n TableScan: employee_csv projection=[state, salary]\ - \n TableScan: employee_csv projection=[state, salary]"; - - assert_eq!(expected, format!("{plan}")); + assert_snapshot!(format!("{plan}"), @r" + Distinct: + Union + Distinct: + Union + Distinct: + Union + TableScan: employee_csv projection=[state, salary] + TableScan: employee_csv projection=[state, salary] + TableScan: employee_csv projection=[state, salary] + TableScan: employee_csv projection=[state, salary] + "); Ok(()) } @@ -2394,13 +2389,12 @@ mod tests { .distinct()? .build()?; - let expected = "\ - Distinct:\ - \n Projection: employee_csv.id\ - \n Filter: employee_csv.state = Utf8(\"CO\")\ - \n TableScan: employee_csv projection=[id, state]"; - - assert_eq!(expected, format!("{plan}")); + assert_snapshot!(format!("{plan}"), @r#" + Distinct: + Projection: employee_csv.id + Filter: employee_csv.state = Utf8("CO") + TableScan: employee_csv projection=[id, state] + "#); Ok(()) } @@ -2420,14 +2414,15 @@ mod tests { .filter(exists(Arc::new(subquery)))? .build()?; - let expected = "Filter: EXISTS ()\ - \n Subquery:\ - \n Filter: foo.a = bar.a\ - \n Projection: foo.a\ - \n TableScan: foo\ - \n Projection: bar.a\ - \n TableScan: bar"; - assert_eq!(expected, format!("{outer_query}")); + assert_snapshot!(format!("{outer_query}"), @r" + Filter: EXISTS () + Subquery: + Filter: foo.a = bar.a + Projection: foo.a + TableScan: foo + Projection: bar.a + TableScan: bar + "); Ok(()) } @@ -2448,14 +2443,15 @@ mod tests { .filter(in_subquery(col("a"), Arc::new(subquery)))? .build()?; - let expected = "Filter: bar.a IN ()\ - \n Subquery:\ - \n Filter: foo.a = bar.a\ - \n Projection: foo.a\ - \n TableScan: foo\ - \n Projection: bar.a\ - \n TableScan: bar"; - assert_eq!(expected, format!("{outer_query}")); + assert_snapshot!(format!("{outer_query}"), @r" + Filter: bar.a IN () + Subquery: + Filter: foo.a = bar.a + Projection: foo.a + TableScan: foo + Projection: bar.a + TableScan: bar + "); Ok(()) } @@ -2475,13 +2471,14 @@ mod tests { .project(vec![scalar_subquery(Arc::new(subquery))])? .build()?; - let expected = "Projection: ()\ - \n Subquery:\ - \n Filter: foo.a = bar.a\ - \n Projection: foo.b\ - \n TableScan: foo\ - \n TableScan: bar"; - assert_eq!(expected, format!("{outer_query}")); + assert_snapshot!(format!("{outer_query}"), @r" + Projection: () + Subquery: + Filter: foo.a = bar.a + Projection: foo.b + TableScan: foo + TableScan: bar + "); Ok(()) } @@ -2498,19 +2495,8 @@ mod tests { .project(vec![col("id"), col("first_name").alias("id")]); match plan { - Err(DataFusionError::SchemaError( - SchemaError::AmbiguousReference { - field: - Column { - relation: Some(TableReference::Bare { table }), - name, - spans: _, - }, - }, - _, - )) => { - assert_eq!(*"employee_csv", *table); - assert_eq!("id", &name); + Err(DataFusionError::SchemaError(e, _)) => { + assert_snapshot!(e, @"Schema contains qualified field name employee_csv.id and unqualified field name id which would be ambiguous"); Ok(()) } _ => plan_err!("Plan should have returned an DataFusionError::SchemaError"), @@ -2575,13 +2561,11 @@ mod tests { let plan2 = table_scan(TableReference::none(), &employee_schema(), Some(vec![3, 4]))?; - let expected = "Error during planning: INTERSECT/EXCEPT query must have the same number of columns. \ - Left is 1 and right is 2."; let err_msg1 = LogicalPlanBuilder::intersect(plan1.build()?, plan2.build()?, true) .unwrap_err(); - assert_eq!(err_msg1.strip_backtrace(), expected); + assert_snapshot!(err_msg1.strip_backtrace(), @"Error during planning: INTERSECT/EXCEPT query must have the same number of columns. Left is 1 and right is 2."); Ok(()) } @@ -2592,19 +2576,23 @@ mod tests { let err = nested_table_scan("test_table")? .unnest_column("scalar") .unwrap_err(); - assert!(err - .to_string() - .starts_with("Internal error: trying to unnest on invalid data type UInt32")); + // assert!(err + // .to_string() + // .starts_with("Internal error: trying to unnest on invalid data type UInt32")); + assert_snapshot!(err.strip_backtrace(), @r" + Internal error: trying to unnest on invalid data type UInt32. + This was likely caused by a bug in DataFusion's code and we would welcome that you file an bug report in our issue tracker + "); // Unnesting the strings list. let plan = nested_table_scan("test_table")? .unnest_column("strings")? .build()?; - let expected = "\ - Unnest: lists[test_table.strings|depth=1] structs[]\ - \n TableScan: test_table"; - assert_eq!(expected, format!("{plan}")); + assert_snapshot!(format!("{plan}"), @r" + Unnest: lists[test_table.strings|depth=1] structs[] + TableScan: test_table + "); // Check unnested field is a scalar let field = plan.schema().field_with_name(None, "strings").unwrap(); @@ -2615,10 +2603,10 @@ mod tests { .unnest_column("struct_singular")? .build()?; - let expected = "\ - Unnest: lists[] structs[test_table.struct_singular]\ - \n TableScan: test_table"; - assert_eq!(expected, format!("{plan}")); + assert_snapshot!(format!("{plan}"), @r" + Unnest: lists[] structs[test_table.struct_singular] + TableScan: test_table + "); for field_name in &["a", "b"] { // Check unnested struct field is a scalar @@ -2636,12 +2624,12 @@ mod tests { .unnest_column("struct_singular")? .build()?; - let expected = "\ - Unnest: lists[] structs[test_table.struct_singular]\ - \n Unnest: lists[test_table.structs|depth=1] structs[]\ - \n Unnest: lists[test_table.strings|depth=1] structs[]\ - \n TableScan: test_table"; - assert_eq!(expected, format!("{plan}")); + assert_snapshot!(format!("{plan}"), @r" + Unnest: lists[] structs[test_table.struct_singular] + Unnest: lists[test_table.structs|depth=1] structs[] + Unnest: lists[test_table.strings|depth=1] structs[] + TableScan: test_table + "); // Check unnested struct list field should be a struct. let field = plan.schema().field_with_name(None, "structs").unwrap(); @@ -2657,10 +2645,10 @@ mod tests { .unnest_columns_with_options(cols, UnnestOptions::default())? .build()?; - let expected = "\ - Unnest: lists[test_table.strings|depth=1, test_table.structs|depth=1] structs[test_table.struct_singular]\ - \n TableScan: test_table"; - assert_eq!(expected, format!("{plan}")); + assert_snapshot!(format!("{plan}"), @r" + Unnest: lists[test_table.strings|depth=1, test_table.structs|depth=1] structs[test_table.struct_singular] + TableScan: test_table + "); // Unnesting missing column should fail. let plan = nested_table_scan("test_table")?.unnest_column("missing"); @@ -2684,10 +2672,10 @@ mod tests { )? .build()?; - let expected = "\ - Unnest: lists[test_table.stringss|depth=1, test_table.stringss|depth=2] structs[test_table.struct_singular]\ - \n TableScan: test_table"; - assert_eq!(expected, format!("{plan}")); + assert_snapshot!(format!("{plan}"), @r" + Unnest: lists[test_table.stringss|depth=1, test_table.stringss|depth=2] structs[test_table.struct_singular] + TableScan: test_table + "); // Check output columns has correct type let field = plan @@ -2759,10 +2747,24 @@ mod tests { let join = LogicalPlanBuilder::from(left).cross_join(right)?.build()?; - let _ = LogicalPlanBuilder::from(join.clone()) + let plan = LogicalPlanBuilder::from(join.clone()) .union(join)? .build()?; + assert_snapshot!(plan, @r" + Union + Cross Join: + SubqueryAlias: left + Values: (Int32(1)) + SubqueryAlias: right + Values: (Int32(1)) + Cross Join: + SubqueryAlias: left + Values: (Int32(1)) + SubqueryAlias: right + Values: (Int32(1)) + "); + Ok(()) } @@ -2822,10 +2824,10 @@ mod tests { .aggregate(vec![col("id")], vec![sum(col("salary"))])? .build()?; - let expected = - "Aggregate: groupBy=[[employee_csv.id]], aggr=[[sum(employee_csv.salary)]]\ - \n TableScan: employee_csv projection=[id, state, salary]"; - assert_eq!(expected, format!("{plan}")); + assert_snapshot!(format!("{plan}"), @r" + Aggregate: groupBy=[[employee_csv.id]], aggr=[[sum(employee_csv.salary)]] + TableScan: employee_csv projection=[id, state, salary] + "); Ok(()) } @@ -2844,10 +2846,10 @@ mod tests { .aggregate(vec![col("id")], vec![sum(col("salary"))])? .build()?; - let expected = - "Aggregate: groupBy=[[employee_csv.id, employee_csv.state, employee_csv.salary]], aggr=[[sum(employee_csv.salary)]]\ - \n TableScan: employee_csv projection=[id, state, salary]"; - assert_eq!(expected, format!("{plan}")); + assert_snapshot!(format!("{plan}"), @r" + Aggregate: groupBy=[[employee_csv.id, employee_csv.state, employee_csv.salary]], aggr=[[sum(employee_csv.salary)]] + TableScan: employee_csv projection=[id, state, salary] + "); Ok(()) } diff --git a/datafusion/expr/src/logical_plan/display.rs b/datafusion/expr/src/logical_plan/display.rs index 73c1a5304f82..a902c2d2a78d 100644 --- a/datafusion/expr/src/logical_plan/display.rs +++ b/datafusion/expr/src/logical_plan/display.rs @@ -722,13 +722,14 @@ impl<'n> TreeNodeVisitor<'n> for PgJsonVisitor<'_, '_> { #[cfg(test)] mod tests { use arrow::datatypes::{DataType, Field}; + use insta::assert_snapshot; use super::*; #[test] fn test_display_empty_schema() { let schema = Schema::empty(); - assert_eq!("[]", format!("{}", display_schema(&schema))); + assert_snapshot!(format!("{}", display_schema(&schema)), @"[]"); } #[test] @@ -738,9 +739,6 @@ mod tests { Field::new("first_name", DataType::Utf8, true), ]); - assert_eq!( - "[id:Int32, first_name:Utf8;N]", - format!("{}", display_schema(&schema)) - ); + assert_snapshot!(format!("{}", display_schema(&schema)), @"[id:Int32, first_name:Utf8;N]"); } } diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 7b47b59256d1..302834cd80b5 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -3997,6 +3997,7 @@ mod tests { TransformedResult, TreeNodeRewriter, TreeNodeVisitor, }; use datafusion_common::{not_impl_err, Constraint, ScalarValue}; + use insta::assert_snapshot; use crate::test::function_stub::count; @@ -4024,13 +4025,13 @@ mod tests { fn test_display_indent() -> Result<()> { let plan = display_plan()?; - let expected = "Projection: employee_csv.id\ - \n Filter: employee_csv.state IN ()\ - \n Subquery:\ - \n TableScan: employee_csv projection=[state]\ - \n TableScan: employee_csv projection=[id, state]"; - - assert_eq!(expected, format!("{}", plan.display_indent())); + assert_snapshot!(format!("{}", plan.display_indent()), @r" + Projection: employee_csv.id + Filter: employee_csv.state IN () + Subquery: + TableScan: employee_csv projection=[state] + TableScan: employee_csv projection=[id, state] + "); Ok(()) } @@ -4038,13 +4039,13 @@ mod tests { fn test_display_indent_schema() -> Result<()> { let plan = display_plan()?; - let expected = "Projection: employee_csv.id [id:Int32]\ - \n Filter: employee_csv.state IN () [id:Int32, state:Utf8]\ - \n Subquery: [state:Utf8]\ - \n TableScan: employee_csv projection=[state] [state:Utf8]\ - \n TableScan: employee_csv projection=[id, state] [id:Int32, state:Utf8]"; - - assert_eq!(expected, format!("{}", plan.display_indent_schema())); + assert_snapshot!(format!("{}", plan.display_indent_schema()), @r" + Projection: employee_csv.id [id:Int32] + Filter: employee_csv.state IN () [id:Int32, state:Utf8] + Subquery: [state:Utf8] + TableScan: employee_csv projection=[state] [state:Utf8] + TableScan: employee_csv projection=[id, state] [id:Int32, state:Utf8] + "); Ok(()) } @@ -4059,12 +4060,12 @@ mod tests { .project(vec![col("id"), exists(plan1).alias("exists")])? .build(); - let expected = "Projection: employee_csv.id, EXISTS () AS exists\ - \n Subquery:\ - \n TableScan: employee_csv projection=[state]\ - \n TableScan: employee_csv projection=[id, state]"; - - assert_eq!(expected, format!("{}", plan?.display_indent())); + assert_snapshot!(format!("{}", plan?.display_indent()), @r" + Projection: employee_csv.id, EXISTS () AS exists + Subquery: + TableScan: employee_csv projection=[state] + TableScan: employee_csv projection=[id, state] + "); Ok(()) } @@ -4072,46 +4073,44 @@ mod tests { fn test_display_graphviz() -> Result<()> { let plan = display_plan()?; - let expected_graphviz = r#" -// Begin DataFusion GraphViz Plan, -// display it online here: https://dreampuf.github.io/GraphvizOnline - -digraph { - subgraph cluster_1 - { - graph[label="LogicalPlan"] - 2[shape=box label="Projection: employee_csv.id"] - 3[shape=box label="Filter: employee_csv.state IN ()"] - 2 -> 3 [arrowhead=none, arrowtail=normal, dir=back] - 4[shape=box label="Subquery:"] - 3 -> 4 [arrowhead=none, arrowtail=normal, dir=back] - 5[shape=box label="TableScan: employee_csv projection=[state]"] - 4 -> 5 [arrowhead=none, arrowtail=normal, dir=back] - 6[shape=box label="TableScan: employee_csv projection=[id, state]"] - 3 -> 6 [arrowhead=none, arrowtail=normal, dir=back] - } - subgraph cluster_7 - { - graph[label="Detailed LogicalPlan"] - 8[shape=box label="Projection: employee_csv.id\nSchema: [id:Int32]"] - 9[shape=box label="Filter: employee_csv.state IN ()\nSchema: [id:Int32, state:Utf8]"] - 8 -> 9 [arrowhead=none, arrowtail=normal, dir=back] - 10[shape=box label="Subquery:\nSchema: [state:Utf8]"] - 9 -> 10 [arrowhead=none, arrowtail=normal, dir=back] - 11[shape=box label="TableScan: employee_csv projection=[state]\nSchema: [state:Utf8]"] - 10 -> 11 [arrowhead=none, arrowtail=normal, dir=back] - 12[shape=box label="TableScan: employee_csv projection=[id, state]\nSchema: [id:Int32, state:Utf8]"] - 9 -> 12 [arrowhead=none, arrowtail=normal, dir=back] - } -} -// End DataFusion GraphViz Plan -"#; - // just test for a few key lines in the output rather than the // whole thing to make test maintenance easier. let graphviz = format!("{}", plan.display_graphviz()); - assert_eq!(expected_graphviz, graphviz); + assert_snapshot!(graphviz, @r#" + // Begin DataFusion GraphViz Plan, + // display it online here: https://dreampuf.github.io/GraphvizOnline + + digraph { + subgraph cluster_1 + { + graph[label="LogicalPlan"] + 2[shape=box label="Projection: employee_csv.id"] + 3[shape=box label="Filter: employee_csv.state IN ()"] + 2 -> 3 [arrowhead=none, arrowtail=normal, dir=back] + 4[shape=box label="Subquery:"] + 3 -> 4 [arrowhead=none, arrowtail=normal, dir=back] + 5[shape=box label="TableScan: employee_csv projection=[state]"] + 4 -> 5 [arrowhead=none, arrowtail=normal, dir=back] + 6[shape=box label="TableScan: employee_csv projection=[id, state]"] + 3 -> 6 [arrowhead=none, arrowtail=normal, dir=back] + } + subgraph cluster_7 + { + graph[label="Detailed LogicalPlan"] + 8[shape=box label="Projection: employee_csv.id\nSchema: [id:Int32]"] + 9[shape=box label="Filter: employee_csv.state IN ()\nSchema: [id:Int32, state:Utf8]"] + 8 -> 9 [arrowhead=none, arrowtail=normal, dir=back] + 10[shape=box label="Subquery:\nSchema: [state:Utf8]"] + 9 -> 10 [arrowhead=none, arrowtail=normal, dir=back] + 11[shape=box label="TableScan: employee_csv projection=[state]\nSchema: [state:Utf8]"] + 10 -> 11 [arrowhead=none, arrowtail=normal, dir=back] + 12[shape=box label="TableScan: employee_csv projection=[id, state]\nSchema: [id:Int32, state:Utf8]"] + 9 -> 12 [arrowhead=none, arrowtail=normal, dir=back] + } + } + // End DataFusion GraphViz Plan + "#); Ok(()) } @@ -4119,60 +4118,60 @@ digraph { fn test_display_pg_json() -> Result<()> { let plan = display_plan()?; - let expected_pg_json = r#"[ - { - "Plan": { - "Expressions": [ - "employee_csv.id" - ], - "Node Type": "Projection", - "Output": [ - "id" - ], - "Plans": [ - { - "Condition": "employee_csv.state IN ()", - "Node Type": "Filter", - "Output": [ - "id", - "state" - ], - "Plans": [ - { - "Node Type": "Subquery", + let pg_json = format!("{}", plan.display_pg_json()); + + assert_snapshot!(pg_json, @r#" + [ + { + "Plan": { + "Expressions": [ + "employee_csv.id" + ], + "Node Type": "Projection", "Output": [ - "state" + "id" ], "Plans": [ { - "Node Type": "TableScan", + "Condition": "employee_csv.state IN ()", + "Node Type": "Filter", "Output": [ + "id", "state" ], - "Plans": [], - "Relation Name": "employee_csv" + "Plans": [ + { + "Node Type": "Subquery", + "Output": [ + "state" + ], + "Plans": [ + { + "Node Type": "TableScan", + "Output": [ + "state" + ], + "Plans": [], + "Relation Name": "employee_csv" + } + ] + }, + { + "Node Type": "TableScan", + "Output": [ + "id", + "state" + ], + "Plans": [], + "Relation Name": "employee_csv" + } + ] } ] - }, - { - "Node Type": "TableScan", - "Output": [ - "id", - "state" - ], - "Plans": [], - "Relation Name": "employee_csv" } - ] - } - ] - } - } -]"#; - - let pg_json = format!("{}", plan.display_pg_json()); - - assert_eq!(expected_pg_json, pg_json); + } + ] + "#); Ok(()) } @@ -4221,17 +4220,7 @@ digraph { let res = plan.visit_with_subqueries(&mut visitor); assert!(res.is_ok()); - assert_eq!( - visitor.strings, - vec![ - "pre_visit Projection", - "pre_visit Filter", - "pre_visit TableScan", - "post_visit TableScan", - "post_visit Filter", - "post_visit Projection", - ] - ); + assert_snapshot!(visitor.strings.join(", "), @"pre_visit Projection, pre_visit Filter, pre_visit TableScan, post_visit TableScan, post_visit Filter, post_visit Projection"); } #[derive(Debug, Default)] @@ -4297,9 +4286,9 @@ digraph { let res = plan.visit_with_subqueries(&mut visitor); assert!(res.is_ok()); - assert_eq!( - visitor.inner.strings, - vec!["pre_visit Projection", "pre_visit Filter"] + assert_snapshot!( + visitor.inner.strings.join(", "), + @"pre_visit Projection, pre_visit Filter" ); } @@ -4313,14 +4302,9 @@ digraph { let res = plan.visit_with_subqueries(&mut visitor); assert!(res.is_ok()); - assert_eq!( - visitor.inner.strings, - vec![ - "pre_visit Projection", - "pre_visit Filter", - "pre_visit TableScan", - "post_visit TableScan", - ] + assert_snapshot!( + visitor.inner.strings.join(", "), + @"pre_visit Projection, pre_visit Filter, pre_visit TableScan, post_visit TableScan" ); } @@ -4362,13 +4346,13 @@ digraph { }; let plan = test_plan(); let res = plan.visit_with_subqueries(&mut visitor).unwrap_err(); - assert_eq!( - "This feature is not implemented: Error in pre_visit", - res.strip_backtrace() + assert_snapshot!( + res.strip_backtrace(), + @"This feature is not implemented: Error in pre_visit" ); - assert_eq!( - visitor.inner.strings, - vec!["pre_visit Projection", "pre_visit Filter"] + assert_snapshot!( + visitor.inner.strings.join(", "), + @"pre_visit Projection, pre_visit Filter" ); } @@ -4380,18 +4364,13 @@ digraph { }; let plan = test_plan(); let res = plan.visit_with_subqueries(&mut visitor).unwrap_err(); - assert_eq!( - "This feature is not implemented: Error in post_visit", - res.strip_backtrace() + assert_snapshot!( + res.strip_backtrace(), + @"This feature is not implemented: Error in post_visit" ); - assert_eq!( - visitor.inner.strings, - vec![ - "pre_visit Projection", - "pre_visit Filter", - "pre_visit TableScan", - "post_visit TableScan", - ] + assert_snapshot!( + visitor.inner.strings.join(", "), + @"pre_visit Projection, pre_visit Filter, pre_visit TableScan, post_visit TableScan" ); } @@ -4406,7 +4385,7 @@ digraph { })), empty_schema, ); - assert_eq!(p.err().unwrap().strip_backtrace(), "Error during planning: Projection has mismatch between number of expressions (1) and number of fields in schema (0)"); + assert_snapshot!(p.err().unwrap().strip_backtrace(), @"Error during planning: Projection has mismatch between number of expressions (1) and number of fields in schema (0)"); Ok(()) } @@ -4593,11 +4572,12 @@ digraph { .data() .unwrap(); - let expected = "Explain\ - \n Filter: foo = Boolean(true)\ - \n TableScan: ?table?"; let actual = format!("{}", plan.display_indent()); - assert_eq!(expected.to_string(), actual) + assert_snapshot!(actual, @r" + Explain + Filter: foo = Boolean(true) + TableScan: ?table? + ") } #[test] From 54b4f847932dfaf8810655840f6ace9bf6c7e0cb Mon Sep 17 00:00:00 2001 From: lifan-ake Date: Sun, 25 May 2025 12:08:46 +0800 Subject: [PATCH 2/8] fix assert error --- datafusion/expr/src/logical_plan/builder.rs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index 54a5115fbe85..da9fbc2d018b 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -2576,13 +2576,15 @@ mod tests { let err = nested_table_scan("test_table")? .unnest_column("scalar") .unwrap_err(); - // assert!(err - // .to_string() - // .starts_with("Internal error: trying to unnest on invalid data type UInt32")); - assert_snapshot!(err.strip_backtrace(), @r" - Internal error: trying to unnest on invalid data type UInt32. - This was likely caused by a bug in DataFusion's code and we would welcome that you file an bug report in our issue tracker - "); + + let err_msg = err + .strip_backtrace() + .lines() + .filter(|line| !line.contains("This was likely caused by a bug")) + .collect::>() + .join("\n"); + + assert_snapshot!(err_msg, @"Internal error: trying to unnest on invalid data type UInt32."); // Unnesting the strings list. let plan = nested_table_scan("test_table")? From 5e15c5a5e4035de018ec6f3c6c2a5262994f99c4 Mon Sep 17 00:00:00 2001 From: lifan-ake Date: Mon, 26 May 2025 09:55:56 +0800 Subject: [PATCH 3/8] fix according to review --- datafusion/expr/src/logical_plan/builder.rs | 52 ++++++++------ datafusion/expr/src/logical_plan/plan.rs | 77 ++++++++++++++------- 2 files changed, 83 insertions(+), 46 deletions(-) diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index da9fbc2d018b..350f7acfcfad 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -2259,7 +2259,7 @@ mod tests { use crate::{col, expr, expr_fn::exists, in_subquery, lit, scalar_subquery}; use crate::test::function_stub::sum; - use datafusion_common::{Constraint, RecursionUnnestOption}; + use datafusion_common::{Constraint, RecursionUnnestOption, SchemaError}; use insta::assert_snapshot; #[test] @@ -2270,7 +2270,7 @@ mod tests { .project(vec![col("id")])? .build()?; - assert_snapshot!(format!("{plan}"), @r#" + assert_snapshot!(plan, @r#" Projection: employee_csv.id Filter: employee_csv.state = Utf8("CO") TableScan: employee_csv projection=[id, state] @@ -2319,7 +2319,7 @@ mod tests { ])? .build()?; - assert_snapshot!(format!("{plan}"), @r" + assert_snapshot!(plan, @r" Sort: employee_csv.state ASC NULLS FIRST, employee_csv.salary DESC NULLS LAST TableScan: employee_csv projection=[state, salary] "); @@ -2339,7 +2339,7 @@ mod tests { .union(plan.build()?)? .build()?; - assert_snapshot!(format!("{plan}"), @r" + assert_snapshot!(plan, @r" Union Union Union @@ -2364,7 +2364,7 @@ mod tests { .union_distinct(plan.build()?)? .build()?; - assert_snapshot!(format!("{plan}"), @r" + assert_snapshot!(plan, @r" Distinct: Union Distinct: @@ -2389,7 +2389,7 @@ mod tests { .distinct()? .build()?; - assert_snapshot!(format!("{plan}"), @r#" + assert_snapshot!(plan, @r#" Distinct: Projection: employee_csv.id Filter: employee_csv.state = Utf8("CO") @@ -2495,8 +2495,19 @@ mod tests { .project(vec![col("id"), col("first_name").alias("id")]); match plan { - Err(DataFusionError::SchemaError(e, _)) => { - assert_snapshot!(e, @"Schema contains qualified field name employee_csv.id and unqualified field name id which would be ambiguous"); + Err(DataFusionError::SchemaError( + SchemaError::AmbiguousReference { + field: + Column { + relation: Some(TableReference::Bare { table }), + name, + spans: _, + }, + }, + _, + )) => { + assert_eq!(*"employee_csv", *table); + assert_eq!("id", &name); Ok(()) } _ => plan_err!("Plan should have returned an DataFusionError::SchemaError"), @@ -2577,21 +2588,18 @@ mod tests { .unnest_column("scalar") .unwrap_err(); - let err_msg = err - .strip_backtrace() - .lines() - .filter(|line| !line.contains("This was likely caused by a bug")) - .collect::>() - .join("\n"); + let DataFusionError::Internal(desc) = err else { + return plan_err!("Plan should have returned an DataFusionError::Internal"); + }; - assert_snapshot!(err_msg, @"Internal error: trying to unnest on invalid data type UInt32."); + assert_snapshot!(desc, @"trying to unnest on invalid data type UInt32"); // Unnesting the strings list. let plan = nested_table_scan("test_table")? .unnest_column("strings")? .build()?; - assert_snapshot!(format!("{plan}"), @r" + assert_snapshot!(plan, @r" Unnest: lists[test_table.strings|depth=1] structs[] TableScan: test_table "); @@ -2605,7 +2613,7 @@ mod tests { .unnest_column("struct_singular")? .build()?; - assert_snapshot!(format!("{plan}"), @r" + assert_snapshot!(plan, @r" Unnest: lists[] structs[test_table.struct_singular] TableScan: test_table "); @@ -2626,7 +2634,7 @@ mod tests { .unnest_column("struct_singular")? .build()?; - assert_snapshot!(format!("{plan}"), @r" + assert_snapshot!(plan, @r" Unnest: lists[] structs[test_table.struct_singular] Unnest: lists[test_table.structs|depth=1] structs[] Unnest: lists[test_table.strings|depth=1] structs[] @@ -2647,7 +2655,7 @@ mod tests { .unnest_columns_with_options(cols, UnnestOptions::default())? .build()?; - assert_snapshot!(format!("{plan}"), @r" + assert_snapshot!(plan, @r" Unnest: lists[test_table.strings|depth=1, test_table.structs|depth=1] structs[test_table.struct_singular] TableScan: test_table "); @@ -2674,7 +2682,7 @@ mod tests { )? .build()?; - assert_snapshot!(format!("{plan}"), @r" + assert_snapshot!(plan, @r" Unnest: lists[test_table.stringss|depth=1, test_table.stringss|depth=2] structs[test_table.struct_singular] TableScan: test_table "); @@ -2826,7 +2834,7 @@ mod tests { .aggregate(vec![col("id")], vec![sum(col("salary"))])? .build()?; - assert_snapshot!(format!("{plan}"), @r" + assert_snapshot!(plan, @r" Aggregate: groupBy=[[employee_csv.id]], aggr=[[sum(employee_csv.salary)]] TableScan: employee_csv projection=[id, state, salary] "); @@ -2848,7 +2856,7 @@ mod tests { .aggregate(vec![col("id")], vec![sum(col("salary"))])? .build()?; - assert_snapshot!(format!("{plan}"), @r" + assert_snapshot!(plan, @r" Aggregate: groupBy=[[employee_csv.id, employee_csv.state, employee_csv.salary]], aggr=[[sum(employee_csv.salary)]] TableScan: employee_csv projection=[id, state, salary] "); diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 302834cd80b5..6ad62c5e4ce8 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -3997,7 +3997,7 @@ mod tests { TransformedResult, TreeNodeRewriter, TreeNodeVisitor, }; use datafusion_common::{not_impl_err, Constraint, ScalarValue}; - use insta::assert_snapshot; + use insta::{assert_snapshot, assert_debug_snapshot}; use crate::test::function_stub::count; @@ -4025,7 +4025,7 @@ mod tests { fn test_display_indent() -> Result<()> { let plan = display_plan()?; - assert_snapshot!(format!("{}", plan.display_indent()), @r" + assert_snapshot!(plan.display_indent(), @r" Projection: employee_csv.id Filter: employee_csv.state IN () Subquery: @@ -4039,7 +4039,7 @@ mod tests { fn test_display_indent_schema() -> Result<()> { let plan = display_plan()?; - assert_snapshot!(format!("{}", plan.display_indent_schema()), @r" + assert_snapshot!(plan.display_indent_schema(), @r" Projection: employee_csv.id [id:Int32] Filter: employee_csv.state IN () [id:Int32, state:Utf8] Subquery: [state:Utf8] @@ -4060,7 +4060,7 @@ mod tests { .project(vec![col("id"), exists(plan1).alias("exists")])? .build(); - assert_snapshot!(format!("{}", plan?.display_indent()), @r" + assert_snapshot!(plan?.display_indent(), @r" Projection: employee_csv.id, EXISTS () AS exists Subquery: TableScan: employee_csv projection=[state] @@ -4075,9 +4075,7 @@ mod tests { // just test for a few key lines in the output rather than the // whole thing to make test maintenance easier. - let graphviz = format!("{}", plan.display_graphviz()); - - assert_snapshot!(graphviz, @r#" + assert_snapshot!(plan.display_graphviz(), @r#" // Begin DataFusion GraphViz Plan, // display it online here: https://dreampuf.github.io/GraphvizOnline @@ -4118,9 +4116,7 @@ mod tests { fn test_display_pg_json() -> Result<()> { let plan = display_plan()?; - let pg_json = format!("{}", plan.display_pg_json()); - - assert_snapshot!(pg_json, @r#" + assert_snapshot!(plan.display_pg_json(), @r#" [ { "Plan": { @@ -4220,7 +4216,16 @@ mod tests { let res = plan.visit_with_subqueries(&mut visitor); assert!(res.is_ok()); - assert_snapshot!(visitor.strings.join(", "), @"pre_visit Projection, pre_visit Filter, pre_visit TableScan, post_visit TableScan, post_visit Filter, post_visit Projection"); + assert_debug_snapshot!(visitor.strings, @r#" + [ + "pre_visit Projection", + "pre_visit Filter", + "pre_visit TableScan", + "post_visit TableScan", + "post_visit Filter", + "post_visit Projection", + ] + "#); } #[derive(Debug, Default)] @@ -4286,9 +4291,14 @@ mod tests { let res = plan.visit_with_subqueries(&mut visitor); assert!(res.is_ok()); - assert_snapshot!( - visitor.inner.strings.join(", "), - @"pre_visit Projection, pre_visit Filter" + assert_debug_snapshot!( + visitor.inner.strings, + @r#" + [ + "pre_visit Projection", + "pre_visit Filter", + ] + "# ); } @@ -4302,9 +4312,16 @@ mod tests { let res = plan.visit_with_subqueries(&mut visitor); assert!(res.is_ok()); - assert_snapshot!( - visitor.inner.strings.join(", "), - @"pre_visit Projection, pre_visit Filter, pre_visit TableScan, post_visit TableScan" + assert_debug_snapshot!( + visitor.inner.strings, + @r#" + [ + "pre_visit Projection", + "pre_visit Filter", + "pre_visit TableScan", + "post_visit TableScan", + ] + "# ); } @@ -4350,9 +4367,14 @@ mod tests { res.strip_backtrace(), @"This feature is not implemented: Error in pre_visit" ); - assert_snapshot!( - visitor.inner.strings.join(", "), - @"pre_visit Projection, pre_visit Filter" + assert_debug_snapshot!( + visitor.inner.strings, + @r#" + [ + "pre_visit Projection", + "pre_visit Filter", + ] + "# ); } @@ -4368,9 +4390,16 @@ mod tests { res.strip_backtrace(), @"This feature is not implemented: Error in post_visit" ); - assert_snapshot!( - visitor.inner.strings.join(", "), - @"pre_visit Projection, pre_visit Filter, pre_visit TableScan, post_visit TableScan" + assert_debug_snapshot!( + visitor.inner.strings, + @r#" + [ + "pre_visit Projection", + "pre_visit Filter", + "pre_visit TableScan", + "post_visit TableScan", + ] + "# ); } @@ -4385,7 +4414,7 @@ mod tests { })), empty_schema, ); - assert_snapshot!(p.err().unwrap().strip_backtrace(), @"Error during planning: Projection has mismatch between number of expressions (1) and number of fields in schema (0)"); + assert_snapshot!(p.unwrap_err().strip_backtrace(), @"Error during planning: Projection has mismatch between number of expressions (1) and number of fields in schema (0)"); Ok(()) } From 2eb4c05e0ab10ba938f09dfe76ea01ce6df23d19 Mon Sep 17 00:00:00 2001 From: lifan-ake Date: Mon, 26 May 2025 10:52:38 +0800 Subject: [PATCH 4/8] strip backtrace from internal error --- datafusion/expr/src/logical_plan/builder.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index 350f7acfcfad..206bc06c8174 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -2592,6 +2592,12 @@ mod tests { return plan_err!("Plan should have returned an DataFusionError::Internal"); }; + let desc = desc.split(DataFusionError::BACK_TRACE_SEP) + .collect::>() + .first() + .unwrap_or(&"") + .to_string(); + assert_snapshot!(desc, @"trying to unnest on invalid data type UInt32"); // Unnesting the strings list. From fae9d632b5e96c863d19dfe743ea9c7d5c9607dc Mon Sep 17 00:00:00 2001 From: lifan-ake Date: Mon, 26 May 2025 10:58:31 +0800 Subject: [PATCH 5/8] format --- datafusion/expr/src/logical_plan/builder.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index 206bc06c8174..9674f7516263 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -2592,11 +2592,12 @@ mod tests { return plan_err!("Plan should have returned an DataFusionError::Internal"); }; - let desc = desc.split(DataFusionError::BACK_TRACE_SEP) - .collect::>() - .first() - .unwrap_or(&"") - .to_string(); + let desc = desc + .split(DataFusionError::BACK_TRACE_SEP) + .collect::>() + .first() + .unwrap_or(&"") + .to_string(); assert_snapshot!(desc, @"trying to unnest on invalid data type UInt32"); From bfe5884695bb2b1a0c48c53b618ce6164c9592ee Mon Sep 17 00:00:00 2001 From: lifan-ake Date: Mon, 26 May 2025 11:01:26 +0800 Subject: [PATCH 6/8] format --- datafusion/expr/src/logical_plan/plan.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 6ad62c5e4ce8..2fe1c0d7398f 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -3997,7 +3997,7 @@ mod tests { TransformedResult, TreeNodeRewriter, TreeNodeVisitor, }; use datafusion_common::{not_impl_err, Constraint, ScalarValue}; - use insta::{assert_snapshot, assert_debug_snapshot}; + use insta::{assert_debug_snapshot, assert_snapshot}; use crate::test::function_stub::count; From 4e7994ca58a45bc07723eddad058bee679aa2964 Mon Sep 17 00:00:00 2001 From: lifan-ake Date: Mon, 26 May 2025 19:23:11 +0800 Subject: [PATCH 7/8] fix `format("outer_query")` --- datafusion/expr/src/logical_plan/builder.rs | 22 +++++++-------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index 9674f7516263..49e4e839c1dc 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -2414,7 +2414,7 @@ mod tests { .filter(exists(Arc::new(subquery)))? .build()?; - assert_snapshot!(format!("{outer_query}"), @r" + assert_snapshot!(outer_query, @r" Filter: EXISTS () Subquery: Filter: foo.a = bar.a @@ -2443,7 +2443,7 @@ mod tests { .filter(in_subquery(col("a"), Arc::new(subquery)))? .build()?; - assert_snapshot!(format!("{outer_query}"), @r" + assert_snapshot!(outer_query, @r" Filter: bar.a IN () Subquery: Filter: foo.a = bar.a @@ -2471,7 +2471,7 @@ mod tests { .project(vec![scalar_subquery(Arc::new(subquery))])? .build()?; - assert_snapshot!(format!("{outer_query}"), @r" + assert_snapshot!(outer_query, @r" Projection: () Subquery: Filter: foo.a = bar.a @@ -2588,18 +2588,10 @@ mod tests { .unnest_column("scalar") .unwrap_err(); - let DataFusionError::Internal(desc) = err else { - return plan_err!("Plan should have returned an DataFusionError::Internal"); - }; - - let desc = desc - .split(DataFusionError::BACK_TRACE_SEP) - .collect::>() - .first() - .unwrap_or(&"") - .to_string(); - - assert_snapshot!(desc, @"trying to unnest on invalid data type UInt32"); + assert_snapshot!(err.strip_backtrace(), @r" + Internal error: trying to unnest on invalid data type UInt32. + This was likely caused by a bug in DataFusion's code and we would welcome that you file an bug report in our issue tracker + "); // Unnesting the strings list. let plan = nested_table_scan("test_table")? From c178a5a5752dd911132726cb03a6686c1ac38fef Mon Sep 17 00:00:00 2001 From: lifan-ake Date: Mon, 26 May 2025 19:42:27 +0800 Subject: [PATCH 8/8] fix `Internal` error --- datafusion/expr/src/logical_plan/builder.rs | 16 ++++++++++++---- datafusion/expr/src/logical_plan/display.rs | 4 ++-- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index 49e4e839c1dc..fabdfecef104 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -2588,10 +2588,18 @@ mod tests { .unnest_column("scalar") .unwrap_err(); - assert_snapshot!(err.strip_backtrace(), @r" - Internal error: trying to unnest on invalid data type UInt32. - This was likely caused by a bug in DataFusion's code and we would welcome that you file an bug report in our issue tracker - "); + let DataFusionError::Internal(desc) = err else { + return plan_err!("Plan should have returned an DataFusionError::Internal"); + }; + + let desc = desc + .split(DataFusionError::BACK_TRACE_SEP) + .collect::>() + .first() + .unwrap_or(&"") + .to_string(); + + assert_snapshot!(desc, @"trying to unnest on invalid data type UInt32"); // Unnesting the strings list. let plan = nested_table_scan("test_table")? diff --git a/datafusion/expr/src/logical_plan/display.rs b/datafusion/expr/src/logical_plan/display.rs index a902c2d2a78d..f1e455f46db3 100644 --- a/datafusion/expr/src/logical_plan/display.rs +++ b/datafusion/expr/src/logical_plan/display.rs @@ -729,7 +729,7 @@ mod tests { #[test] fn test_display_empty_schema() { let schema = Schema::empty(); - assert_snapshot!(format!("{}", display_schema(&schema)), @"[]"); + assert_snapshot!(display_schema(&schema), @"[]"); } #[test] @@ -739,6 +739,6 @@ mod tests { Field::new("first_name", DataType::Utf8, true), ]); - assert_snapshot!(format!("{}", display_schema(&schema)), @"[id:Int32, first_name:Utf8;N]"); + assert_snapshot!(display_schema(&schema), @"[id:Int32, first_name:Utf8;N]"); } }