@@ -61,9 +61,7 @@ use arrow::array::{builder::StringBuilder, RecordBatch};
6161use arrow:: compute:: SortOptions ;
6262use arrow:: datatypes:: { Schema , SchemaRef } ;
6363use datafusion_common:: display:: ToStringifiedPlan ;
64- use datafusion_common:: tree_node:: {
65- Transformed , TransformedResult , TreeNode , TreeNodeRecursion , TreeNodeVisitor ,
66- } ;
64+ use datafusion_common:: tree_node:: { TreeNode , TreeNodeRecursion , TreeNodeVisitor } ;
6765use datafusion_common:: {
6866 exec_err, internal_datafusion_err, internal_err, not_impl_err, plan_err, DFSchema ,
6967 ScalarValue ,
@@ -83,7 +81,7 @@ use datafusion_expr::{
8381 WindowFrameBound , WriteOp ,
8482} ;
8583use datafusion_physical_expr:: aggregate:: { AggregateExprBuilder , AggregateFunctionExpr } ;
86- use datafusion_physical_expr:: expressions:: { Column , Literal } ;
84+ use datafusion_physical_expr:: expressions:: Literal ;
8785use datafusion_physical_expr:: {
8886 create_physical_sort_exprs, LexOrdering , PhysicalSortExpr ,
8987} ;
@@ -2177,11 +2175,7 @@ impl DefaultPhysicalPlanner {
21772175 let physical_expr =
21782176 self . create_physical_expr ( e, input_logical_schema, session_state) ;
21792177
2180- // Check for possible column name mismatches
2181- let final_physical_expr =
2182- maybe_fix_physical_column_name ( physical_expr, & input_physical_schema) ;
2183-
2184- tuple_err ( ( final_physical_expr, physical_name) )
2178+ tuple_err ( ( physical_expr, physical_name) )
21852179 } )
21862180 . collect :: < Result < Vec < _ > > > ( ) ?;
21872181
@@ -2287,47 +2281,6 @@ fn tuple_err<T, R>(value: (Result<T>, Result<R>)) -> Result<(T, R)> {
22872281 }
22882282}
22892283
2290- // Handle the case where the name of a physical column expression does not match the name of the corresponding physical input field.
2291- // Physical column names are derived from the physical schema, whereas physical column expressions are derived from the logical column names.
2292- //
2293- // This is a special case that applies only to column expressions. Logical plans may slightly modify column names by appending a suffix (e.g., using ':'),
2294- // to avoid duplicates, since DFSchemas do not allow duplicate names. For example: `count(Int64(1)):1`.
2295- fn maybe_fix_physical_column_name (
2296- expr : Result < Arc < dyn PhysicalExpr > > ,
2297- input_physical_schema : & SchemaRef ,
2298- ) -> Result < Arc < dyn PhysicalExpr > > {
2299- let Ok ( expr) = expr else { return expr } ; // an incoming error is returned untouched
2300- expr. transform_down ( |node| {
2301- if let Some ( column) = node. as_any ( ) . downcast_ref :: < Column > ( ) {
2302- let idx = column. index ( ) ;
2303- let physical_field = input_physical_schema. field ( idx) ; // NOTE(review): direct index lookup; presumably panics if idx is out of range for this schema (confirm)
2304- let expr_col_name = column. name ( ) ;
2305- let physical_name = physical_field. name ( ) ;
2306-
2307- if expr_col_name != physical_name {
2308- // The physical name may itself contain ':', so skip that many colons in the expression name and treat the next one as the suffix separator.
2309- let colon_count = physical_name. matches ( ':' ) . count ( ) ;
2310- let mut splits = expr_col_name. match_indices ( ':' ) ;
2311- let split_pos = splits. nth ( colon_count) ;
2312-
2313- if let Some ( ( i, _) ) = split_pos {
2314- let base_name = & expr_col_name[ ..i] ;
2315- if base_name == physical_name {
2316- let updated_column = Column :: new ( physical_name, idx) ;
2317- return Ok ( Transformed :: yes ( Arc :: new ( updated_column) ) ) ;
2318- }
2319- }
2320- }
2321-
2322- // If the names already match, or stripping the suffix does not yield the physical name, leave the node unchanged.
2323- Ok ( Transformed :: no ( node) )
2324- } else {
2325- Ok ( Transformed :: no ( node) )
2326- }
2327- } )
2328- . data ( )
2329- }
2330-
23312284struct OptimizationInvariantChecker < ' a > {
23322285 rule : & ' a Arc < dyn PhysicalOptimizerRule + Send + Sync > ,
23332286}
@@ -2431,12 +2384,10 @@ mod tests {
24312384 } ;
24322385 use datafusion_execution:: runtime_env:: RuntimeEnv ;
24332386 use datafusion_execution:: TaskContext ;
2434- use datafusion_expr:: {
2435- col, lit, LogicalPlanBuilder , Operator , UserDefinedLogicalNodeCore ,
2436- } ;
2387+ use datafusion_expr:: builder:: subquery_alias;
2388+ use datafusion_expr:: { col, lit, LogicalPlanBuilder , UserDefinedLogicalNodeCore } ;
24372389 use datafusion_functions_aggregate:: count:: count_all;
24382390 use datafusion_functions_aggregate:: expr_fn:: sum;
2439- use datafusion_physical_expr:: expressions:: { BinaryExpr , IsNotNullExpr } ;
24402391 use datafusion_physical_expr:: EquivalenceProperties ;
24412392 use datafusion_physical_plan:: execution_plan:: { Boundedness , EmissionType } ;
24422393
@@ -2997,71 +2948,6 @@ mod tests {
29972948 }
29982949 }
29992950
3000- #[ tokio:: test]
3001- async fn test_maybe_fix_colon_in_physical_name ( ) {
3002- // The physical schema has a field whose name itself contains a colon
3003- let schema = Schema :: new ( vec ! [ Field :: new( "metric:avg" , DataType :: Int32 , false ) ] ) ;
3004- let schema_ref: SchemaRef = Arc :: new ( schema) ;
3005-
3006- // The logical name as it might look after deduplication appended a ':1' suffix
3007- let logical_col_name = "metric:avg:1" ;
3008- let expr_with_suffix =
3009- Arc :: new ( Column :: new ( logical_col_name, 0 ) ) as Arc < dyn PhysicalExpr > ;
3010- let expr_result = Ok ( expr_with_suffix) ;
3011-
3012- // Call function under test
3013- let fixed_expr =
3014- maybe_fix_physical_column_name ( expr_result, & schema_ref) . unwrap ( ) ;
3015-
3016- // Downcast back to Column so we can check the name
3017- let col = fixed_expr
3018- . as_any ( )
3019- . downcast_ref :: < Column > ( )
3020- . expect ( "Column" ) ;
3021-
3022- assert_eq ! ( col. name( ) , "metric:avg" ) ; // only the trailing ':1' is dropped; the colon in the physical name stays
3023- }
3024-
3025- #[ tokio:: test]
3026- async fn test_maybe_fix_nested_column_name_with_colon ( ) {
3027- let schema = Schema :: new ( vec ! [ Field :: new( "column" , DataType :: Int32 , false ) ] ) ;
3028- let schema_ref: SchemaRef = Arc :: new ( schema) ;
3029-
3030- // Build the nested expression: IS NOT NULL over a column whose name carries a ':1' suffix
3031- let col_expr = Arc :: new ( Column :: new ( "column:1" , 0 ) ) as Arc < dyn PhysicalExpr > ;
3032- let is_not_null_expr = Arc :: new ( IsNotNullExpr :: new ( col_expr. clone ( ) ) ) ;
3033-
3034- // Create a binary OR expression with the IS NOT NULL expression (and thus the column) on both sides
3035- let binary_expr = Arc :: new ( BinaryExpr :: new (
3036- is_not_null_expr. clone ( ) ,
3037- Operator :: Or ,
3038- is_not_null_expr. clone ( ) ,
3039- ) ) as Arc < dyn PhysicalExpr > ;
3040-
3041- let fixed_expr =
3042- maybe_fix_physical_column_name ( Ok ( binary_expr) , & schema_ref) . unwrap ( ) ;
3043-
3044- let bin = fixed_expr
3045- . as_any ( )
3046- . downcast_ref :: < BinaryExpr > ( )
3047- . expect ( "Expected BinaryExpr" ) ;
3048-
3049- // Check that both sides were renamed
3050- for expr in & [ bin. left ( ) , bin. right ( ) ] {
3051- let is_not_null = expr
3052- . as_any ( )
3053- . downcast_ref :: < IsNotNullExpr > ( )
3054- . expect ( "Expected IsNotNull" ) ;
3055-
3056- let col = is_not_null
3057- . arg ( )
3058- . as_any ( )
3059- . downcast_ref :: < Column > ( )
3060- . expect ( "Expected Column" ) ;
3061-
3062- assert_eq ! ( col. name( ) , "column" ) ;
3063- }
3064- }
30652951 struct ErrorExtensionPlanner { }
30662952
30672953 #[ async_trait]
@@ -3558,4 +3444,41 @@ digraph {
35583444
35593445 Ok ( ( ) )
35603446 }
3447+
3448+ #[ tokio:: test]
3449+ async fn subquery_alias_confusing_the_optimizer ( ) -> Result < ( ) > {
3450+ let state = make_session_state ( ) ;
3451+
3452+ let schema = Schema :: new ( vec ! [ Field :: new( "a" , DataType :: Int32 , false ) ] ) ;
3453+ let schema = Arc :: new ( schema) ;
3454+
3455+ let table = MemTable :: try_new ( schema. clone ( ) , vec ! [ vec![ ] ] ) ?;
3456+ let table = Arc :: new ( table) ;
3457+
3458+ let source = DefaultTableSource :: new ( table) ;
3459+ let source = Arc :: new ( source) ;
3460+
3461+ let left = LogicalPlanBuilder :: scan ( "left" , source. clone ( ) , None ) ?;
3462+ let right = LogicalPlanBuilder :: scan ( "right" , source, None ) ?. build ( ) ?;
3463+
3464+ let join_keys = (
3465+ vec ! [ datafusion_common:: Column :: new( Some ( "left" ) , "a" ) ] ,
3466+ vec ! [ datafusion_common:: Column :: new( Some ( "right" ) , "a" ) ] ,
3467+ ) ;
3468+
3469+ let join = left. join ( right, JoinType :: Full , join_keys, None ) ?. build ( ) ?;
3470+
3471+ let alias = subquery_alias ( join, "alias" ) ?;
3472+
3473+ let logical_plan = LogicalPlanBuilder :: new ( alias)
3474+ . aggregate ( vec ! [ col( "a:1" ) ] , Vec :: < Expr > :: new ( ) ) ?
3475+ . build ( ) ?;
3476+
3477+ let optimized_logical_plan = state. optimize ( & logical_plan) ?;
3478+
3479+ let planner = DefaultPhysicalPlanner :: default ( ) ;
3480+ let physical_plan = planner. create_physical_plan ( & logical_plan, & state) . await ?;
3481+
3482+ Ok ( ( ) )
3483+ }
35613484}
0 commit comments