@@ -23,11 +23,14 @@ use arrow::datatypes::{DataType, Field};
2323use arrow:: util:: pretty:: { pretty_format_batches, pretty_format_columns} ;
2424use datafusion:: prelude:: * ;
2525use datafusion_common:: { DFSchema , ScalarValue } ;
26+ use datafusion_expr:: execution_props:: ExecutionProps ;
27+ use datafusion_expr:: simplify:: SimplifyContext ;
2628use datafusion_expr:: ExprFunctionExt ;
2729use datafusion_functions:: core:: expr_ext:: FieldAccessor ;
2830use datafusion_functions_aggregate:: first_last:: first_value_udaf;
2931use datafusion_functions_aggregate:: sum:: sum_udaf;
3032use datafusion_functions_nested:: expr_ext:: { IndexAccessor , SliceAccessor } ;
33+ use datafusion_optimizer:: simplify_expressions:: ExprSimplifier ;
3134use sqlparser:: ast:: NullTreatment ;
3235/// Tests of using and evaluating `Expr`s outside the context of a LogicalPlan
3336use std:: sync:: { Arc , LazyLock } ;
@@ -304,6 +307,37 @@ async fn test_aggregate_ext_null_treatment() {
304307 . await ;
305308}
306309
310+ #[ tokio:: test]
311+ async fn test_create_physical_expr ( ) {
312+ // create_physical_expr does not simplify the expression
313+ // 1 + 1
314+ create_expr_test ( lit ( 1i32 ) + lit ( 2i32 ) , "1 + 2" ) ;
315+ // However, you can run the simplifier before creating the physical
316+ // expression. This mimics what delta.rs and other non-sql libraries do to
317+ // create predicates
318+ //
319+ // 1 + 1
320+ create_simplified_expr_test ( lit ( 1i32 ) + lit ( 2i32 ) , "3" ) ;
321+ }
322+
323+ #[ tokio:: test]
324+ async fn test_create_physical_expr_coercion ( ) {
325+ // create_physical_expr does apply type coercion and unwrapping in cast
326+ //
327+ // expect the cast on the literals
328+ // compare string function to int `id = 1`
329+ create_expr_test ( col ( "id" ) . eq ( lit ( 1i32 ) ) , "id@0 = CAST(1 AS Utf8)" ) ;
330+ create_expr_test ( lit ( 1i32 ) . eq ( col ( "id" ) ) , "CAST(1 AS Utf8) = id@0" ) ;
331+ // compare int col to string literal `i = '202410'`
332+ // Note this casts the column (not the field)
333+ create_expr_test ( col ( "i" ) . eq ( lit ( "202410" ) ) , "CAST(i@1 AS Utf8) = 202410" ) ;
334+ create_expr_test ( lit ( "202410" ) . eq ( col ( "i" ) ) , "202410 = CAST(i@1 AS Utf8)" ) ;
335+ // however, when simplified the casts on i should removed
336+ // https://github.com/apache/datafusion/issues/14944
337+ create_simplified_expr_test ( col ( "i" ) . eq ( lit ( "202410" ) ) , "CAST(i@1 AS Utf8) = 202410" ) ;
338+ create_simplified_expr_test ( lit ( "202410" ) . eq ( col ( "i" ) ) , "CAST(i@1 AS Utf8) = 202410" ) ;
339+ }
340+
307341/// Evaluates the specified expr as an aggregate and compares the result to the
308342/// expected result.
309343async fn evaluate_agg_test ( expr : Expr , expected_lines : Vec < & str > ) {
@@ -350,6 +384,38 @@ fn evaluate_expr_test(expr: Expr, expected_lines: Vec<&str>) {
350384 ) ;
351385}
352386
387+ /// Creates the physical expression from Expr and compares the Debug expression
388+ /// to the expected result.
389+ fn create_expr_test ( expr : Expr , expected_expr : & str ) {
390+ let batch = & TEST_BATCH ;
391+ let df_schema = DFSchema :: try_from ( batch. schema ( ) ) . unwrap ( ) ;
392+ let physical_expr = SessionContext :: new ( )
393+ . create_physical_expr ( expr, & df_schema)
394+ . unwrap ( ) ;
395+
396+ assert_eq ! ( physical_expr. to_string( ) , expected_expr) ;
397+ }
398+
399+ /// Creates the physical expression from Expr and runs the expr simplifier
400+ fn create_simplified_expr_test ( expr : Expr , expected_expr : & str ) {
401+ let batch = & TEST_BATCH ;
402+ let df_schema = DFSchema :: try_from ( batch. schema ( ) ) . unwrap ( ) ;
403+
404+ // Simplify the expression first
405+ let props = ExecutionProps :: new ( ) ;
406+ let simplify_context =
407+ SimplifyContext :: new ( & props) . with_schema ( df_schema. clone ( ) . into ( ) ) ;
408+ let simplifier = ExprSimplifier :: new ( simplify_context) . with_max_cycles ( 10 ) ;
409+ let simplified = simplifier. simplify ( expr) . unwrap ( ) ;
410+ create_expr_test ( simplified, expected_expr) ;
411+ }
412+
413+ /// Returns a Batch with 3 rows and 4 columns:
414+ ///
415+ /// id: Utf8
416+ /// i: Int64
417+ /// props: Struct
418+ /// list: List<String>
353419static TEST_BATCH : LazyLock < RecordBatch > = LazyLock :: new ( || {
354420 let string_array: ArrayRef = Arc :: new ( StringArray :: from ( vec ! [ "1" , "2" , "3" ] ) ) ;
355421 let int_array: ArrayRef =
0 commit comments