diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 85e9d9b6a0ed..56fe95fffd15 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -571,7 +571,18 @@ impl TreeNodeRewriter for ConstEvaluator<'_> { ConstSimplifyResult::NotSimplified(s, m) => { Ok(Transformed::no(Expr::Literal(s, m))) } - ConstSimplifyResult::SimplifyRuntimeError(_, expr) => { + ConstSimplifyResult::SimplifyRuntimeError(err, expr) => { + // For CAST expressions with literal inputs, propagate the error at plan time rather than deferring to execution time. + // This provides clearer error messages and fails fast. + if let Expr::Cast(Cast { ref expr, .. }) + | Expr::TryCast(TryCast { ref expr, .. }) = expr + { + if matches!(expr.as_ref(), Expr::Literal(_, _)) { + return Err(err); + } + } + // For other expressions (like CASE, COALESCE), preserve the original + // to allow short-circuit evaluation at execution time Ok(Transformed::yes(expr)) } }, @@ -4968,6 +4979,56 @@ mod tests { ); } + #[test] + fn simplify_cast_literal() { + // Test that CAST(literal) expressions are evaluated at plan time + + // CAST(123 AS Int64) should become 123i64 + let expr = Expr::Cast(Cast::new(Box::new(lit(123i32)), DataType::Int64)); + let expected = lit(123i64); + assert_eq!(simplify(expr), expected); + + // CAST(1761630189642 AS Timestamp(Nanosecond, Some("+00:00"))) + // Integer to timestamp cast + let expr = Expr::Cast(Cast::new( + Box::new(lit(1761630189642i64)), + DataType::Timestamp( + arrow::datatypes::TimeUnit::Nanosecond, + Some("+00:00".into()), + ), + )); + // Should evaluate to a timestamp literal + let result = simplify(expr); + match result { + Expr::Literal(ScalarValue::TimestampNanosecond(Some(val), tz), _) => { + assert_eq!(val, 1761630189642i64); + assert_eq!(tz.as_deref(), Some("+00:00")); + } + other => panic!("Expected TimestampNanosecond literal, got: {other:?}"), + } + + // Test CAST of invalid string to timestamp - should return an error at plan time + // This represents the case from the issue: CAST(Utf8("1761630189642") AS Timestamp) + // "1761630189642" is NOT a valid timestamp string format + let expr = Expr::Cast(Cast::new( + Box::new(lit("1761630189642")), + DataType::Timestamp( + arrow::datatypes::TimeUnit::Nanosecond, + Some("+00:00".into()), + ), + )); + + // The simplification should now fail with an error at plan time + let schema = test_schema(); + let props = ExecutionProps::new(); + let simplifier = + ExprSimplifier::new(SimplifyContext::new(&props).with_schema(schema)); + let result = simplifier.simplify(expr); + assert!(result.is_err(), "Expected error for invalid cast"); + let err_msg = result.unwrap_err().to_string(); + assert_contains!(err_msg, "Error parsing timestamp"); + } + fn if_not_null(expr: Expr, then: bool) -> Expr { Expr::Case(Case { expr: Some(expr.is_not_null().into()), diff --git a/datafusion/sqllogictest/test_files/arrow_typeof.slt b/datafusion/sqllogictest/test_files/arrow_typeof.slt index ac32ef821bc4..cbb20acb2d91 100644 --- a/datafusion/sqllogictest/test_files/arrow_typeof.slt +++ b/datafusion/sqllogictest/test_files/arrow_typeof.slt @@ -316,7 +316,7 @@ select arrow_cast(interval '30 minutes', 'Duration(Second)'); ---- 0 days 0 hours 30 mins 0 secs -query error DataFusion error: This feature is not implemented: Unsupported CAST from Utf8 to Duration\(s\) +query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*This feature is not implemented: Unsupported CAST from Utf8 to Duration\(s\) select arrow_cast('30 minutes', 'Duration(Second)'); @@ -337,7 +337,7 @@ select arrow_cast(timestamp '2000-01-01T00:00:00Z', 'Timestamp(Nanosecond, Some( ---- 2000-01-01T00:00:00+08:00 -statement error DataFusion error: Arrow error: Parser error: Invalid timezone "\+25:00": failed to parse timezone +statement error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Parser error: Invalid timezone "\+25:00": failed to parse timezone select arrow_cast(timestamp '2000-01-01T00:00:00', 'Timestamp(Nanosecond, Some( "+25:00" ))'); @@ -406,7 +406,7 @@ select arrow_cast([1], 'FixedSizeList(1, Int64)'); ---- [1] -query error DataFusion error: Arrow error: Cast error: Cannot cast to FixedSizeList\(4\): value at index 0 has length 3 +query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast to FixedSizeList\(4\): value at index 0 has length 3 select arrow_cast(make_array(1, 2, 3), 'FixedSizeList(4, Int64)'); query ? diff --git a/datafusion/sqllogictest/test_files/cte.slt b/datafusion/sqllogictest/test_files/cte.slt index a581bcb539a9..e7ca7a5ae1d8 100644 --- a/datafusion/sqllogictest/test_files/cte.slt +++ b/datafusion/sqllogictest/test_files/cte.slt @@ -764,7 +764,7 @@ WITH RECURSIVE my_cte AS ( # Test issue: https://github.com/apache/datafusion/issues/9794 # Non-recursive term and recursive term have different types, and cannot be casted -query error DataFusion error: Arrow error: Cast error: Cannot cast string 'abc' to value of Int64 type +query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'abc' to value of Int64 type WITH RECURSIVE my_cte AS ( SELECT 1 AS a UNION ALL diff --git a/datafusion/sqllogictest/test_files/errors.slt b/datafusion/sqllogictest/test_files/errors.slt index 3e60423df8a0..41f747df5baa 100644 --- a/datafusion/sqllogictest/test_files/errors.slt +++ b/datafusion/sqllogictest/test_files/errors.slt @@ -145,7 +145,7 @@ SELECT LIMIT 5; -query error DataFusion error: Arrow error: Cast error: Cannot cast string 'foo' to value of Int64 type +query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'foo' to value of Int64 type create table foo as values (1), ('foo'); query error DataFusion error: Error during planning: Substring without for/from is not valid diff --git a/datafusion/sqllogictest/test_files/map.slt b/datafusion/sqllogictest/test_files/map.slt index 949edb8376d1..a3234b4e7ee5 100644 --- a/datafusion/sqllogictest/test_files/map.slt +++ b/datafusion/sqllogictest/test_files/map.slt @@ -155,7 +155,7 @@ SELECT MAKE_MAP('POST', 41, 'HEAD', 53, 'PATCH', 30); ---- {POST: 41, HEAD: 53, PATCH: 30} -query error DataFusion error: Arrow error: Cast error: Cannot cast string 'ab' to value of Int64 type +query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'ab' to value of Int64 type SELECT MAKE_MAP('POST', 41, 'HEAD', 'ab', 'PATCH', 30); # Map keys can not be NULL @@ -523,7 +523,7 @@ SELECT MAP { 'a': 1, 'b': 3 }; ---- {a: 1, b: 3} -query error DataFusion error: Arrow error: Cast error: Cannot cast string 'a' to value of Int64 type +query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'a' to value of Int64 type SELECT MAP { 'a': 1, 2: 3 }; # accessing map with non-string key @@ -670,7 +670,7 @@ SELECT map_entries(MAP { 'a': 1, 'b': 3 }); ---- [{key: a, value: 1}, {key: b, value: 3}] -query error DataFusion error: Arrow error: Cast error: Cannot cast string 'a' to value of Int64 type +query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'a' to value of Int64 type SELECT map_entries(MAP { 'a': 1, 2: 3 }); query ? @@ -721,7 +721,7 @@ SELECT map_keys(MAP { 'a': 1, 'b': 3 }); ---- [a, b] -query error DataFusion error: Arrow error: Cast error: Cannot cast string 'a' to value of Int64 type +query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'a' to value of Int64 type SELECT map_keys(MAP { 'a': 1, 2: 3 }); query ? @@ -768,7 +768,7 @@ NULL # Tests for map_values -query error DataFusion error: Arrow error: Cast error: Cannot cast string 'a' to value of Int64 type +query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'a' to value of Int64 type SELECT map_values(MAP { 'a': 1, 2: 3 }); query ? diff --git a/datafusion/sqllogictest/test_files/nullif.slt b/datafusion/sqllogictest/test_files/nullif.slt index 6acb9aea26d5..7b4c59b26394 100644 --- a/datafusion/sqllogictest/test_files/nullif.slt +++ b/datafusion/sqllogictest/test_files/nullif.slt @@ -112,7 +112,7 @@ select nullif(1.0, 2); ---- 1 -query error DataFusion error: Arrow error: Cast error: Cannot cast string 'a' to value of Int64 type +query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'a' to value of Int64 type select nullif(2, 'a'); query T diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt index cd1f90c42efd..5b2587bdc330 100644 --- a/datafusion/sqllogictest/test_files/select.slt +++ b/datafusion/sqllogictest/test_files/select.slt @@ -1775,7 +1775,7 @@ DROP TABLE test; query error DataFusion error: Arrow error: Parser error: Error parsing timestamp from 'I AM NOT A TIMESTAMP': error parsing date SELECT to_timestamp('I AM NOT A TIMESTAMP'); -query error DataFusion error: Arrow error: Cast error: Cannot cast string '' to value of Int32 type +query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string '' to value of Int32 type SELECT CAST('' AS int); # See issue: https://github.com/apache/datafusion/issues/8978 diff --git a/datafusion/sqllogictest/test_files/struct.slt b/datafusion/sqllogictest/test_files/struct.slt index 0e3c5145d156..dce5fe036b4e 100644 --- a/datafusion/sqllogictest/test_files/struct.slt +++ b/datafusion/sqllogictest/test_files/struct.slt @@ -492,7 +492,7 @@ Struct("r": nullable Utf8, "c": nullable Float64) statement ok drop table t; -query error DataFusion error: Arrow error: Cast error: Cannot cast string 'a' to value of Float64 type +query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'a' to value of Float64 type create table t as values({r: 'a', c: 1}), ({c: 2.3, r: 'b'}); ################################## @@ -554,14 +554,14 @@ statement ok drop table t; # row() with incorrect order -statement error DataFusion error: Arrow error: Cast error: Cannot cast string 'blue' to value of Float32 type -create table t(a struct(r varchar, c int), b struct(r varchar, c float)) as values +statement error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'blue' to value of Float32 type +create table t(a struct(r varchar, c int), b struct(r varchar, c float)) as values (row('red', 1), row(2.3, 'blue')), (row('purple', 1), row('green', 2.3)); # out of order struct literal # TODO: This query should not fail -statement error DataFusion error: Arrow error: Cast error: Cannot cast string 'b' to value of Int32 type +statement error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'b' to value of Int32 type create table t(a struct(r varchar, c int)) as values ({r: 'a', c: 1}), ({c: 2, r: 'b'}); ################################## diff --git a/datafusion/sqllogictest/test_files/timestamps.slt b/datafusion/sqllogictest/test_files/timestamps.slt index 250d4e9830e5..cdacad0fda0d 100644 --- a/datafusion/sqllogictest/test_files/timestamps.slt +++ b/datafusion/sqllogictest/test_files/timestamps.slt @@ -691,11 +691,11 @@ select ---- 08:09:10.123456789 13:14:15.123456 13:14:15.123 13:14:15 -query error DataFusion error: Arrow error: Cast error: Cannot cast string 'not a time' to value of Time64\(ns\) type +query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'not a time' to value of Time64\(ns\) type SELECT TIME 'not a time' as time; # invalid time -query error DataFusion error: Arrow error: Cast error: Cannot cast string '24:01:02' to value of Time64\(ns\) type +query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string '24:01:02' to value of Time64\(ns\) type SELECT TIME '24:01:02' as time; # invalid timezone @@ -3271,7 +3271,7 @@ statement error The to_local_time function can only accept Timestamp as the arg select to_local_time('2024-04-01T00:00:20Z'); # invalid timezone -statement error DataFusion error: Arrow error: Parser error: Invalid timezone "Europe/timezone": failed to parse timezone +statement error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Parser error: Invalid timezone "Europe/timezone": failed to parse timezone select to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/timezone'); # valid query