From 624657750aed1d0a0221ae271b990a2d8a38a4c0 Mon Sep 17 00:00:00 2001 From: Weijun Huang Date: Fri, 10 Mar 2023 10:59:06 +0100 Subject: [PATCH 1/3] refactor: timestamp overflow check --- arrow-cast/src/cast.rs | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs index ae901665473d..de90ed8d570a 100644 --- a/arrow-cast/src/cast.rs +++ b/arrow-cast/src/cast.rs @@ -1682,7 +1682,11 @@ pub fn cast_with_options( Ordering::Equal => time_array.clone(), Ordering::Less => { let mul = to_size / from_size; - time_array.unary::<_, Int64Type>(|o| o * mul) + time_array.try_unary::<_, Int64Type, _>(|o| { + o.checked_mul(mul).ok_or(ArrowError::ComputeError( + "overflow in cast from timestamp to timestamp".to_string(), + )) + })? } }; Ok(make_timestamp_array( @@ -1709,8 +1713,13 @@ pub fn cast_with_options( Ok(Arc::new(b.finish()) as ArrayRef) } (Timestamp(TimeUnit::Second, _), Date64) => Ok(Arc::new( - as_primitive_array::(array) - .unary::<_, Date64Type>(|x| x * MILLISECONDS), + as_primitive_array::(array).try_unary::<_, Date64Type, _>( + |x| { + x.checked_mul(MILLISECONDS).ok_or(ArrowError::ComputeError( + "overflow in cast from timestamp to date64".to_string(), + )) + }, + )?, )), (Timestamp(TimeUnit::Millisecond, _), Date64) => { cast_reinterpret_arrays::(array) From 1f06ea918e512ae4f58ddcaf9a09f97cd704ac42 Mon Sep 17 00:00:00 2001 From: Weijun Huang Date: Fri, 10 Mar 2023 11:14:38 +0100 Subject: [PATCH 2/3] update tests --- arrow-cast/src/cast.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs index de90ed8d570a..2b7a8ea31ea9 100644 --- a/arrow-cast/src/cast.rs +++ b/arrow-cast/src/cast.rs @@ -5339,6 +5339,18 @@ mod tests { assert_eq!(864000000005, c.value(0)); assert_eq!(1545696000001, c.value(1)); assert!(c.is_null(2)); + + let array = + TimestampSecondArray::from(vec![Some(864000000005), Some(1545696000001)]); + let b = cast(&array, &DataType::Date64).unwrap(); + let c = b.as_any().downcast_ref::().unwrap(); + assert_eq!(864000000005000, c.value(0)); + assert_eq!(1545696000001000, c.value(1)); + + // test overflow + let array = TimestampSecondArray::from(vec![Some(i64::MAX)]); + let b = cast(&array, &DataType::Date64); + assert!(b.is_err()); } #[test] @@ -5415,6 +5427,8 @@ mod tests { assert!(b.is_err()); let b = cast(&array, &DataType::Time64(TimeUnit::Nanosecond)); assert!(b.is_err()); + let b = cast(&array, &DataType::Time64(TimeUnit::Millisecond)); + assert!(b.is_err()); } #[test] From fe95531345be1ff21397b7ba71c20e99750cf911 Mon Sep 17 00:00:00 2001 From: Weijun Huang Date: Fri, 10 Mar 2023 13:25:22 +0100 Subject: [PATCH 3/3] refactor: cast based on cast_option --- arrow-cast/src/cast.rs | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs index 2b7a8ea31ea9..0e3d6f49471c 100644 --- a/arrow-cast/src/cast.rs +++ b/arrow-cast/src/cast.rs @@ -1682,11 +1682,11 @@ pub fn cast_with_options( Ordering::Equal => time_array.clone(), Ordering::Less => { let mul = to_size / from_size; - time_array.try_unary::<_, Int64Type, _>(|o| { - o.checked_mul(mul).ok_or(ArrowError::ComputeError( - "overflow in cast from timestamp to timestamp".to_string(), - )) - })? + if cast_options.safe { + time_array.unary_opt::<_, Int64Type>(|o| o.checked_mul(mul)) + } else { + time_array.try_unary::<_, Int64Type, _>(|o| o.mul_checked(mul))? + } } }; Ok(make_timestamp_array( @@ -1713,13 +1713,22 @@ pub fn cast_with_options( Ok(Arc::new(b.finish()) as ArrayRef) } (Timestamp(TimeUnit::Second, _), Date64) => Ok(Arc::new( - as_primitive_array::(array).try_unary::<_, Date64Type, _>( - |x| { - x.checked_mul(MILLISECONDS).ok_or(ArrowError::ComputeError( - "overflow in cast from timestamp to date64".to_string(), - )) - }, - )?, + match cast_options.safe { + true => { + // change error to None + as_primitive_array::(array) + .unary_opt::<_, Date64Type>(|x| { + x.checked_mul(MILLISECONDS) + }) + } + false => { + as_primitive_array::(array).try_unary::<_, Date64Type, _>( + |x| { + x.mul_checked(MILLISECONDS) + }, + )? + } + }, )), (Timestamp(TimeUnit::Millisecond, _), Date64) => { cast_reinterpret_arrays::(array) @@ -5347,9 +5356,14 @@ mod tests { assert_eq!(864000000005000, c.value(0)); assert_eq!(1545696000001000, c.value(1)); - // test overflow + // test overflow, safe cast + let array = TimestampSecondArray::from(vec![Some(i64::MAX)]); + let b = cast(&array, &DataType::Date64).unwrap(); + assert!(b.is_null(0)); + // test overflow, unsafe cast let array = TimestampSecondArray::from(vec![Some(i64::MAX)]); - let b = cast(&array, &DataType::Date64); + let options = CastOptions { safe: false }; + let b = cast_with_options(&array, &DataType::Date64, &options); assert!(b.is_err()); }