Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion arrow-array/src/arithmetic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ native_type_op!(u8);
native_type_op!(u16);
native_type_op!(u32);
native_type_op!(u64);
native_type_op!(i256, i256::ZERO, i256::ONE, i256::MIN, i256::MAX);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

opportunistic cleanup

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I verified that this form of the macro simply re-calls the same macro with the same arguments 👍

native_type_op!(i256, i256::ZERO, i256::ONE);

native_type_op!(IntervalDayTime, IntervalDayTime::ZERO, IntervalDayTime::ONE);
native_type_op!(
Expand Down
6 changes: 6 additions & 0 deletions arrow-array/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1323,6 +1323,8 @@ pub trait DecimalType:
const MAX_PRECISION: u8;
/// Maximum number of digits after the decimal point (note the scale can be negative)
const MAX_SCALE: i8;
/// The maximum value for each precision in `0..=MAX_PRECISION`: [0, 9, 99, ...]
const MAX_FOR_EACH_PRECISION: &[Self::Native];
/// fn to create its [`DataType`]
const TYPE_CONSTRUCTOR: fn(u8, i8) -> DataType;
/// Default values for [`DataType`]
Expand Down Expand Up @@ -1393,6 +1395,7 @@ impl DecimalType for Decimal32Type {
const BYTE_LENGTH: usize = 4;
const MAX_PRECISION: u8 = DECIMAL32_MAX_PRECISION;
const MAX_SCALE: i8 = DECIMAL32_MAX_SCALE;
const MAX_FOR_EACH_PRECISION: &[i32] = &arrow_data::decimal::MAX_DECIMAL32_FOR_EACH_PRECISION;
const TYPE_CONSTRUCTOR: fn(u8, i8) -> DataType = DataType::Decimal32;
const DEFAULT_TYPE: DataType =
DataType::Decimal32(DECIMAL32_MAX_PRECISION, DECIMAL32_DEFAULT_SCALE);
Expand Down Expand Up @@ -1427,6 +1430,7 @@ impl DecimalType for Decimal64Type {
const BYTE_LENGTH: usize = 8;
const MAX_PRECISION: u8 = DECIMAL64_MAX_PRECISION;
const MAX_SCALE: i8 = DECIMAL64_MAX_SCALE;
const MAX_FOR_EACH_PRECISION: &[i64] = &arrow_data::decimal::MAX_DECIMAL64_FOR_EACH_PRECISION;
const TYPE_CONSTRUCTOR: fn(u8, i8) -> DataType = DataType::Decimal64;
const DEFAULT_TYPE: DataType =
DataType::Decimal64(DECIMAL64_MAX_PRECISION, DECIMAL64_DEFAULT_SCALE);
Expand Down Expand Up @@ -1461,6 +1465,7 @@ impl DecimalType for Decimal128Type {
const BYTE_LENGTH: usize = 16;
const MAX_PRECISION: u8 = DECIMAL128_MAX_PRECISION;
const MAX_SCALE: i8 = DECIMAL128_MAX_SCALE;
const MAX_FOR_EACH_PRECISION: &[i128] = &arrow_data::decimal::MAX_DECIMAL128_FOR_EACH_PRECISION;
const TYPE_CONSTRUCTOR: fn(u8, i8) -> DataType = DataType::Decimal128;
const DEFAULT_TYPE: DataType =
DataType::Decimal128(DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE);
Expand Down Expand Up @@ -1495,6 +1500,7 @@ impl DecimalType for Decimal256Type {
const BYTE_LENGTH: usize = 32;
const MAX_PRECISION: u8 = DECIMAL256_MAX_PRECISION;
const MAX_SCALE: i8 = DECIMAL256_MAX_SCALE;
const MAX_FOR_EACH_PRECISION: &[i256] = &arrow_data::decimal::MAX_DECIMAL256_FOR_EACH_PRECISION;
const TYPE_CONSTRUCTOR: fn(u8, i8) -> DataType = DataType::Decimal256;
const DEFAULT_TYPE: DataType =
DataType::Decimal256(DECIMAL256_MAX_PRECISION, DECIMAL_DEFAULT_SCALE);
Expand Down
16 changes: 12 additions & 4 deletions arrow-cast/src/cast/decimal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -188,11 +188,19 @@ where
// [99999] -> [99] + 1 = [100], a cast to Decimal(2, 0) would not be possible
let is_infallible_cast = (input_precision as i8) - delta_scale < (output_precision as i8);

let div = I::Native::from_decimal(10_i128)
.unwrap()
.pow_checked(delta_scale as u32)?;
// delta_scale is guaranteed to be > 0, but may also be larger than I::MAX_PRECISION. If so, the
// scale change divides out more digits than the input has precision and the result of the cast
// is always zero. For example, if we try to apply delta_scale=10 to a decimal32 value, the largest
// possible result is 999999999/10000000000 = 0.0999999999, which rounds to zero. Smaller values
// (e.g. 1/10000000000) or larger delta_scale (e.g. 999999999/10000000000000) produce even
// smaller results, which also round to zero. In that case, just return an array of zeros.
let Some(max) = I::MAX_FOR_EACH_PRECISION.get(delta_scale as usize) else {
let zeros = vec![O::Native::ZERO; array.len()];
return Ok(PrimitiveArray::new(zeros.into(), array.nulls().cloned()));
};

let half = div.div_wrapping(I::Native::from_usize(2).unwrap());
let div = max.add_wrapping(I::Native::ONE);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rather than paying for an exponentiation, just look up the max value for that precision and add one.

let half = div.div_wrapping(I::Native::ONE.add_wrapping(I::Native::ONE));
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Opportunistic cleanup: compute 2 as 1+1 (infallible) instead of converting from 2_usize (needs unwrap). It's fairly likely that the compiler emits the same code either way, though, thanks to aggressive inlining.

let half_neg = half.neg_wrapping();

let f = |x: I::Native| {
Expand Down
57 changes: 57 additions & 0 deletions arrow-cast/src/cast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3084,6 +3084,32 @@ mod tests {
);
}

#[test]
fn test_cast_decimal32_to_decimal32_large_scale_reduction() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I verified this test fails without the code in this PR:

called `Result::unwrap()` on an `Err` value: ArithmeticOverflow("Overflow happened on: 10 ^ 10")
thread 'cast::tests::test_cast_decimal32_to_decimal32_large_scale_reduction' panicked at arrow-cast/src/cast/mod.rs:3105:9:
called `Result::unwrap()` on an `Err` value: ArithmeticOverflow("Overflow happened on: 10 ^ 10")
stack backtrace:

let array = vec![Some(-999999999), Some(0), Some(999999999), None];
let array = create_decimal32_array(array, 9, 3).unwrap();

// Divide out all digits of precision -- rounding could still produce +/- 1
let output_type = DataType::Decimal32(9, -6);
assert!(can_cast_types(array.data_type(), &output_type));
generate_cast_test_case!(
&array,
Decimal32Array,
&output_type,
vec![Some(-1), Some(0), Some(1), None]
);

// Divide out more digits than we have precision -- all-zero result
let output_type = DataType::Decimal32(9, -7);
assert!(can_cast_types(array.data_type(), &output_type));
generate_cast_test_case!(
&array,
Decimal32Array,
&output_type,
vec![Some(0), Some(0), Some(0), None]
);
}

#[test]
fn test_cast_decimal64_to_decimal64_overflow() {
let input_type = DataType::Decimal64(18, 3);
Expand All @@ -3106,6 +3132,37 @@ mod tests {
);
}

#[test]
fn test_cast_decimal64_to_decimal64_large_scale_reduction() {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not obvious to me that we need this second version of the test, given that it's all generic code anyway.

I intentionally avoided adding cases for 128- and 256-bit decimals because IMO they add no value -- any problems in the constants should be caught by other tests, and two data points should suffice to confirm that the new code doesn't hide any size-specific assumptions.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree this seems adequate.

let array = vec![
Some(-999999999999999999),
Some(0),
Some(999999999999999999),
None,
];
let array = create_decimal64_array(array, 18, 3).unwrap();

// Divide out all digits of precision -- rounding could still produce +/- 1
let output_type = DataType::Decimal64(18, -15);
assert!(can_cast_types(array.data_type(), &output_type));
generate_cast_test_case!(
&array,
Decimal64Array,
&output_type,
vec![Some(-1), Some(0), Some(1), None]
);

// Divide out more digits than we have precision -- all-zero result
let output_type = DataType::Decimal64(18, -16);
assert!(can_cast_types(array.data_type(), &output_type));
generate_cast_test_case!(
&array,
Decimal64Array,
&output_type,
vec![Some(0), Some(0), Some(0), None]
);
}

#[test]
fn test_cast_floating_to_decimals() {
for output_type in [
Expand Down
Loading