Skip to content

Commit d90faef

Browse files
authored
Fix broken decimal->decimal casting with large scale reduction (#8580)
# Which issue does this PR close? - Closes #8579 # Rationale for this change Bug fix # What changes are included in this PR? Detect and directly handle large scale reductions, instead of failing on accident because the computed divisor overflows. Also, replace the `pow_checked` call with a lookup into the (already existing) `MAX_DECIMALXX_FOR_EACH_PRECISION` array. This requires adding a new `MAX_FOR_EACH_PRECISION` constant to the `DecimalType` trait, but the corresponding arrays were already public so this seems ok? # Are these changes tested? New unit tests exercise the scenario (and its boundary case). The tests fail without this fix. # Are there any user-facing changes? New constant on the public `DecimalType` trait. A class of decimal conversions that used to fail will now (correctly) produce zeros instead.
1 parent 39cda62 commit d90faef

File tree

4 files changed

+76
-5
lines changed

4 files changed

+76
-5
lines changed

arrow-array/src/arithmetic.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -288,7 +288,7 @@ native_type_op!(u8);
288288
native_type_op!(u16);
289289
native_type_op!(u32);
290290
native_type_op!(u64);
291-
native_type_op!(i256, i256::ZERO, i256::ONE, i256::MIN, i256::MAX);
291+
native_type_op!(i256, i256::ZERO, i256::ONE);
292292

293293
native_type_op!(IntervalDayTime, IntervalDayTime::ZERO, IntervalDayTime::ONE);
294294
native_type_op!(

arrow-array/src/types.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1323,6 +1323,8 @@ pub trait DecimalType:
13231323
const MAX_PRECISION: u8;
13241324
/// Maximum no of digits after the decimal point (note the scale can be negative)
13251325
const MAX_SCALE: i8;
1326+
/// The maximum value for each precision in `0..=MAX_PRECISION`: [0, 9, 99, ...]
1327+
const MAX_FOR_EACH_PRECISION: &[Self::Native];
13261328
/// fn to create its [`DataType`]
13271329
const TYPE_CONSTRUCTOR: fn(u8, i8) -> DataType;
13281330
/// Default values for [`DataType`]
@@ -1393,6 +1395,7 @@ impl DecimalType for Decimal32Type {
13931395
const BYTE_LENGTH: usize = 4;
13941396
const MAX_PRECISION: u8 = DECIMAL32_MAX_PRECISION;
13951397
const MAX_SCALE: i8 = DECIMAL32_MAX_SCALE;
1398+
const MAX_FOR_EACH_PRECISION: &[i32] = &arrow_data::decimal::MAX_DECIMAL32_FOR_EACH_PRECISION;
13961399
const TYPE_CONSTRUCTOR: fn(u8, i8) -> DataType = DataType::Decimal32;
13971400
const DEFAULT_TYPE: DataType =
13981401
DataType::Decimal32(DECIMAL32_MAX_PRECISION, DECIMAL32_DEFAULT_SCALE);
@@ -1427,6 +1430,7 @@ impl DecimalType for Decimal64Type {
14271430
const BYTE_LENGTH: usize = 8;
14281431
const MAX_PRECISION: u8 = DECIMAL64_MAX_PRECISION;
14291432
const MAX_SCALE: i8 = DECIMAL64_MAX_SCALE;
1433+
const MAX_FOR_EACH_PRECISION: &[i64] = &arrow_data::decimal::MAX_DECIMAL64_FOR_EACH_PRECISION;
14301434
const TYPE_CONSTRUCTOR: fn(u8, i8) -> DataType = DataType::Decimal64;
14311435
const DEFAULT_TYPE: DataType =
14321436
DataType::Decimal64(DECIMAL64_MAX_PRECISION, DECIMAL64_DEFAULT_SCALE);
@@ -1461,6 +1465,7 @@ impl DecimalType for Decimal128Type {
14611465
const BYTE_LENGTH: usize = 16;
14621466
const MAX_PRECISION: u8 = DECIMAL128_MAX_PRECISION;
14631467
const MAX_SCALE: i8 = DECIMAL128_MAX_SCALE;
1468+
const MAX_FOR_EACH_PRECISION: &[i128] = &arrow_data::decimal::MAX_DECIMAL128_FOR_EACH_PRECISION;
14641469
const TYPE_CONSTRUCTOR: fn(u8, i8) -> DataType = DataType::Decimal128;
14651470
const DEFAULT_TYPE: DataType =
14661471
DataType::Decimal128(DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE);
@@ -1495,6 +1500,7 @@ impl DecimalType for Decimal256Type {
14951500
const BYTE_LENGTH: usize = 32;
14961501
const MAX_PRECISION: u8 = DECIMAL256_MAX_PRECISION;
14971502
const MAX_SCALE: i8 = DECIMAL256_MAX_SCALE;
1503+
const MAX_FOR_EACH_PRECISION: &[i256] = &arrow_data::decimal::MAX_DECIMAL256_FOR_EACH_PRECISION;
14981504
const TYPE_CONSTRUCTOR: fn(u8, i8) -> DataType = DataType::Decimal256;
14991505
const DEFAULT_TYPE: DataType =
15001506
DataType::Decimal256(DECIMAL256_MAX_PRECISION, DECIMAL_DEFAULT_SCALE);

arrow-cast/src/cast/decimal.rs

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -188,11 +188,19 @@ where
188188
// [99999] -> [99] + 1 = [100], a cast to Decimal(2, 0) would not be possible
189189
let is_infallible_cast = (input_precision as i8) - delta_scale < (output_precision as i8);
190190

191-
let div = I::Native::from_decimal(10_i128)
192-
.unwrap()
193-
.pow_checked(delta_scale as u32)?;
191+
// delta_scale is guaranteed to be > 0, but may also be larger than I::MAX_PRECISION. If so, the
192+
// scale change divides out more digits than the input has precision and the result of the cast
193+
// is always zero. For example, if we try to apply delta_scale=10 a decimal32 value, the largest
194+
// possible result is 999999999/10000000000 = 0.0999999999, which rounds to zero. Smaller values
195+
// (e.g. 1/10000000000) or larger delta_scale (e.g. 999999999/10000000000000) produce even
196+
// smaller results, which also round to zero. In that case, just return an array of zeros.
197+
let Some(max) = I::MAX_FOR_EACH_PRECISION.get(delta_scale as usize) else {
198+
let zeros = vec![O::Native::ZERO; array.len()];
199+
return Ok(PrimitiveArray::new(zeros.into(), array.nulls().cloned()));
200+
};
194201

195-
let half = div.div_wrapping(I::Native::from_usize(2).unwrap());
202+
let div = max.add_wrapping(I::Native::ONE);
203+
let half = div.div_wrapping(I::Native::ONE.add_wrapping(I::Native::ONE));
196204
let half_neg = half.neg_wrapping();
197205

198206
let f = |x: I::Native| {

arrow-cast/src/cast/mod.rs

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3084,6 +3084,32 @@ mod tests {
30843084
);
30853085
}
30863086

3087+
#[test]
3088+
fn test_cast_decimal32_to_decimal32_large_scale_reduction() {
3089+
let array = vec![Some(-999999999), Some(0), Some(999999999), None];
3090+
let array = create_decimal32_array(array, 9, 3).unwrap();
3091+
3092+
// Divide out all digits of precision -- rounding could still produce +/- 1
3093+
let output_type = DataType::Decimal32(9, -6);
3094+
assert!(can_cast_types(array.data_type(), &output_type));
3095+
generate_cast_test_case!(
3096+
&array,
3097+
Decimal32Array,
3098+
&output_type,
3099+
vec![Some(-1), Some(0), Some(1), None]
3100+
);
3101+
3102+
// Divide out more digits than we have precision -- all-zero result
3103+
let output_type = DataType::Decimal32(9, -7);
3104+
assert!(can_cast_types(array.data_type(), &output_type));
3105+
generate_cast_test_case!(
3106+
&array,
3107+
Decimal32Array,
3108+
&output_type,
3109+
vec![Some(0), Some(0), Some(0), None]
3110+
);
3111+
}
3112+
30873113
#[test]
30883114
fn test_cast_decimal64_to_decimal64_overflow() {
30893115
let input_type = DataType::Decimal64(18, 3);
@@ -3106,6 +3132,37 @@ mod tests {
31063132
);
31073133
}
31083134

3135+
#[test]
3136+
fn test_cast_decimal64_to_decimal64_large_scale_reduction() {
3137+
let array = vec![
3138+
Some(-999999999999999999),
3139+
Some(0),
3140+
Some(999999999999999999),
3141+
None,
3142+
];
3143+
let array = create_decimal64_array(array, 18, 3).unwrap();
3144+
3145+
// Divide out all digits of precision -- rounding could still produce +/- 1
3146+
let output_type = DataType::Decimal64(18, -15);
3147+
assert!(can_cast_types(array.data_type(), &output_type));
3148+
generate_cast_test_case!(
3149+
&array,
3150+
Decimal64Array,
3151+
&output_type,
3152+
vec![Some(-1), Some(0), Some(1), None]
3153+
);
3154+
3155+
// Divide out more digits than we have precision -- all-zero result
3156+
let output_type = DataType::Decimal64(18, -16);
3157+
assert!(can_cast_types(array.data_type(), &output_type));
3158+
generate_cast_test_case!(
3159+
&array,
3160+
Decimal64Array,
3161+
&output_type,
3162+
vec![Some(0), Some(0), Some(0), None]
3163+
);
3164+
}
3165+
31093166
#[test]
31103167
fn test_cast_floating_to_decimals() {
31113168
for output_type in [

0 commit comments

Comments
 (0)