Skip to content

Commit 0603e21

Browse files
committed
[Variant] rescale_decimal followup
1 parent 0c33129 commit 0603e21

File tree

2 files changed

+39
-53
lines changed

2 files changed

+39
-53
lines changed

arrow-array/src/types.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1324,7 +1324,7 @@ pub trait DecimalType:
13241324
/// Maximum number of digits after the decimal point (note the scale can be negative)
13251325
const MAX_SCALE: i8;
13261326
/// The maximum value for each precision in `0..=MAX_PRECISION`: [0, 9, 99, ...]
1327-
const MAX_FOR_EACH_PRECISION: &[Self::Native];
1327+
const MAX_FOR_EACH_PRECISION: &'static[Self::Native];
13281328
/// fn to create its [`DataType`]
13291329
const TYPE_CONSTRUCTOR: fn(u8, i8) -> DataType;
13301330
/// Default values for [`DataType`]
@@ -1395,7 +1395,7 @@ impl DecimalType for Decimal32Type {
13951395
const BYTE_LENGTH: usize = 4;
13961396
const MAX_PRECISION: u8 = DECIMAL32_MAX_PRECISION;
13971397
const MAX_SCALE: i8 = DECIMAL32_MAX_SCALE;
1398-
const MAX_FOR_EACH_PRECISION: &[i32] = &arrow_data::decimal::MAX_DECIMAL32_FOR_EACH_PRECISION;
1398+
const MAX_FOR_EACH_PRECISION: &'static[i32] = &arrow_data::decimal::MAX_DECIMAL32_FOR_EACH_PRECISION;
13991399
const TYPE_CONSTRUCTOR: fn(u8, i8) -> DataType = DataType::Decimal32;
14001400
const DEFAULT_TYPE: DataType =
14011401
DataType::Decimal32(DECIMAL32_MAX_PRECISION, DECIMAL32_DEFAULT_SCALE);
@@ -1430,7 +1430,7 @@ impl DecimalType for Decimal64Type {
14301430
const BYTE_LENGTH: usize = 8;
14311431
const MAX_PRECISION: u8 = DECIMAL64_MAX_PRECISION;
14321432
const MAX_SCALE: i8 = DECIMAL64_MAX_SCALE;
1433-
const MAX_FOR_EACH_PRECISION: &[i64] = &arrow_data::decimal::MAX_DECIMAL64_FOR_EACH_PRECISION;
1433+
const MAX_FOR_EACH_PRECISION: &'static[i64] = &arrow_data::decimal::MAX_DECIMAL64_FOR_EACH_PRECISION;
14341434
const TYPE_CONSTRUCTOR: fn(u8, i8) -> DataType = DataType::Decimal64;
14351435
const DEFAULT_TYPE: DataType =
14361436
DataType::Decimal64(DECIMAL64_MAX_PRECISION, DECIMAL64_DEFAULT_SCALE);
@@ -1465,7 +1465,7 @@ impl DecimalType for Decimal128Type {
14651465
const BYTE_LENGTH: usize = 16;
14661466
const MAX_PRECISION: u8 = DECIMAL128_MAX_PRECISION;
14671467
const MAX_SCALE: i8 = DECIMAL128_MAX_SCALE;
1468-
const MAX_FOR_EACH_PRECISION: &[i128] = &arrow_data::decimal::MAX_DECIMAL128_FOR_EACH_PRECISION;
1468+
const MAX_FOR_EACH_PRECISION: &'static[i128] = &arrow_data::decimal::MAX_DECIMAL128_FOR_EACH_PRECISION;
14691469
const TYPE_CONSTRUCTOR: fn(u8, i8) -> DataType = DataType::Decimal128;
14701470
const DEFAULT_TYPE: DataType =
14711471
DataType::Decimal128(DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE);
@@ -1500,7 +1500,7 @@ impl DecimalType for Decimal256Type {
15001500
const BYTE_LENGTH: usize = 32;
15011501
const MAX_PRECISION: u8 = DECIMAL256_MAX_PRECISION;
15021502
const MAX_SCALE: i8 = DECIMAL256_MAX_SCALE;
1503-
const MAX_FOR_EACH_PRECISION: &[i256] = &arrow_data::decimal::MAX_DECIMAL256_FOR_EACH_PRECISION;
1503+
const MAX_FOR_EACH_PRECISION: &'static[i256] = &arrow_data::decimal::MAX_DECIMAL256_FOR_EACH_PRECISION;
15041504
const TYPE_CONSTRUCTOR: fn(u8, i8) -> DataType = DataType::Decimal256;
15051505
const DEFAULT_TYPE: DataType =
15061506
DataType::Decimal256(DECIMAL256_MAX_PRECISION, DECIMAL_DEFAULT_SCALE);

parquet-variant-compute/src/type_conversion.rs

Lines changed: 34 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -189,40 +189,48 @@ where
189189
/// Rescale a decimal from (input_precision, input_scale) to (output_precision, output_scale)
190190
/// and return the scaled value if it fits the output precision. Similar to the implementation in
191191
/// decimal.rs in arrow-cast.
192-
pub(crate) fn rescale_decimal<I, O>(
192+
pub(crate) fn rescale_decimal<I: DecimalType, O: DecimalType>(
193193
value: I::Native,
194194
input_precision: u8,
195195
input_scale: i8,
196196
output_precision: u8,
197197
output_scale: i8,
198198
) -> Option<O::Native>
199199
where
200-
I: DecimalType,
201-
O: DecimalType,
202200
I::Native: DecimalCast,
203201
O::Native: DecimalCast,
204202
{
205203
let delta_scale = output_scale - input_scale;
206204

207-
// Determine if the cast is infallible based on precision/scale math
208-
let is_infallible_cast =
209-
is_infallible_decimal_cast(input_precision, input_scale, output_precision, output_scale);
205+
let (scaled, is_infallible_cast) = if delta_scale >= 0 {
206+
// O::MAX_FOR_EACH_PRECISION[k] stores 10^k - 1 (e.g., 9, 99, 999, ...).
207+
// Adding 1 yields exactly 10^k without computing a power at runtime.
208+
// Using the precomputed table avoids pow(10, k) and its checked/overflow
209+
// handling, which is faster and simpler for scaling by 10^delta_scale.
210+
let max = O::MAX_FOR_EACH_PRECISION.get(delta_scale as usize)?;
211+
let mul = max.add_wrapping(O::Native::ONE);
210212

211-
let scaled = if delta_scale == 0 {
212-
O::Native::from_decimal(value)
213-
} else if delta_scale > 0 {
214-
let mul = O::Native::from_decimal(10_i128)
215-
.and_then(|t| t.pow_checked(delta_scale as u32).ok())?;
216-
O::Native::from_decimal(value).and_then(|x| x.mul_checked(mul).ok())
213+
// if the gain in precision (digits) is at least as large as the number of digits added by scaling,
214+
// every number will fit into the output type
215+
// Example: If we are starting with any number of precision 5 [xxxxx],
216+
// then an increase of scale by 3 will have the following effect on the representation:
217+
// [xxxxx] -> [xxxxx000], so for the cast to be infallible, the output type
218+
// needs to provide at least 8 digits precision
219+
let is_infallible_cast = input_precision as i8 + delta_scale <= output_precision as i8;
220+
let scaled = if is_infallible_cast {
221+
Some(O::Native::from_decimal(value).unwrap().mul_wrapping(mul))
222+
} else {
223+
O::Native::from_decimal(value).and_then(|x| x.mul_checked(mul).ok())
224+
};
225+
(scaled, is_infallible_cast)
217226
} else {
218-
// delta_scale is guaranteed to be > 0, but may also be larger than I::MAX_PRECISION. If so, the
219-
// scale change divides out more digits than the input has precision and the result of the cast
220-
// is always zero. For example, if we try to apply delta_scale=10 a decimal32 value, the largest
221-
// possible result is 999999999/10000000000 = 0.0999999999, which rounds to zero. Smaller values
222-
// (e.g. 1/10000000000) or larger delta_scale (e.g. 999999999/10000000000000) produce even
223-
// smaller results, which also round to zero. In that case, just return an array of zeros.
224-
let delta_scale = delta_scale.unsigned_abs() as usize;
225-
let Some(max) = I::MAX_FOR_EACH_PRECISION.get(delta_scale) else {
227+
// The absolute value of delta_scale is guaranteed to be > 0, but may also be larger than I::MAX_PRECISION.
228+
// If so, the scale change divides out more digits than the input has precision and the result
229+
// of the cast is always zero. For example, if we try to apply delta_scale=10 to a decimal32 value,
230+
// the largest possible result is 999999999/10000000000 = 0.0999999999, which rounds to zero.
231+
// Smaller values (e.g. 1/10000000000) or larger delta_scale (e.g. 999999999/10000000000000)
232+
// produce even smaller results, which also round to zero. In that case, just return zero.
233+
let Some(max) = I::MAX_FOR_EACH_PRECISION.get(delta_scale.unsigned_abs() as usize) else {
226234
return Some(O::Native::ZERO);
227235
};
228236
let div = max.add_wrapping(I::Native::ONE);
@@ -239,44 +247,22 @@ where
239247
false if r <= half_neg => d.sub_wrapping(I::Native::ONE),
240248
_ => d,
241249
};
242-
O::Native::from_decimal(adjusted)
243-
};
244250

245-
scaled.filter(|v| is_infallible_cast || O::is_valid_decimal_precision(*v, output_precision))
246-
}
247-
248-
/// Returns true if casting from (input_precision, input_scale) to
249-
/// (output_precision, output_scale) is infallible based on precision/scale math.
250-
fn is_infallible_decimal_cast(
251-
input_precision: u8,
252-
input_scale: i8,
253-
output_precision: u8,
254-
output_scale: i8,
255-
) -> bool {
256-
let delta_scale = output_scale - input_scale;
257-
let input_precision = input_precision as i8;
258-
let output_precision = output_precision as i8;
259-
if delta_scale >= 0 {
260-
// if the gain in precision (digits) is greater than the multiplication due to scaling
261-
// every number will fit into the output type
262-
// Example: If we are starting with any number of precision 5 [xxxxx],
263-
// then an increase of scale by 3 will have the following effect on the representation:
264-
// [xxxxx] -> [xxxxx000], so for the cast to be infallible, the output type
265-
// needs to provide at least 8 digits precision
266-
input_precision + delta_scale <= output_precision
267-
} else {
268251
// if the reduction of the input number through scaling (dividing) is greater
269252
// than a possible precision loss (plus potential increase via rounding)
270253
// every input number will fit into the output type
271254
// Example: If we are starting with any number of precision 5 [xxxxx],
272255
// then a decrease of the scale by 3 will have the following effect on the representation:
273256
// [xxxxx] -> [xx] (+ 1 possibly, due to rounding).
274-
// The rounding may add an additional digit, so for the cast to be infallible,
257+
// The rounding may add a digit, so for the cast to be infallible,
275258
// the output type needs to have at least 3 digits of precision.
276259
// e.g. Decimal(5, 3) 99.999 to Decimal(3, 0) will result in 100:
277260
// [99999] -> [99] + 1 = [100], a cast to Decimal(2, 0) would not be possible
278-
input_precision + delta_scale < output_precision
279-
}
261+
let is_infallible_cast = input_precision as i8 + delta_scale < output_precision as i8;
262+
(O::Native::from_decimal(adjusted), is_infallible_cast)
263+
};
264+
265+
scaled.filter(|v| is_infallible_cast || O::is_valid_decimal_precision(*v, output_precision))
280266
}
281267

282268
/// Convert the value at a specific index in the given array into a `Variant`.

0 commit comments

Comments
 (0)