Skip to content

Commit 751b082

Browse files
authored
[Variant] Support variant to Decimal32/64/128/256 (#8552)
# Which issue does this PR close? We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. - Part of #8477. # Rationale for this change # What changes are included in this PR? - Support casting `Variant` → `Decimal32/64/128/256` - Handle scaling and precision adjustments, downscaling may lose fractional precision # Are these changes tested? Yes # Are there any user-facing changes? New cast types supported
1 parent 52f7bf1 commit 751b082

File tree

5 files changed

+878
-84
lines changed

5 files changed

+878
-84
lines changed

arrow-cast/src/cast/decimal.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,17 +19,23 @@ use crate::cast::*;
1919

2020
/// A utility trait that provides checked conversions between
2121
/// decimal types inspired by [`NumCast`]
22-
pub(crate) trait DecimalCast: Sized {
22+
pub trait DecimalCast: Sized {
23+
/// Convert the decimal to an i32
2324
fn to_i32(self) -> Option<i32>;
2425

26+
/// Convert the decimal to an i64
2527
fn to_i64(self) -> Option<i64>;
2628

29+
/// Convert the decimal to an i128
2730
fn to_i128(self) -> Option<i128>;
2831

32+
/// Convert the decimal to an i256
2933
fn to_i256(self) -> Option<i256>;
3034

35+
/// Convert a value of another [`DecimalCast`] type into this decimal type
3136
fn from_decimal<T: DecimalCast>(n: T) -> Option<Self>;
3237

38+
/// Convert an `f64` into this decimal type
3339
fn from_f64(n: f64) -> Option<Self>;
3440
}
3541

arrow-cast/src/cast/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ use arrow_schema::*;
6767
use arrow_select::take::take;
6868
use num_traits::{NumCast, ToPrimitive, cast::AsPrimitive};
6969

70+
pub use decimal::DecimalCast;
71+
7072
/// CastOptions provides a way to override the default cast behaviors
7173
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
7274
pub struct CastOptions<'a> {

parquet-variant-compute/src/type_conversion.rs

Lines changed: 173 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,13 @@
1717

1818
//! Module for transforming a typed arrow `Array` to `VariantArray`.
1919
20-
use arrow::datatypes::{self, ArrowPrimitiveType, ArrowTimestampType, Date32Type};
21-
use parquet_variant::Variant;
20+
use arrow::array::ArrowNativeTypeOp;
21+
use arrow::compute::DecimalCast;
22+
use arrow::datatypes::{
23+
self, ArrowPrimitiveType, ArrowTimestampType, Decimal32Type, Decimal64Type, Decimal128Type,
24+
DecimalType,
25+
};
26+
use parquet_variant::{Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16};
2227

2328
/// Options for controlling the behavior of `cast_to_variant_with_options`.
2429
#[derive(Debug, Clone, PartialEq, Eq)]
@@ -82,7 +87,7 @@ impl_primitive_from_variant!(datatypes::Float64Type, as_f64);
8287
impl_primitive_from_variant!(
8388
datatypes::Date32Type,
8489
as_naive_date,
85-
Date32Type::from_naive_date
90+
datatypes::Date32Type::from_naive_date
8691
);
8792
impl_timestamp_from_variant!(
8893
datatypes::TimestampMicrosecondType,
@@ -109,6 +114,171 @@ impl_timestamp_from_variant!(
109114
|timestamp| Self::make_value(timestamp.naive_utc())
110115
);
111116

117+
/// Returns the unscaled integer representation for Arrow decimal type `O`
118+
/// from a `Variant`.
119+
///
120+
/// - `precision` and `scale` specify the target Arrow decimal parameters
121+
/// - Integer variants (`Int8/16/32/64`) are treated as decimals with scale 0
122+
/// - Decimal variants (`Decimal4/8/16`) use their embedded precision and scale
123+
///
124+
/// The value is rescaled to (`precision`, `scale`) using `rescale_decimal` and
125+
/// returns `None` if it cannot fit the requested precision.
126+
pub(crate) fn variant_to_unscaled_decimal<O>(
127+
variant: &Variant<'_, '_>,
128+
precision: u8,
129+
scale: i8,
130+
) -> Option<O::Native>
131+
where
132+
O: DecimalType,
133+
O::Native: DecimalCast,
134+
{
135+
match variant {
136+
Variant::Int8(i) => rescale_decimal::<Decimal32Type, O>(
137+
*i as i32,
138+
VariantDecimal4::MAX_PRECISION,
139+
0,
140+
precision,
141+
scale,
142+
),
143+
Variant::Int16(i) => rescale_decimal::<Decimal32Type, O>(
144+
*i as i32,
145+
VariantDecimal4::MAX_PRECISION,
146+
0,
147+
precision,
148+
scale,
149+
),
150+
Variant::Int32(i) => rescale_decimal::<Decimal32Type, O>(
151+
*i,
152+
VariantDecimal4::MAX_PRECISION,
153+
0,
154+
precision,
155+
scale,
156+
),
157+
Variant::Int64(i) => rescale_decimal::<Decimal64Type, O>(
158+
*i,
159+
VariantDecimal8::MAX_PRECISION,
160+
0,
161+
precision,
162+
scale,
163+
),
164+
Variant::Decimal4(d) => rescale_decimal::<Decimal32Type, O>(
165+
d.integer(),
166+
VariantDecimal4::MAX_PRECISION,
167+
d.scale() as i8,
168+
precision,
169+
scale,
170+
),
171+
Variant::Decimal8(d) => rescale_decimal::<Decimal64Type, O>(
172+
d.integer(),
173+
VariantDecimal8::MAX_PRECISION,
174+
d.scale() as i8,
175+
precision,
176+
scale,
177+
),
178+
Variant::Decimal16(d) => rescale_decimal::<Decimal128Type, O>(
179+
d.integer(),
180+
VariantDecimal16::MAX_PRECISION,
181+
d.scale() as i8,
182+
precision,
183+
scale,
184+
),
185+
_ => None,
186+
}
187+
}
188+
189+
/// Rescale a decimal from (input_precision, input_scale) to (output_precision, output_scale)
190+
/// and return the scaled value if it fits the output precision. Similar to the implementation in
191+
/// decimal.rs in arrow-cast.
192+
pub(crate) fn rescale_decimal<I, O>(
193+
value: I::Native,
194+
input_precision: u8,
195+
input_scale: i8,
196+
output_precision: u8,
197+
output_scale: i8,
198+
) -> Option<O::Native>
199+
where
200+
I: DecimalType,
201+
O: DecimalType,
202+
I::Native: DecimalCast,
203+
O::Native: DecimalCast,
204+
{
205+
let delta_scale = output_scale - input_scale;
206+
207+
// Determine if the cast is infallible based on precision/scale math
208+
let is_infallible_cast =
209+
is_infallible_decimal_cast(input_precision, input_scale, output_precision, output_scale);
210+
211+
let scaled = if delta_scale == 0 {
212+
O::Native::from_decimal(value)
213+
} else if delta_scale > 0 {
214+
let mul = O::Native::from_decimal(10_i128)
215+
.and_then(|t| t.pow_checked(delta_scale as u32).ok())?;
216+
O::Native::from_decimal(value).and_then(|x| x.mul_checked(mul).ok())
217+
} else {
218+
// delta_scale is guaranteed to be > 0, but may also be larger than I::MAX_PRECISION. If so, the
219+
// scale change divides out more digits than the input has precision and the result of the cast
220+
// is always zero. For example, if we try to apply delta_scale=10 a decimal32 value, the largest
221+
// possible result is 999999999/10000000000 = 0.0999999999, which rounds to zero. Smaller values
222+
// (e.g. 1/10000000000) or larger delta_scale (e.g. 999999999/10000000000000) produce even
223+
// smaller results, which also round to zero. In that case, just return an array of zeros.
224+
let delta_scale = delta_scale.unsigned_abs() as usize;
225+
let Some(max) = I::MAX_FOR_EACH_PRECISION.get(delta_scale) else {
226+
return Some(O::Native::ZERO);
227+
};
228+
let div = max.add_wrapping(I::Native::ONE);
229+
let half = div.div_wrapping(I::Native::ONE.add_wrapping(I::Native::ONE));
230+
let half_neg = half.neg_wrapping();
231+
232+
// div is >= 10 and so this cannot overflow
233+
let d = value.div_wrapping(div);
234+
let r = value.mod_wrapping(div);
235+
236+
// Round result
237+
let adjusted = match value >= I::Native::ZERO {
238+
true if r >= half => d.add_wrapping(I::Native::ONE),
239+
false if r <= half_neg => d.sub_wrapping(I::Native::ONE),
240+
_ => d,
241+
};
242+
O::Native::from_decimal(adjusted)
243+
};
244+
245+
scaled.filter(|v| is_infallible_cast || O::is_valid_decimal_precision(*v, output_precision))
246+
}
247+
248+
/// Returns true if casting from (`input_precision`, `input_scale`) to
/// (`output_precision`, `output_scale`) is infallible based on precision/scale math.
///
/// All arithmetic is done in `i16` so that no combination of `u8` precisions
/// and `i8` scales can overflow or wrap.
fn is_infallible_decimal_cast(
    input_precision: u8,
    input_scale: i8,
    output_precision: u8,
    output_scale: i8,
) -> bool {
    // Widest possible magnitudes: |delta_scale| <= 255 and precision <= 255,
    // so every intermediate value stays well inside the `i16` range.
    let delta_scale = i16::from(output_scale) - i16::from(input_scale);
    let input_precision = i16::from(input_precision);
    let output_precision = i16::from(output_precision);
    if delta_scale >= 0 {
        // if the gain in precision (digits) is greater than the multiplication due to scaling
        // every number will fit into the output type
        // Example: If we are starting with any number of precision 5 [xxxxx],
        // then an increase of scale by 3 will have the following effect on the representation:
        // [xxxxx] -> [xxxxx000], so for the cast to be infallible, the output type
        // needs to provide at least 8 digits precision
        input_precision + delta_scale <= output_precision
    } else {
        // if the reduction of the input number through scaling (dividing) is greater
        // than a possible precision loss (plus potential increase via rounding)
        // every input number will fit into the output type
        // Example: If we are starting with any number of precision 5 [xxxxx],
        // then a decrease of the scale by 3 will have the following effect on the representation:
        // [xxxxx] -> [xx] (+ 1 possibly, due to rounding).
        // The rounding may add an additional digit, so for the cast to be infallible,
        // the output type needs to have at least 3 digits of precision.
        // e.g. Decimal(5, 3) 99.999 to Decimal(3, 0) will result in 100:
        // [99999] -> [99] + 1 = [100], a cast to Decimal(2, 0) would not be possible
        input_precision + delta_scale < output_precision
    }
}
281+
112282
/// Convert the value at a specific index in the given array into a `Variant`.
113283
macro_rules! non_generic_conversion_single_value {
114284
($array:expr, $cast_fn:expr, $index:expr) => {{

0 commit comments

Comments
 (0)