Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
f25b499
[Variant] Support variant to `Decimal32/64/128/256`
liamzwbao Oct 3, 2025
7a32191
Simplify logic
liamzwbao Oct 4, 2025
02d29de
Using macro to generalize
liamzwbao Oct 4, 2025
f498db5
Support i256 and Decimal256
liamzwbao Oct 4, 2025
964e45a
Simplify decimal builders
liamzwbao Oct 4, 2025
43d579d
Merge branch 'main' into issue-8477-variant-to-arrow-decimal
liamzwbao Oct 4, 2025
6f39a2a
fmt
liamzwbao Oct 4, 2025
8f0f53c
Add comment
liamzwbao Oct 4, 2025
d88fd7f
assert precision and scale in tests
liamzwbao Oct 6, 2025
522b26a
Merge branch 'main' into issue-8477-variant-to-arrow-decimal
liamzwbao Oct 6, 2025
9b6d0e1
address comments
liamzwbao Oct 6, 2025
54237fe
add more overflow cases and valid cases that will overflow in current…
liamzwbao Oct 8, 2025
e0b18da
WIP
liamzwbao Oct 8, 2025
1f19580
Refactor common logic
liamzwbao Oct 8, 2025
c163a91
Refactor common logic
liamzwbao Oct 8, 2025
274a028
Refactor common logic
liamzwbao Oct 8, 2025
a7cdd33
Use rescale_decimal for variant decimal scaling
liamzwbao Oct 8, 2025
94d60c0
Fix clippy
liamzwbao Oct 8, 2025
338defe
Merge branch 'main' into issue-8477-variant-to-arrow-decimal
liamzwbao Oct 9, 2025
e1febf6
Address comments
liamzwbao Oct 9, 2025
51648fd
Move rescale_decimal into variant-compute
liamzwbao Oct 10, 2025
a48bbf4
Revert changes in arrow-cast
liamzwbao Oct 10, 2025
cb2576c
Fix doc
liamzwbao Oct 10, 2025
5ffab93
Merge branch 'main' into issue-8477-variant-to-arrow-decimal
liamzwbao Oct 10, 2025
ef62474
Return value instead of fn
liamzwbao Oct 10, 2025
21a83ed
Merge branch 'main' into issue-8477-variant-to-arrow-decimal
liamzwbao Oct 10, 2025
25e4aa9
Fix large scale reduction case
liamzwbao Oct 10, 2025
539d73f
Reuse DecimalCast
liamzwbao Oct 10, 2025
dfe9960
Merge branch 'main' into issue-8477-variant-to-arrow-decimal
liamzwbao Oct 14, 2025
cfc8580
Use trait VariantDecimalType
liamzwbao Oct 15, 2025
9ed0d7a
Add doc
liamzwbao Oct 15, 2025
0567cb6
Refactor tests to use `into`
liamzwbao Oct 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion arrow-cast/src/cast/decimal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,23 @@ use crate::cast::*;

/// A utility trait that provides checked conversions between
/// decimal types inspired by [`NumCast`]
///
/// Every conversion returns an `Option`; `None` signals that the checked
/// conversion did not succeed (presumably because the value is not
/// representable in the target type -- confirm against the implementations).
pub(crate) trait DecimalCast: Sized {
pub trait DecimalCast: Sized {
/// Convert the decimal to an `i32`, or `None` if the conversion fails
fn to_i32(self) -> Option<i32>;

/// Convert the decimal to an `i64`, or `None` if the conversion fails
fn to_i64(self) -> Option<i64>;

/// Convert the decimal to an `i128`, or `None` if the conversion fails
fn to_i128(self) -> Option<i128>;

/// Convert the decimal to an `i256`, or `None` if the conversion fails
fn to_i256(self) -> Option<i256>;

/// Convert a value of another [`DecimalCast`] type `T` into `Self`,
/// or `None` if the conversion fails
fn from_decimal<T: DecimalCast>(n: T) -> Option<Self>;

/// Convert an `f64` into `Self`, or `None` if the conversion fails
fn from_f64(n: f64) -> Option<Self>;
}

Expand Down
2 changes: 2 additions & 0 deletions arrow-cast/src/cast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ use arrow_schema::*;
use arrow_select::take::take;
use num_traits::{NumCast, ToPrimitive, cast::AsPrimitive};

pub use decimal::DecimalCast;

/// CastOptions provides a way to override the default cast behaviors
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct CastOptions<'a> {
Expand Down
176 changes: 173 additions & 3 deletions parquet-variant-compute/src/type_conversion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,13 @@

//! Module for transforming a typed arrow `Array` to `VariantArray`.

use arrow::datatypes::{self, ArrowPrimitiveType, ArrowTimestampType, Date32Type};
use parquet_variant::Variant;
use arrow::array::ArrowNativeTypeOp;
use arrow::compute::DecimalCast;
use arrow::datatypes::{
self, ArrowPrimitiveType, ArrowTimestampType, Decimal32Type, Decimal64Type, Decimal128Type,
DecimalType,
};
use parquet_variant::{Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16};

/// Options for controlling the behavior of `cast_to_variant_with_options`.
#[derive(Debug, Clone, PartialEq, Eq)]
Expand Down Expand Up @@ -82,7 +87,7 @@ impl_primitive_from_variant!(datatypes::Float64Type, as_f64);
impl_primitive_from_variant!(
datatypes::Date32Type,
as_naive_date,
Date32Type::from_naive_date
datatypes::Date32Type::from_naive_date
);
impl_timestamp_from_variant!(
datatypes::TimestampMicrosecondType,
Expand All @@ -109,6 +114,171 @@ impl_timestamp_from_variant!(
|timestamp| Self::make_value(timestamp.naive_utc())
);

/// Returns the unscaled integer representation for Arrow decimal type `O`
/// from a `Variant`.
///
/// - `precision` and `scale` specify the target Arrow decimal parameters
/// - Integer variants (`Int8/16/32/64`) are treated as decimals with scale 0 whose
///   precision is the maximum number of decimal digits of the integer width
///   (3, 5, 10 and 19 digits respectively)
/// - Decimal variants (`Decimal4/8/16`) use their embedded scale together with the
///   maximum precision of their variant decimal width
///
/// The value is rescaled to (`precision`, `scale`) using `rescale_decimal`.
///
/// Returns `None` if the variant is neither an integer nor a decimal, or if the
/// rescaled value cannot fit the requested precision.
pub(crate) fn variant_to_unscaled_decimal<O>(
    variant: &Variant<'_, '_>,
    precision: u8,
    scale: i8,
) -> Option<O::Native>
where
    O: DecimalType,
    O::Native: DecimalCast,
{
    // Number of decimal digits needed to represent every value of each integer width.
    //
    // `rescale_decimal` skips the final precision validation whenever the
    // precision/scale arithmetic says the cast cannot fail, so the input precision
    // must never be under-reported. Describing an `i32` as a 9-digit decimal (or an
    // `i64` as an 18-digit one) would let values such as 2_000_000_000 through
    // unvalidated when the target is e.g. `Decimal(9, 0)`.
    const INT8_MAX_DIGITS: u8 = 3; // i8::MIN = -128
    const INT16_MAX_DIGITS: u8 = 5; // i16::MIN = -32_768
    const INT32_MAX_DIGITS: u8 = 10; // i32::MIN = -2_147_483_648
    const INT64_MAX_DIGITS: u8 = 19; // i64::MIN = -9_223_372_036_854_775_808

    match variant {
        Variant::Int8(i) => rescale_decimal::<Decimal32Type, O>(
            i32::from(*i),
            INT8_MAX_DIGITS,
            0,
            precision,
            scale,
        ),
        Variant::Int16(i) => rescale_decimal::<Decimal32Type, O>(
            i32::from(*i),
            INT16_MAX_DIGITS,
            0,
            precision,
            scale,
        ),
        // The input precision is only used for the infallibility arithmetic, so it
        // may exceed the maximum precision of the carrier decimal type.
        Variant::Int32(i) => rescale_decimal::<Decimal32Type, O>(
            *i,
            INT32_MAX_DIGITS,
            0,
            precision,
            scale,
        ),
        Variant::Int64(i) => rescale_decimal::<Decimal64Type, O>(
            *i,
            INT64_MAX_DIGITS,
            0,
            precision,
            scale,
        ),
        Variant::Decimal4(d) => rescale_decimal::<Decimal32Type, O>(
            d.integer(),
            VariantDecimal4::MAX_PRECISION,
            d.scale() as i8,
            precision,
            scale,
        ),
        Variant::Decimal8(d) => rescale_decimal::<Decimal64Type, O>(
            d.integer(),
            VariantDecimal8::MAX_PRECISION,
            d.scale() as i8,
            precision,
            scale,
        ),
        Variant::Decimal16(d) => rescale_decimal::<Decimal128Type, O>(
            d.integer(),
            VariantDecimal16::MAX_PRECISION,
            d.scale() as i8,
            precision,
            scale,
        ),
        // Every other variant kind has no decimal interpretation here.
        _ => None,
    }
}

/// Rescale a decimal from (input_precision, input_scale) to (output_precision, output_scale)
/// and return the scaled value if it fits the output precision. Similar to the implementation in
/// decimal.rs in arrow-cast.
///
/// With `delta_scale = output_scale - input_scale`:
/// - `delta_scale == 0`: the value is only converted to `O::Native`
/// - `delta_scale > 0`: the value is multiplied by `10^delta_scale` using checked arithmetic
/// - `delta_scale < 0`: the value is divided by a divisor derived from
///   `I::MAX_FOR_EACH_PRECISION` and rounded half away from zero; if that lookup
///   fails the result is zero
///
/// Returns `None` if a conversion or checked operation fails, or if the result is not a
/// valid decimal of `output_precision`. The validity check is skipped when
/// `is_infallible_decimal_cast` reports that the cast cannot fail.
pub(crate) fn rescale_decimal<I, O>(
value: I::Native,
input_precision: u8,
input_scale: i8,
output_precision: u8,
output_scale: i8,
) -> Option<O::Native>
where
I: DecimalType,
O: DecimalType,
Comment on lines +192 to +201
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tiny nit to consider (saves space)

Suggested change
pub(crate) fn rescale_decimal<I, O>(
value: I::Native,
input_precision: u8,
input_scale: i8,
output_precision: u8,
output_scale: i8,
) -> Option<O::Native>
where
I: DecimalType,
O: DecimalType,
pub(crate) fn rescale_decimal<I: DecimalType, O: DecimalType>(
value: I::Native,
input_precision: u8,
input_scale: i8,
output_precision: u8,
output_scale: i8,
) -> Option<O::Native>
where

I::Native: DecimalCast,
O::Native: DecimalCast,
{
let delta_scale = output_scale - input_scale;

// Determine if the cast is infallible based on precision/scale math
let is_infallible_cast =
is_infallible_decimal_cast(input_precision, input_scale, output_precision, output_scale);

Comment on lines +206 to +210
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: move this whole block down to where we actually use it -- declare near first (only) use

let scaled = if delta_scale == 0 {
O::Native::from_decimal(value)
} else if delta_scale > 0 {
let mul = O::Native::from_decimal(10_i128)
.and_then(|t| t.pow_checked(delta_scale as u32).ok())?;
Comment on lines +214 to +215
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could use the same performance optimization as the negative scale case below:

Suggested change
let mul = O::Native::from_decimal(10_i128)
.and_then(|t| t.pow_checked(delta_scale as u32).ok())?;
let max = O::MAX_FOR_EACH_PRECISION.get(delta_scale)?;
let mul = max.add_wrapping(O::Native::ONE);

(it didn't matter much in the columnar decimal cast code, but it probably does matter in row-wise variant cast code)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also -- we should benchmark, but it might be faster to multiply by one than to execute the branch that distinguishes between zero and positive delta scale. If so, we would want code like this:

let (scaled, is_infallible_cast) = if delta_scale < 0 {
    // ... big comment about why ...
    let is_infallible = input_precision + delta_scale < output_precision;

    // ... comment about dividing out too many digits ...
    let delta_scale = delta_scale.unsigned_abs() as usize;
    let Some(max) = ... else { ... return zero ... };
      ...
    (O::Native::from_decimal(adjusted)?, is_infallible_cast)
} else {
    // ... big comment explaining why ...
    let is_infallible_cast = input_precision + delta_scale <= output_precision;

    let max = O::MAX_FOR_EACH_PRECISION.get(delta_scale)?;
    let mul = max.add_wrapping(O::Native::ONE);
    let x = O::Native::from_decimal(value)?;
    (x.mul_checked(mul).ok()?, is_infallible_cast)
}

(is_infallible_cast || O::is_valid_decimal_precision(scaled, output_precision)).then(scaled)

O::Native::from_decimal(value).and_then(|x| x.mul_checked(mul).ok())
} else {
// delta_scale is negative in this branch, and its magnitude may be larger than
// I::MAX_PRECISION. If so, the scale change divides out more digits than the input has
// precision and the result of the cast is always zero. For example, if we try to reduce the
// scale of a decimal32 value by 10 digits, the largest possible result is
// 999999999/10000000000 = 0.0999999999, which rounds to zero. Smaller values
// (e.g. 1/10000000000) or larger scale reductions (e.g. 999999999/10000000000000) produce
// even smaller results, which also round to zero. In that case, just return zero.
let delta_scale = delta_scale.unsigned_abs() as usize;
let Some(max) = I::MAX_FOR_EACH_PRECISION.get(delta_scale) else {
return Some(O::Native::ZERO);
};
// The divisor is `max + 1`; presumably 10^delta_scale, given that `max` is the largest
// value with `delta_scale` digits -- TODO confirm against MAX_FOR_EACH_PRECISION
let div = max.add_wrapping(I::Native::ONE);
let half = div.div_wrapping(I::Native::ONE.add_wrapping(I::Native::ONE));
let half_neg = half.neg_wrapping();

// div is >= 10 and so this cannot overflow
let d = value.div_wrapping(div);
let r = value.mod_wrapping(div);

// Round half away from zero: move the quotient one step further from zero when the
// remainder reaches half of the divisor
let adjusted = match value >= I::Native::ZERO {
true if r >= half => d.add_wrapping(I::Native::ONE),
false if r <= half_neg => d.sub_wrapping(I::Native::ONE),
_ => d,
};
O::Native::from_decimal(adjusted)
};

// Only validate against the output precision when the cast is not known to be infallible
scaled.filter(|v| is_infallible_cast || O::is_valid_decimal_precision(*v, output_precision))
}

/// Returns true if casting from (input_precision, input_scale) to
/// (output_precision, output_scale) is infallible based on precision/scale math.
///
/// The decision uses only the four parameters, never an actual value:
/// - when the scale does not decrease, the output must have room for the input digits
///   plus the digits added by the scale increase
/// - when the scale decreases, the output must have room for the remaining digits plus
///   one extra digit that rounding may introduce
fn is_infallible_decimal_cast(
input_precision: u8,
input_scale: i8,
output_precision: u8,
output_scale: i8,
) -> bool {
let delta_scale = output_scale - input_scale;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: we could have passed this in, our caller already computed it. but I guess this is more regular?

// Convert the precisions to i8 so they can be combined with the signed delta_scale
let input_precision = input_precision as i8;
let output_precision = output_precision as i8;
if delta_scale >= 0 {
// if the gain in precision (digits) is at least as large as the multiplication due to
// scaling, every number will fit into the output type
// Example: If we are starting with any number of precision 5 [xxxxx],
// then an increase of scale by 3 will have the following effect on the representation:
// [xxxxx] -> [xxxxx000], so for the cast to be infallible, the output type
// needs to provide at least 8 digits precision
input_precision + delta_scale <= output_precision
} else {
// if the reduction of the input number through scaling (dividing) is greater
// than a possible precision loss (plus potential increase via rounding)
// every input number will fit into the output type
// Example: If we are starting with any number of precision 5 [xxxxx],
// then a decrease of the scale by 3 will have the following effect on the representation:
// [xxxxx] -> [xx] (+ 1 possibly, due to rounding).
// The rounding may add an additional digit, so for the cast to be infallible,
// the output type needs to have at least 3 digits of precision.
// e.g. Decimal(5, 3) 99.999 to Decimal(3, 0) will result in 100:
// [99999] -> [99] + 1 = [100], a cast to Decimal(2, 0) would not be possible
input_precision + delta_scale < output_precision
}
}

/// Convert the value at a specific index in the given array into a `Variant`.
macro_rules! non_generic_conversion_single_value {
($array:expr, $cast_fn:expr, $index:expr) => {{
Expand Down
Loading
Loading