Skip to content

Commit 9b6d0e1

Browse files
committed
address comments
1 parent 522b26a commit 9b6d0e1

File tree

4 files changed

+70
-98
lines changed

4 files changed

+70
-98
lines changed

parquet-variant-compute/src/type_conversion.rs

Lines changed: 33 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -63,42 +63,39 @@ impl_primitive_from_variant!(datatypes::Float64Type, as_f64);
6363

6464
macro_rules! scale_variant_decimal {
6565
($variant:expr, $variant_method:ident, $to_int_ty:expr, $output_scale:expr, $precision:expr, $validate:path) => {{
66-
(|| -> Option<_> {
67-
let variant = $variant.$variant_method()?;
68-
let input_scale = variant.scale() as i8;
69-
let variant = $to_int_ty(variant.integer());
70-
let ten = $to_int_ty(10);
71-
72-
let scaled = if input_scale == $output_scale {
73-
Some(variant)
74-
} else if input_scale < $output_scale {
75-
// scale_up means output has more fractional digits than input
76-
// multiply integer by 10^(output_scale - input_scale)
77-
let delta = ($output_scale - input_scale) as u32;
78-
let mul = ten.checked_pow(delta)?;
79-
variant.checked_mul(mul)
80-
} else {
81-
// scale_down means output has fewer fractional digits than input
82-
// divide by 10^(input_scale - output_scale) with rounding
83-
let delta = (input_scale - $output_scale) as u32;
84-
let div = ten.checked_pow(delta)?;
85-
let v = variant;
86-
let d = v.checked_div(div)?;
87-
let r = v % div;
88-
89-
// rounding in the same way as convert_to_smaller_scale_decimal in arrow-cast
90-
let half = div.checked_div($to_int_ty(2))?;
91-
let half_neg = half.checked_neg()?;
92-
let adjusted = match v >= $to_int_ty(0) {
93-
true if r >= half => d.checked_add($to_int_ty(1))?,
94-
false if r <= half_neg => d.checked_sub($to_int_ty(1))?,
95-
_ => d,
96-
};
97-
Some(adjusted)
66+
let variant = $variant.$variant_method()?;
67+
let input_scale = variant.scale() as i8;
68+
let variant = $to_int_ty(variant.integer());
69+
let ten = $to_int_ty(10);
70+
71+
let scaled = if input_scale == $output_scale {
72+
Some(variant)
73+
} else if input_scale < $output_scale {
74+
// scale_up means output has more fractional digits than input
75+
// multiply integer by 10^(output_scale - input_scale)
76+
let delta = ($output_scale - input_scale) as u32;
77+
let mul = ten.checked_pow(delta)?;
78+
variant.checked_mul(mul)
79+
} else {
80+
// scale_down means output has fewer fractional digits than input
81+
// divide by 10^(input_scale - output_scale) with rounding
82+
let delta = (input_scale - $output_scale) as u32;
83+
let div = ten.checked_pow(delta)?;
84+
let d = variant.checked_div(div)?;
85+
let r = variant % div;
86+
87+
// rounding in the same way as convert_to_smaller_scale_decimal in arrow-cast
88+
let half = div.checked_div($to_int_ty(2))?;
89+
let half_neg = half.checked_neg()?;
90+
let adjusted = match variant >= $to_int_ty(0) {
91+
true if r >= half => d.checked_add($to_int_ty(1))?,
92+
false if r <= half_neg => d.checked_sub($to_int_ty(1))?,
93+
_ => d,
9894
};
95+
Some(adjusted)
96+
};
9997

100-
scaled.filter(|v| $validate(*v, $precision))
101-
})()
98+
scaled.filter(|v| $validate(*v, $precision))
10299
}};
103100
}
104101
pub(crate) use scale_variant_decimal;
@@ -151,9 +148,8 @@ macro_rules! decimal_to_variant_decimal {
151148
let (v, scale) = if *$scale < 0 {
152149
// For negative scale, we need to multiply the value by 10^|scale|
153150
// For example: 123 with scale -2 becomes 12300 with scale 0
154-
let v = (10 as $value_type)
155-
.checked_pow((-*$scale) as u32)
156-
.and_then(|m| m.checked_mul($v));
151+
let v =
152+
<$value_type>::checked_pow(10, (-*$scale) as u32).and_then(|m| m.checked_mul($v));
157153
(v, 0u8)
158154
} else {
159155
(Some($v), *$scale as u8)

parquet-variant-compute/src/variant_get.rs

Lines changed: 12 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -308,10 +308,7 @@ mod test {
308308
use arrow::compute::CastOptions;
309309
use arrow::datatypes::DataType::{Int16, Int32, Int64};
310310
use arrow::datatypes::i256;
311-
use arrow_schema::{
312-
DECIMAL32_MAX_PRECISION, DECIMAL64_MAX_PRECISION, DECIMAL128_MAX_PRECISION, DataType,
313-
Field, FieldRef, Fields,
314-
};
311+
use arrow_schema::{DataType, Field, FieldRef, Fields};
315312
use chrono::DateTime;
316313
use parquet_variant::{
317314
EMPTY_VARIANT_METADATA_BYTES, Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16,
@@ -2688,18 +2685,6 @@ mod test {
26882685
Arc::new(struct_array)
26892686
}
26902687

2691-
macro_rules! max_unscaled_value {
2692-
(32, $precision:expr) => {
2693-
(u32::pow(10, $precision as u32) - 1) as i32
2694-
};
2695-
(64, $precision:expr) => {
2696-
(u64::pow(10, $precision as u32) - 1) as i64
2697-
};
2698-
(128, $precision:expr) => {
2699-
(u128::pow(10, $precision as u32) - 1) as i128
2700-
};
2701-
}
2702-
27032688
#[test]
27042689
fn get_decimal32_unshredded_var_scales_to_scale2() {
27052690
// Build unshredded variant values with different scales
@@ -2750,7 +2735,7 @@ mod test {
27502735
// Exceed Decimal32 max precision (9) after scaling
27512736
let mut builder = crate::VariantArrayBuilder::new(1);
27522737
builder.append_variant(Variant::from(
2753-
VariantDecimal4::try_new(max_unscaled_value!(32, DECIMAL32_MAX_PRECISION), 0).unwrap(),
2738+
VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE as i32, 0).unwrap(),
27542739
));
27552740
let variant_array: ArrayRef = ArrayRef::from(builder.build());
27562741

@@ -2766,7 +2751,7 @@ mod test {
27662751
fn get_decimal32_precision_overflow_unsafe_errors() {
27672752
let mut builder = crate::VariantArrayBuilder::new(1);
27682753
builder.append_variant(Variant::from(
2769-
VariantDecimal4::try_new(max_unscaled_value!(32, DECIMAL32_MAX_PRECISION), 0).unwrap(),
2754+
VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE as i32, 0).unwrap(),
27702755
));
27712756
let variant_array: ArrayRef = ArrayRef::from(builder.build());
27722757

@@ -2836,7 +2821,7 @@ mod test {
28362821
// Exceed Decimal64 max precision (18) after scaling
28372822
let mut builder = crate::VariantArrayBuilder::new(1);
28382823
builder.append_variant(Variant::from(
2839-
VariantDecimal8::try_new(max_unscaled_value!(64, DECIMAL64_MAX_PRECISION), 0).unwrap(),
2824+
VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE as i64, 0).unwrap(),
28402825
));
28412826
let variant_array: ArrayRef = ArrayRef::from(builder.build());
28422827

@@ -2852,7 +2837,7 @@ mod test {
28522837
fn get_decimal64_precision_overflow_unsafe_errors() {
28532838
let mut builder = crate::VariantArrayBuilder::new(1);
28542839
builder.append_variant(Variant::from(
2855-
VariantDecimal8::try_new(max_unscaled_value!(64, DECIMAL64_MAX_PRECISION), 0).unwrap(),
2840+
VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE as i64, 0).unwrap(),
28562841
));
28572842
let variant_array: ArrayRef = ArrayRef::from(builder.build());
28582843

@@ -2922,8 +2907,7 @@ mod test {
29222907
// Exceed Decimal128 max precision (38) after scaling
29232908
let mut builder = crate::VariantArrayBuilder::new(1);
29242909
builder.append_variant(Variant::from(
2925-
VariantDecimal16::try_new(max_unscaled_value!(128, DECIMAL128_MAX_PRECISION), 0)
2926-
.unwrap(),
2910+
VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE as i128, 0).unwrap(),
29272911
));
29282912
let variant_array: ArrayRef = ArrayRef::from(builder.build());
29292913

@@ -2939,8 +2923,7 @@ mod test {
29392923
fn get_decimal128_precision_overflow_unsafe_errors() {
29402924
let mut builder = crate::VariantArrayBuilder::new(1);
29412925
builder.append_variant(Variant::from(
2942-
VariantDecimal16::try_new(max_unscaled_value!(128, DECIMAL128_MAX_PRECISION), 0)
2943-
.unwrap(),
2926+
VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE as i128, 0).unwrap(),
29442927
));
29452928
let variant_array: ArrayRef = ArrayRef::from(builder.build());
29462929

@@ -3009,12 +2992,10 @@ mod test {
30092992
// Exceed Decimal128 max precision (38) after scaling
30102993
let mut builder = crate::VariantArrayBuilder::new(2);
30112994
builder.append_variant(Variant::from(
3012-
VariantDecimal16::try_new(max_unscaled_value!(128, DECIMAL128_MAX_PRECISION), 1)
3013-
.unwrap(),
2995+
VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE as i128, 1).unwrap(),
30142996
));
30152997
builder.append_variant(Variant::from(
3016-
VariantDecimal16::try_new(max_unscaled_value!(128, DECIMAL128_MAX_PRECISION), 0)
3017-
.unwrap(),
2998+
VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE as i128, 0).unwrap(),
30182999
));
30193000
let variant_array: ArrayRef = ArrayRef::from(builder.build());
30203001

@@ -3027,7 +3008,7 @@ mod test {
30273008
// So expected integer is (10^38-1) * 10^(39-1) = (10^38-1) * 10^38
30283009
let base = i256::from_i128(10);
30293010
let factor = base.checked_pow(38).unwrap();
3030-
let expected = i256::from_i128(max_unscaled_value!(128, DECIMAL128_MAX_PRECISION))
3011+
let expected = i256::from_i128(VariantDecimal16::MAX_UNSCALED_VALUE as i128)
30313012
.checked_mul(factor)
30323013
.unwrap();
30333014
assert_eq!(result.value(0), expected);
@@ -3039,12 +3020,10 @@ mod test {
30393020
// Exceed Decimal128 max precision (38) after scaling
30403021
let mut builder = crate::VariantArrayBuilder::new(2);
30413022
builder.append_variant(Variant::from(
3042-
VariantDecimal16::try_new(max_unscaled_value!(128, DECIMAL128_MAX_PRECISION), 1)
3043-
.unwrap(),
3023+
VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE as i128, 1).unwrap(),
30443024
));
30453025
builder.append_variant(Variant::from(
3046-
VariantDecimal16::try_new(max_unscaled_value!(128, DECIMAL128_MAX_PRECISION), 0)
3047-
.unwrap(),
3026+
VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE as i128, 0).unwrap(),
30483027
));
30493028
let variant_array: ArrayRef = ArrayRef::from(builder.build());
30503029

parquet-variant-compute/src/variant_to_arrow.rs

Lines changed: 22 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@
1818
use arrow::array::{ArrayRef, BinaryViewArray, NullBufferBuilder, PrimitiveBuilder};
1919
use arrow::compute::CastOptions;
2020
use arrow::datatypes::{
21-
self, ArrowPrimitiveType, DataType, Decimal32Type, Decimal64Type, Decimal128Type,
22-
Decimal256Type, i256, is_validate_decimal_precision, is_validate_decimal32_precision,
23-
is_validate_decimal64_precision, is_validate_decimal256_precision,
21+
self, ArrowPrimitiveType, DataType, i256, is_validate_decimal_precision,
22+
is_validate_decimal32_precision, is_validate_decimal64_precision,
23+
is_validate_decimal256_precision,
2424
};
2525
use arrow::error::{ArrowError, Result};
2626
use parquet_variant::{Variant, VariantPath};
@@ -376,18 +376,18 @@ impl<'a, T: PrimitiveFromVariant> VariantToPrimitiveArrowRowBuilder<'a, T> {
376376
}
377377

378378
// Minimal per-decimal hook: just wraps scale_variant_decimal! with correct parameters
379-
pub(crate) trait VariantDecimalScaler: datatypes::DecimalType {
380-
fn scale_from_variant(
379+
pub(crate) trait RescaleVariantDecimal: datatypes::DecimalType {
380+
fn rescale_variant_decimal(
381381
value: &Variant<'_, '_>,
382382
scale: i8,
383383
precision: u8,
384384
) -> Option<<Self as ArrowPrimitiveType>::Native>;
385385
}
386386

387-
macro_rules! impl_variant_decimal_scaler {
387+
macro_rules! impl_rescale_variant_decimal {
388388
($t:ty, $variant_method:ident, $to_native:expr, $validate:path) => {
389-
impl VariantDecimalScaler for $t {
390-
fn scale_from_variant(
389+
impl RescaleVariantDecimal for $t {
390+
fn rescale_variant_decimal(
391391
value: &Variant<'_, '_>,
392392
scale: i8,
393393
precision: u8,
@@ -405,43 +405,40 @@ macro_rules! impl_variant_decimal_scaler {
405405
};
406406
}
407407

408-
impl_variant_decimal_scaler!(
409-
Decimal32Type,
408+
impl_rescale_variant_decimal!(
409+
datatypes::Decimal32Type,
410410
as_decimal4,
411-
|x: i32| x,
411+
i32::from,
412412
is_validate_decimal32_precision
413413
);
414-
impl_variant_decimal_scaler!(
415-
Decimal64Type,
414+
impl_rescale_variant_decimal!(
415+
datatypes::Decimal64Type,
416416
as_decimal8,
417-
|x: i64| x,
417+
i64::from,
418418
is_validate_decimal64_precision
419419
);
420-
impl_variant_decimal_scaler!(
421-
Decimal128Type,
420+
impl_rescale_variant_decimal!(
421+
datatypes::Decimal128Type,
422422
as_decimal16,
423-
|x: i128| x,
423+
i128::from,
424424
is_validate_decimal_precision
425425
);
426-
impl_variant_decimal_scaler!(
427-
Decimal256Type,
426+
impl_rescale_variant_decimal!(
427+
datatypes::Decimal256Type,
428428
as_decimal16,
429429
i256::from_i128,
430430
is_validate_decimal256_precision
431431
);
432432

433433
/// Builder for converting variant values to arrow Decimal values
434-
pub(crate) struct VariantToDecimalArrowRowBuilder<
435-
'a,
436-
T: datatypes::DecimalType + VariantDecimalScaler,
437-
> {
434+
pub(crate) struct VariantToDecimalArrowRowBuilder<'a, T: RescaleVariantDecimal> {
438435
builder: PrimitiveBuilder<T>,
439436
cast_options: &'a CastOptions<'a>,
440437
precision: u8,
441438
scale: i8,
442439
}
443440

444-
impl<'a, T: datatypes::DecimalType + VariantDecimalScaler> VariantToDecimalArrowRowBuilder<'a, T> {
441+
impl<'a, T: RescaleVariantDecimal> VariantToDecimalArrowRowBuilder<'a, T> {
445442
fn new(
446443
cast_options: &'a CastOptions<'a>,
447444
capacity: usize,
@@ -464,7 +461,7 @@ impl<'a, T: datatypes::DecimalType + VariantDecimalScaler> VariantToDecimalArrow
464461
}
465462

466463
fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
467-
if let Some(scaled) = T::scale_from_variant(value, self.scale, self.precision) {
464+
if let Some(scaled) = T::rescale_variant_decimal(value, self.scale, self.precision) {
468465
self.builder.append_value(scaled);
469466
Ok(true)
470467
} else if self.cast_options.safe {

parquet-variant/src/variant/decimal.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ pub struct VariantDecimal4 {
8787

8888
impl VariantDecimal4 {
8989
pub(crate) const MAX_PRECISION: u8 = 9;
90-
pub(crate) const MAX_UNSCALED_VALUE: u32 = u32::pow(10, Self::MAX_PRECISION as u32) - 1;
90+
pub const MAX_UNSCALED_VALUE: u32 = u32::pow(10, Self::MAX_PRECISION as u32) - 1;
9191

9292
pub fn try_new(integer: i32, scale: u8) -> Result<Self, ArrowError> {
9393
decimal_try_new!(integer, scale)
@@ -137,7 +137,7 @@ pub struct VariantDecimal8 {
137137

138138
impl VariantDecimal8 {
139139
pub(crate) const MAX_PRECISION: u8 = 18;
140-
pub(crate) const MAX_UNSCALED_VALUE: u64 = u64::pow(10, Self::MAX_PRECISION as u32) - 1;
140+
pub const MAX_UNSCALED_VALUE: u64 = u64::pow(10, Self::MAX_PRECISION as u32) - 1;
141141

142142
pub fn try_new(integer: i64, scale: u8) -> Result<Self, ArrowError> {
143143
decimal_try_new!(integer, scale)
@@ -187,7 +187,7 @@ pub struct VariantDecimal16 {
187187

188188
impl VariantDecimal16 {
189189
const MAX_PRECISION: u8 = 38;
190-
const MAX_UNSCALED_VALUE: u128 = u128::pow(10, Self::MAX_PRECISION as u32) - 1;
190+
pub const MAX_UNSCALED_VALUE: u128 = u128::pow(10, Self::MAX_PRECISION as u32) - 1;
191191

192192
pub fn try_new(integer: i128, scale: u8) -> Result<Self, ArrowError> {
193193
decimal_try_new!(integer, scale)

0 commit comments

Comments
 (0)