1717
1818//! Module for transforming a typed arrow `Array` to `VariantArray`.
1919
20- use arrow:: datatypes:: { self , ArrowPrimitiveType , ArrowTimestampType , Date32Type } ;
21- use parquet_variant:: Variant ;
20+ use arrow:: array:: ArrowNativeTypeOp ;
21+ use arrow:: compute:: DecimalCast ;
22+ use arrow:: datatypes:: {
23+ self , ArrowPrimitiveType , ArrowTimestampType , Decimal32Type , Decimal64Type , Decimal128Type ,
24+ DecimalType ,
25+ } ;
26+ use parquet_variant:: { Variant , VariantDecimal4 , VariantDecimal8 , VariantDecimal16 } ;
2227
2328/// Options for controlling the behavior of `cast_to_variant_with_options`.
2429#[ derive( Debug , Clone , PartialEq , Eq ) ]
@@ -82,7 +87,7 @@ impl_primitive_from_variant!(datatypes::Float64Type, as_f64);
8287impl_primitive_from_variant ! (
8388 datatypes:: Date32Type ,
8489 as_naive_date,
85- Date32Type :: from_naive_date
90+ datatypes :: Date32Type :: from_naive_date
8691) ;
8792impl_timestamp_from_variant ! (
8893 datatypes:: TimestampMicrosecondType ,
@@ -109,6 +114,171 @@ impl_timestamp_from_variant!(
109114 |timestamp| Self :: make_value( timestamp. naive_utc( ) )
110115) ;
111116
117+ /// Returns the unscaled integer representation for Arrow decimal type `O`
118+ /// from a `Variant`.
119+ ///
120+ /// - `precision` and `scale` specify the target Arrow decimal parameters
121+ /// - Integer variants (`Int8/16/32/64`) are treated as decimals with scale 0
122+ /// - Decimal variants (`Decimal4/8/16`) use their embedded precision and scale
123+ ///
124+ /// The value is rescaled to (`precision`, `scale`) using `rescale_decimal` and
125+ /// returns `None` if it cannot fit the requested precision.
126+ pub ( crate ) fn variant_to_unscaled_decimal < O > (
127+ variant : & Variant < ' _ , ' _ > ,
128+ precision : u8 ,
129+ scale : i8 ,
130+ ) -> Option < O :: Native >
131+ where
132+ O : DecimalType ,
133+ O :: Native : DecimalCast ,
134+ {
135+ match variant {
136+ Variant :: Int8 ( i) => rescale_decimal :: < Decimal32Type , O > (
137+ * i as i32 ,
138+ VariantDecimal4 :: MAX_PRECISION ,
139+ 0 ,
140+ precision,
141+ scale,
142+ ) ,
143+ Variant :: Int16 ( i) => rescale_decimal :: < Decimal32Type , O > (
144+ * i as i32 ,
145+ VariantDecimal4 :: MAX_PRECISION ,
146+ 0 ,
147+ precision,
148+ scale,
149+ ) ,
150+ Variant :: Int32 ( i) => rescale_decimal :: < Decimal32Type , O > (
151+ * i,
152+ VariantDecimal4 :: MAX_PRECISION ,
153+ 0 ,
154+ precision,
155+ scale,
156+ ) ,
157+ Variant :: Int64 ( i) => rescale_decimal :: < Decimal64Type , O > (
158+ * i,
159+ VariantDecimal8 :: MAX_PRECISION ,
160+ 0 ,
161+ precision,
162+ scale,
163+ ) ,
164+ Variant :: Decimal4 ( d) => rescale_decimal :: < Decimal32Type , O > (
165+ d. integer ( ) ,
166+ VariantDecimal4 :: MAX_PRECISION ,
167+ d. scale ( ) as i8 ,
168+ precision,
169+ scale,
170+ ) ,
171+ Variant :: Decimal8 ( d) => rescale_decimal :: < Decimal64Type , O > (
172+ d. integer ( ) ,
173+ VariantDecimal8 :: MAX_PRECISION ,
174+ d. scale ( ) as i8 ,
175+ precision,
176+ scale,
177+ ) ,
178+ Variant :: Decimal16 ( d) => rescale_decimal :: < Decimal128Type , O > (
179+ d. integer ( ) ,
180+ VariantDecimal16 :: MAX_PRECISION ,
181+ d. scale ( ) as i8 ,
182+ precision,
183+ scale,
184+ ) ,
185+ _ => None ,
186+ }
187+ }
188+
189+ /// Rescale a decimal from (input_precision, input_scale) to (output_precision, output_scale)
190+ /// and return the scaled value if it fits the output precision. Similar to the implementation in
191+ /// decimal.rs in arrow-cast.
192+ pub ( crate ) fn rescale_decimal < I , O > (
193+ value : I :: Native ,
194+ input_precision : u8 ,
195+ input_scale : i8 ,
196+ output_precision : u8 ,
197+ output_scale : i8 ,
198+ ) -> Option < O :: Native >
199+ where
200+ I : DecimalType ,
201+ O : DecimalType ,
202+ I :: Native : DecimalCast ,
203+ O :: Native : DecimalCast ,
204+ {
205+ let delta_scale = output_scale - input_scale;
206+
207+ // Determine if the cast is infallible based on precision/scale math
208+ let is_infallible_cast =
209+ is_infallible_decimal_cast ( input_precision, input_scale, output_precision, output_scale) ;
210+
211+ let scaled = if delta_scale == 0 {
212+ O :: Native :: from_decimal ( value)
213+ } else if delta_scale > 0 {
214+ let mul = O :: Native :: from_decimal ( 10_i128 )
215+ . and_then ( |t| t. pow_checked ( delta_scale as u32 ) . ok ( ) ) ?;
216+ O :: Native :: from_decimal ( value) . and_then ( |x| x. mul_checked ( mul) . ok ( ) )
217+ } else {
218+ // delta_scale is guaranteed to be > 0, but may also be larger than I::MAX_PRECISION. If so, the
219+ // scale change divides out more digits than the input has precision and the result of the cast
220+ // is always zero. For example, if we try to apply delta_scale=10 a decimal32 value, the largest
221+ // possible result is 999999999/10000000000 = 0.0999999999, which rounds to zero. Smaller values
222+ // (e.g. 1/10000000000) or larger delta_scale (e.g. 999999999/10000000000000) produce even
223+ // smaller results, which also round to zero. In that case, just return an array of zeros.
224+ let delta_scale = delta_scale. unsigned_abs ( ) as usize ;
225+ let Some ( max) = I :: MAX_FOR_EACH_PRECISION . get ( delta_scale) else {
226+ return Some ( O :: Native :: ZERO ) ;
227+ } ;
228+ let div = max. add_wrapping ( I :: Native :: ONE ) ;
229+ let half = div. div_wrapping ( I :: Native :: ONE . add_wrapping ( I :: Native :: ONE ) ) ;
230+ let half_neg = half. neg_wrapping ( ) ;
231+
232+ // div is >= 10 and so this cannot overflow
233+ let d = value. div_wrapping ( div) ;
234+ let r = value. mod_wrapping ( div) ;
235+
236+ // Round result
237+ let adjusted = match value >= I :: Native :: ZERO {
238+ true if r >= half => d. add_wrapping ( I :: Native :: ONE ) ,
239+ false if r <= half_neg => d. sub_wrapping ( I :: Native :: ONE ) ,
240+ _ => d,
241+ } ;
242+ O :: Native :: from_decimal ( adjusted)
243+ } ;
244+
245+ scaled. filter ( |v| is_infallible_cast || O :: is_valid_decimal_precision ( * v, output_precision) )
246+ }
247+
/// Returns true if casting from (`input_precision`, `input_scale`) to
/// (`output_precision`, `output_scale`) is infallible based on precision/scale math.
///
/// All arithmetic is done in `i16`: the difference of two `i8` scales ranges
/// over +/-255 and a `u8` precision over 0..=255, so the intermediate sums
/// cannot overflow and the result is well defined for every input.
fn is_infallible_decimal_cast(
    input_precision: u8,
    input_scale: i8,
    output_precision: u8,
    output_scale: i8,
) -> bool {
    let delta_scale = i16::from(output_scale) - i16::from(input_scale);
    let input_precision = i16::from(input_precision);
    let output_precision = i16::from(output_precision);
    if delta_scale >= 0 {
        // if the gain in precision (digits) is greater than the multiplication due to scaling
        // every number will fit into the output type
        // Example: If we are starting with any number of precision 5 [xxxxx],
        // then an increase of scale by 3 will have the following effect on the representation:
        // [xxxxx] -> [xxxxx000], so for the cast to be infallible, the output type
        // needs to provide at least 8 digits precision
        input_precision + delta_scale <= output_precision
    } else {
        // if the reduction of the input number through scaling (dividing) is greater
        // than a possible precision loss (plus potential increase via rounding)
        // every input number will fit into the output type
        // Example: If we are starting with any number of precision 5 [xxxxx],
        // then a decrease of the scale by 3 will have the following effect on the representation:
        // [xxxxx] -> [xx] (+ 1 possibly, due to rounding).
        // The rounding may add an additional digit, so for the cast to be infallible,
        // the output type needs to have at least 3 digits of precision.
        // e.g. Decimal(5, 3) 99.999 to Decimal(3, 0) will result in 100:
        // [99999] -> [99] + 1 = [100], a cast to Decimal(2, 0) would not be possible
        input_precision + delta_scale < output_precision
    }
}
281+
112282/// Convert the value at a specific index in the given array into a `Variant`.
113283macro_rules! non_generic_conversion_single_value {
114284 ( $array: expr, $cast_fn: expr, $index: expr) => { {
0 commit comments