Fill out more Decimal32/Decimal64 support in expr, common and expr-common

AdamGS · AdamGS · commit 4242ed004daa · 2025-09-16T17:13:19.000+01:00
diff --git a/datafusion/common/src/types/native.rs b/datafusion/common/src/types/native.rs
@@ -23,6 +23,7 @@ use crate::error::{Result, _internal_err};
 use arrow::compute::can_cast_types;
 use arrow::datatypes::{
     DataType, Field, FieldRef, Fields, IntervalUnit, TimeUnit, UnionFields,
+    DECIMAL128_MAX_PRECISION, DECIMAL32_MAX_PRECISION, DECIMAL64_MAX_PRECISION,
 };
 use std::{fmt::Display, sync::Arc};
 
@@ -228,7 +229,15 @@ impl LogicalType for NativeType {
             (Self::Float16, _) => Float16,
             (Self::Float32, _) => Float32,
             (Self::Float64, _) => Float64,
-            (Self::Decimal(p, s), _) if p <= &38 => Decimal128(*p, *s),
+            (Self::Decimal(p, s), _) if *p <= DECIMAL32_MAX_PRECISION => {
+                Decimal32(*p, *s)
+            }
+            (Self::Decimal(p, s), _) if *p <= DECIMAL64_MAX_PRECISION => {
+                Decimal64(*p, *s)
+            }
+            (Self::Decimal(p, s), _) if *p <= DECIMAL128_MAX_PRECISION => {
+                Decimal128(*p, *s)
+            }
             (Self::Decimal(p, s), _) => Decimal256(*p, *s),
             (Self::Timestamp(tu, tz), _) => Timestamp(*tu, tz.clone()),
             // If given type is Date, return the same type
diff --git a/datafusion/expr-common/src/casts.rs b/datafusion/expr-common/src/casts.rs
@@ -25,7 +25,9 @@ use std::cmp::Ordering;
 
 use arrow::datatypes::{
     DataType, TimeUnit, MAX_DECIMAL128_FOR_EACH_PRECISION,
-    MIN_DECIMAL128_FOR_EACH_PRECISION,
+    MAX_DECIMAL32_FOR_EACH_PRECISION, MAX_DECIMAL64_FOR_EACH_PRECISION,
+    MIN_DECIMAL128_FOR_EACH_PRECISION, MIN_DECIMAL32_FOR_EACH_PRECISION,
+    MIN_DECIMAL64_FOR_EACH_PRECISION,
 };
 use arrow::temporal_conversions::{MICROSECONDS, MILLISECONDS, NANOSECONDS};
 use datafusion_common::ScalarValue;
@@ -69,6 +71,8 @@ fn is_supported_numeric_type(data_type: &DataType) -> bool {
             | DataType::Int16
             | DataType::Int32
             | DataType::Int64
+            | DataType::Decimal32(_, _)
+            | DataType::Decimal64(_, _)
             | DataType::Decimal128(_, _)
             | DataType::Timestamp(_, _)
     )
@@ -114,6 +118,8 @@ fn try_cast_numeric_literal(
         | DataType::Int32
         | DataType::Int64 => 1_i128,
         DataType::Timestamp(_, _) => 1_i128,
+        DataType::Decimal32(_, scale) => 10_i128.pow(*scale as u32),
+        DataType::Decimal64(_, scale) => 10_i128.pow(*scale as u32),
         DataType::Decimal128(_, scale) => 10_i128.pow(*scale as u32),
         _ => return None,
     };
@@ -127,6 +133,20 @@ fn try_cast_numeric_literal(
         DataType::Int32 => (i32::MIN as i128, i32::MAX as i128),
         DataType::Int64 => (i64::MIN as i128, i64::MAX as i128),
         DataType::Timestamp(_, _) => (i64::MIN as i128, i64::MAX as i128),
+        DataType::Decimal32(precision, _) => (
+            // Different precision for decimal32 can store different range of value.
+            // For example, the precision is 3, the max of value is `999` and the min
+            // value is `-999`
+            MIN_DECIMAL32_FOR_EACH_PRECISION[*precision as usize] as i128,
+            MAX_DECIMAL32_FOR_EACH_PRECISION[*precision as usize] as i128,
+        ),
+        DataType::Decimal64(precision, _) => (
+            // Different precision for decimal64 can store different range of value.
+            // For example, the precision is 3, the max of value is `999` and the min
+            // value is `-999`
+            MIN_DECIMAL64_FOR_EACH_PRECISION[*precision as usize] as i128,
+            MAX_DECIMAL64_FOR_EACH_PRECISION[*precision as usize] as i128,
+        ),
         DataType::Decimal128(precision, _) => (
             // Different precision for decimal128 can store different range of value.
             // For example, the precision is 3, the max of value is `999` and the min
@@ -149,6 +169,46 @@ fn try_cast_numeric_literal(
         ScalarValue::TimestampMillisecond(Some(v), _) => (*v as i128).checked_mul(mul),
         ScalarValue::TimestampMicrosecond(Some(v), _) => (*v as i128).checked_mul(mul),
         ScalarValue::TimestampNanosecond(Some(v), _) => (*v as i128).checked_mul(mul),
+        ScalarValue::Decimal32(Some(v), _, scale) => {
+            let v = *v as i128;
+            let lit_scale_mul = 10_i128.pow(*scale as u32);
+            if mul >= lit_scale_mul {
+                // Example:
+                // lit is decimal(123,3,2)
+                // target type is decimal(5,3)
+                // the lit can be converted to the decimal(1230,5,3)
+                v.checked_mul(mul / lit_scale_mul)
+            } else if v % (lit_scale_mul / mul) == 0 {
+                // Example:
+                // lit is decimal(123000,10,3)
+                // target type is int32: the lit can be converted to INT32(123)
+                // target type is decimal(10,2): the lit can be converted to decimal(12300,10,2)
+                Some(v / (lit_scale_mul / mul))
+            } else {
+                // can't convert the lit decimal to the target data type
+                None
+            }
+        }
+        ScalarValue::Decimal64(Some(v), _, scale) => {
+            let v = *v as i128;
+            let lit_scale_mul = 10_i128.pow(*scale as u32);
+            if mul >= lit_scale_mul {
+                // Example:
+                // lit is decimal(123,3,2)
+                // target type is decimal(5,3)
+                // the lit can be converted to the decimal(1230,5,3)
+                v.checked_mul(mul / lit_scale_mul)
+            } else if v % (lit_scale_mul / mul) == 0 {
+                // Example:
+                // lit is decimal(123000,10,3)
+                // target type is int32: the lit can be converted to INT32(123)
+                // target type is decimal(10,2): the lit can be converted to decimal(12300,10,2)
+                Some(v / (lit_scale_mul / mul))
+            } else {
+                // can't convert the lit decimal to the target data type
+                None
+            }
+        }
         ScalarValue::Decimal128(Some(v), _, scale) => {
             let lit_scale_mul = 10_i128.pow(*scale as u32);
             if mul >= lit_scale_mul {
@@ -218,6 +278,12 @@ fn try_cast_numeric_literal(
                         );
                         ScalarValue::TimestampNanosecond(value, tz.clone())
                     }
+                    DataType::Decimal32(p, s) => {
+                        ScalarValue::Decimal32(Some(value as i32), *p, *s)
+                    }
+                    DataType::Decimal64(p, s) => {
+                        ScalarValue::Decimal64(Some(value as i64), *p, *s)
+                    }
                     DataType::Decimal128(p, s) => {
                         ScalarValue::Decimal128(Some(value), *p, *s)
                     }
diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs
@@ -281,15 +281,14 @@ impl LogicalPlanBuilder {
                 let value = &row[j];
                 let data_type = value.get_type(schema)?;
 
-                if !data_type.equals_datatype(field_type) {
-                    if can_cast_types(&data_type, field_type) {
-                    } else {
-                        return exec_err!(
-                            "type mismatch and can't cast to got {} and {}",
-                            data_type,
-                            field_type
-                        );
-                    }
+                if !data_type.equals_datatype(field_type)
+                    && !can_cast_types(&data_type, field_type)
+                {
+                    return exec_err!(
+                        "type mismatch and can't cast to got {} and {}",
+                        data_type,
+                        field_type
+                    );
                 }
             }
             fields.push(field_type.to_owned(), field_nullable);
diff --git a/datafusion/expr/src/test/function_stub.rs b/datafusion/expr/src/test/function_stub.rs
@@ -23,6 +23,7 @@ use std::any::Any;
 
 use arrow::datatypes::{
     DataType, FieldRef, DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION,
+    DECIMAL32_MAX_PRECISION, DECIMAL64_MAX_PRECISION,
 };
 
 use datafusion_common::{exec_err, not_impl_err, utils::take_function_args, Result};
@@ -135,9 +136,10 @@ impl AggregateUDFImpl for Sum {
                 DataType::Dictionary(_, v) => coerced_type(v),
                 // in the spark, the result type is DECIMAL(min(38,precision+10), s)
                 // ref: https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala#L66
-                DataType::Decimal128(_, _) | DataType::Decimal256(_, _) => {
-                    Ok(data_type.clone())
-                }
+                DataType::Decimal32(_, _)
+                | DataType::Decimal64(_, _)
+                | DataType::Decimal128(_, _)
+                | DataType::Decimal256(_, _) => Ok(data_type.clone()),
                 dt if dt.is_signed_integer() => Ok(DataType::Int64),
                 dt if dt.is_unsigned_integer() => Ok(DataType::UInt64),
                 dt if dt.is_floating() => Ok(DataType::Float64),
@@ -153,6 +155,18 @@ impl AggregateUDFImpl for Sum {
             DataType::Int64 => Ok(DataType::Int64),
             DataType::UInt64 => Ok(DataType::UInt64),
             DataType::Float64 => Ok(DataType::Float64),
+            DataType::Decimal32(precision, scale) => {
+                // modeled on Spark's DECIMAL(min(38,precision+10), s), but capped at DECIMAL32_MAX_PRECISION here
+                // ref: https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala#L66
+                let new_precision = DECIMAL32_MAX_PRECISION.min(*precision + 10);
+                Ok(DataType::Decimal32(new_precision, *scale))
+            }
+            DataType::Decimal64(precision, scale) => {
+                // modeled on Spark's DECIMAL(min(38,precision+10), s), but capped at DECIMAL64_MAX_PRECISION here
+                // ref: https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala#L66
+                let new_precision = DECIMAL64_MAX_PRECISION.min(*precision + 10);
+                Ok(DataType::Decimal64(new_precision, *scale))
+            }
             DataType::Decimal128(precision, scale) => {
                 // in the spark, the result type is DECIMAL(min(38,precision+10), s)
                 // ref: https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala#L66
diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs
@@ -877,6 +877,8 @@ fn coerced_from<'a>(
             | UInt64
             | Float32
             | Float64
+            | Decimal32(_, _)
+            | Decimal64(_, _)
             | Decimal128(_, _),
         ) => Some(type_into.clone()),
         (

Original file line number	Diff line number	Diff line change
`@@ -877,6 +877,8 @@ fn coerced_from<'a>(`
`877`	`877`	`\| UInt64`
`878`	`878`	`\| Float32`
`879`	`879`	`\| Float64`
	`880`	`+ \| Decimal32(_, _)`
	`881`	`+ \| Decimal64(_, _)`
`880`	`882`	`\| Decimal128(_, _),`
`881`	`883`	`) => Some(type_into.clone()),`
`882`	`884`	`(`