apache · alamb · Jul 30, 2025 · Jul 26, 2025 · Jul 29, 2025 · Jul 29, 2025
diff --git a/datafusion/spark/src/function/math/mod.rs b/datafusion/spark/src/function/math/mod.rs
@@ -18,6 +18,7 @@
 pub mod expm1;
 pub mod factorial;
 pub mod hex;
+pub mod rint;
 
 use datafusion_expr::ScalarUDF;
 use datafusion_functions::make_udf_function;
@@ -26,6 +27,7 @@ use std::sync::Arc;
 make_udf_function!(expm1::SparkExpm1, expm1);
 make_udf_function!(factorial::SparkFactorial, factorial);
 make_udf_function!(hex::SparkHex, hex);
+make_udf_function!(rint::SparkRint, rint);
 
 pub mod expr_fn {
     use datafusion_functions::export_functions;
@@ -37,8 +39,9 @@ pub mod expr_fn {
         arg1
     ));
     export_functions!((hex, "Computes hex value of the given column.", arg1));
+    export_functions!((rint, "Returns the double value that is closest in value to the argument and is equal to a mathematical integer.", arg1));
 }
 
+/// Returns the Spark math UDFs exposed by this module: `expm1`, `factorial`,
+/// `hex` and `rint`.
 pub fn functions() -> Vec<Arc<ScalarUDF>> {
-    vec![expm1(), factorial(), hex()]
+    vec![expm1(), factorial(), hex(), rint()]
 }
diff --git a/datafusion/spark/src/function/math/rint.rs b/datafusion/spark/src/function/math/rint.rs
@@ -0,0 +1,165 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::any::Any;
+use std::sync::Arc;
+
+use arrow::array::{Array, ArrayRef, AsArray};
+use arrow::compute::cast;
+use arrow::datatypes::DataType::{
+    Float32, Float64, Int16, Int32, Int64, Int8, UInt16, UInt32, UInt64, UInt8,
+};
+use arrow::datatypes::{DataType, Float32Type, Float64Type};
+use datafusion_common::{exec_err, Result};
+use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
+use datafusion_expr::{
+    ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility,
+};
+use datafusion_functions::utils::make_scalar_function;
+
+/// Spark-compatible `rint` scalar function.
+///
+/// Takes one numeric argument and produces a `Float64`: integers are
+/// converted as-is, floating point values are rounded to the nearest whole
+/// number with ties going to the even neighbour.
+#[derive(Debug)]
+pub struct SparkRint {
+    /// Accepted call shape: exactly one numeric argument, immutable volatility.
+    signature: Signature,
+}
+
+// `Default` is a thin alias for `SparkRint::new`, so both construction paths
+// yield the same signature.
+impl Default for SparkRint {
+    fn default() -> Self {
+        SparkRint::new()
+    }
+}
+
+impl SparkRint {
+    /// Creates the UDF with a signature that accepts exactly one numeric
+    /// argument and is declared `Volatility::Immutable`.
+    pub fn new() -> Self {
+        let signature = Signature::numeric(1, Volatility::Immutable);
+        Self { signature }
+    }
+}
+
+impl ScalarUDFImpl for SparkRint {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    /// Returns the function name, `rint`.
+    fn name(&self) -> &str {
+        "rint"
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    /// The result type is always `Float64`, whatever the argument type is.
+    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
+        Ok(Float64)
+    }
+
+    /// Evaluates the call by adapting [`spark_rint`] with
+    /// `make_scalar_function` and applying it to the supplied arguments.
+    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
+        let evaluate = make_scalar_function(spark_rint, vec![]);
+        evaluate(&args.args)
+    }
+
+    /// Reports how the output is ordered given the ordering of the inputs.
+    fn output_ordering(&self, input: &[ExprProperties]) -> Result<SortProperties> {
+        // With exactly one input its sort properties are forwarded unchanged;
+        // any other arity falls back to the default sort properties.
+        let ordering = match input {
+            [only] => only.sort_properties,
+            _ => SortProperties::default(),
+        };
+        Ok(ordering)
+    }
+}
+
+/// Applies Spark's `rint` to a single array and returns a `Float64` array.
+///
+/// * Integer inputs are cast to `Float64`; they are already whole numbers.
+/// * `Float32` / `Float64` inputs are rounded to the nearest whole number,
+///   with ties going to the even neighbour (`2.5 -> 2.0`, `3.5 -> 4.0`).
+/// * `Float16` and decimal inputs are first cast to `Float64` and then
+///   rounded the same way, mirroring Spark, which evaluates `rint` on a
+///   double.
+///
+/// Null slots stay null.
+///
+/// # Errors
+///
+/// Returns an execution error when `args` does not hold exactly one array,
+/// when the array's data type is not a supported numeric type, or when the
+/// cast to `Float64` fails.
+pub fn spark_rint(args: &[ArrayRef]) -> Result<ArrayRef> {
+    let [arg] = args else {
+        return exec_err!("rint expects exactly 1 argument, got {}", args.len());
+    };
+
+    let array: &dyn Array = arg.as_ref();
+    match arg.data_type() {
+        Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32 | UInt64 => {
+            Ok(cast(array, &Float64)?)
+        }
+        Float64 => {
+            let rounded = array
+                .as_primitive::<Float64Type>()
+                .unary::<_, Float64Type>(|value: f64| value.round_ties_even());
+            Ok(Arc::new(rounded))
+        }
+        Float32 => {
+            let rounded = array
+                .as_primitive::<Float32Type>()
+                .unary::<_, Float64Type>(|value: f32| value.round_ties_even() as f64);
+            Ok(Arc::new(rounded))
+        }
+        // These types are numeric and admitted by the signature, but have no
+        // native rounding kernel here: widen to Float64 first, then round.
+        DataType::Float16
+        | DataType::Decimal128(_, _)
+        | DataType::Decimal256(_, _) => {
+            let widened = cast(array, &Float64)?;
+            let rounded = widened
+                .as_primitive::<Float64Type>()
+                .unary::<_, Float64Type>(|value: f64| value.round_ties_even());
+            Ok(Arc::new(rounded))
+        }
+        _ => {
+            exec_err!(
+                "rint expects a numeric argument, got {}",
+                arg.data_type()
+            )
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use arrow::array::{Float32Array, Float64Array, Int32Array};
+
+    #[test]
+    fn test_rint_positive_decimals() {
+        // A positive fraction rounds to the nearest whole number.
+        let result = spark_rint(&[Arc::new(Float64Array::from(vec![12.3456]))]).unwrap();
+        assert_eq!(result.as_ref(), &Float64Array::from(vec![12.0]));
+
+        // Ties go to the even neighbour (banker's rounding).
+        let result = spark_rint(&[Arc::new(Float64Array::from(vec![2.5]))]).unwrap();
+        assert_eq!(result.as_ref(), &Float64Array::from(vec![2.0]));
+
+        let result = spark_rint(&[Arc::new(Float64Array::from(vec![3.5]))]).unwrap();
+        assert_eq!(result.as_ref(), &Float64Array::from(vec![4.0]));
+    }
+
+    #[test]
+    fn test_rint_negative_decimals() {
+        // A negative fraction rounds to the nearest whole number.
+        let result = spark_rint(&[Arc::new(Float64Array::from(vec![-12.3456]))]).unwrap();
+        assert_eq!(result.as_ref(), &Float64Array::from(vec![-12.0]));
+
+        // Negative ties also go to the even neighbour.
+        let result = spark_rint(&[Arc::new(Float64Array::from(vec![-2.5]))]).unwrap();
+        assert_eq!(result.as_ref(), &Float64Array::from(vec![-2.0]));
+    }
+
+    #[test]
+    fn test_rint_integers() {
+        // A genuine integer array exercises the cast branch and must come
+        // back as Float64.
+        let result = spark_rint(&[Arc::new(Int32Array::from(vec![42, -7]))]).unwrap();
+        assert_eq!(result.as_ref(), &Float64Array::from(vec![42.0, -7.0]));
+
+        // A whole-valued float is returned unchanged.
+        let result = spark_rint(&[Arc::new(Float64Array::from(vec![42.0]))]).unwrap();
+        assert_eq!(result.as_ref(), &Float64Array::from(vec![42.0]));
+    }
+
+    #[test]
+    fn test_rint_float32() {
+        // Float32 input is rounded with ties-to-even and widened to Float64.
+        let input = Float32Array::from(vec![2.5_f32, -3.5_f32]);
+        let result = spark_rint(&[Arc::new(input)]).unwrap();
+        assert_eq!(result.as_ref(), &Float64Array::from(vec![2.0, -4.0]));
+    }
+
+    #[test]
+    fn test_rint_null() {
+        let result = spark_rint(&[Arc::new(Float64Array::from(vec![None]))]).unwrap();
+        assert_eq!(result.as_ref(), &Float64Array::from(vec![None]));
+    }
+
+    #[test]
+    fn test_rint_zero() {
+        let result = spark_rint(&[Arc::new(Float64Array::from(vec![0.0]))]).unwrap();
+        assert_eq!(result.as_ref(), &Float64Array::from(vec![0.0]));
+    }
+
+    #[test]
+    fn test_rint_wrong_arity() {
+        // Anything other than exactly one argument is an error.
+        assert!(spark_rint(&[]).is_err());
+    }
+}
diff --git a/datafusion/sqllogictest/test_files/spark/math/rint.slt b/datafusion/sqllogictest/test_files/spark/math/rint.slt
@@ -23,5 +23,95 @@
 
 ## Original Query: SELECT rint(12.3456);
 ## PySpark 3.5.5 Result: {'rint(12.3456)': 12.0, 'typeof(rint(12.3456))': 'double', 'typeof(12.3456)': 'decimal(6,4)'}
-#query
-#SELECT rint(12.3456::decimal(6,4));
+query R
+SELECT rint(12.3456);
+----
+12
+
+## Test additional cases
+query R
+SELECT rint(-12.3456);
+----
+-12
+
+query R
+SELECT rint(arrow_cast(-12.3456, 'Float32'));
+----
+-12
+
+## Test int
+query R
+SELECT rint(arrow_cast(12, 'UInt8'));
+----
+12
+
+query R
+SELECT rint(arrow_cast(-12, 'Int8'));
+----
+-12
+
+query R
+SELECT rint(arrow_cast(12, 'UInt16'));
+----
+12
+
+query R
+SELECT rint(arrow_cast(-12, 'Int16'));
+----
+-12
+
+query R
+SELECT rint(arrow_cast(12, 'UInt32'));
+----
+12
+
+query R
+SELECT rint(arrow_cast(-12, 'Int32'));
+----
+-12
+
+query R
+SELECT rint(arrow_cast(12, 'UInt64'));
+----
+12
+
+query R
+SELECT rint(arrow_cast(-12, 'Int64'));
+----
+-12
+
+query R
+SELECT rint(2.5);
+----
+2
+
+query R
+SELECT rint(3.5);
+----
+4
+
+query R
+SELECT rint(-2.5);
+----
+-2
+
+query R
+SELECT rint(-3.5);
+----
+-4
+
+query R
+SELECT rint(0.0);
+----
+0
+
+query R
+SELECT rint(42);
+----
+42
+
+## Test with null
+query R
+SELECT rint(NULL);
+----
+NULL