From a77bec217e0ce494183fc6f5b4dee0be1dc90e02 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 13 May 2025 09:37:06 -0400 Subject: [PATCH 1/4] Docs: Add example of creating a field in `return_field_from_args` --- datafusion/expr/src/udf.rs | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index c1b74fedcc32..8855e01ada95 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -317,9 +317,9 @@ pub struct ScalarFunctionArgs<'a, 'b> { pub struct ReturnFieldArgs<'a> { /// The data types of the arguments to the function pub arg_fields: &'a [Field], - /// Is argument `i` to the function a scalar (constant) + /// Is argument `i` to the function a scalar (constant)? /// - /// If argument `i` is not a scalar, it will be None + /// If the argument `i` is not a scalar, it will be None /// /// For example, if a function is called like `my_function(column_a, 5)` /// this field will be `[None, Some(ScalarValue::Int32(Some(5)))]` @@ -448,10 +448,10 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { /// /// By default, this function calls [`Self::return_type`] with the /// types of each argument. - /// + /// /// # Notes /// - /// Most UDFs should implement [`Self::return_type`] and not this + /// Most UDFs can implement [`Self::return_type`] and not this /// function as the output type for most functions only depends on the types /// of their inputs (e.g. `sqrt(f32)` is always `f32`). /// @@ -461,6 +461,26 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { /// 2. return types based on the **values** of the arguments (rather than /// their **types**. /// + /// # Example creating `Field` + /// + /// Note the [`Field`] is ignored, except for structured types such as + /// `DataType::Struct`. 
+ /// + /// ```rust + /// # use arrow::datatypes::{DataType, Field}; + /// # use datafusion_common::Result; + /// # use datafusion_expr::ReturnFieldArgs; + /// # struct Example{}; + /// # impl Example { + /// fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result<Field> { + /// // report output is only nullable if any one of the arguments is nullable + /// let nullable = args.arg_fields.iter().any(|f| f.is_nullable()); + /// let field = Field::new("ignored_name", DataType::Int32, nullable); + /// Ok(field) + /// } + /// # } + /// ``` + /// /// # Output Type based on Values /// /// For example, the following two function calls get the same argument From 02d6ea78b16eabeb243fa193df34901615dcf464 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 13 May 2025 09:38:39 -0400 Subject: [PATCH 2/4] fmt --- datafusion/expr/src/udf.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 8855e01ada95..11c2d2d2e3ba 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -448,7 +448,7 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { /// /// By default, this function calls [`Self::return_type`] with the /// types of each argument. - /// + /// /// # Notes /// /// Most UDFs can implement [`Self::return_type`] and not this @@ -465,10 +465,10 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { /// /// Note the [`Field`] is ignored, except for structured types such as /// `DataType::Struct`. 
- /// + /// /// ```rust - /// # use arrow::datatypes::{DataType, Field}; - /// # use datafusion_common::Result; + /// # use arrow::datatypes::{DataType, Field}; + /// # use datafusion_common::Result; /// # use datafusion_expr::ReturnFieldArgs; /// # struct Example{}; /// # impl Example { From 44de835d8bec6c6a2344c48b29e0956cf226ed08 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 13 May 2025 15:11:32 -0400 Subject: [PATCH 3/4] Update datafusion/expr/src/udf.rs Co-authored-by: Oleks V --- datafusion/expr/src/udf.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 11c2d2d2e3ba..d2e4a0a22b12 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -451,9 +451,10 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { /// /// # Notes /// - /// Most UDFs can implement [`Self::return_type`] and not this - /// function as the output type for most functions only depends on the types - /// of their inputs (e.g. `sqrt(f32)` is always `f32`). +/// For the majority of UDFs, implementing [`Self::return_type`] is sufficient, +/// as the result type is typically a deterministic function of the input types +/// (e.g., `sqrt(f32)` consistently yields `f32`). Implementing this method directly +/// is generally unnecessary unless the return type depends on runtime values. 
/// /// This function can be used for more advanced cases such as: /// From 2f194284dd040926eb7c8b0ddb1b9d518b8d0f4b Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 13 May 2025 15:18:44 -0400 Subject: [PATCH 4/4] fmt --- datafusion/expr/src/udf.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index d2e4a0a22b12..bf8339d88a55 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -451,10 +451,10 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { /// /// # Notes /// -/// For the majority of UDFs, implementing [`Self::return_type`] is sufficient, -/// as the result type is typically a deterministic function of the input types -/// (e.g., `sqrt(f32)` consistently yields `f32`). Implementing this method directly -/// is generally unnecessary unless the return type depends on runtime values. + /// For the majority of UDFs, implementing [`Self::return_type`] is sufficient, + /// as the result type is typically a deterministic function of the input types + /// (e.g., `sqrt(f32)` consistently yields `f32`). Implementing this method directly + /// is generally unnecessary unless the return type depends on runtime values. /// /// This function can be used for more advanced cases such as: ///