From 4b53e1df92dee981ba777640c92930c1c3c7a73f Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Sun, 23 Feb 2025 18:46:28 +0200 Subject: [PATCH 1/3] fix: use `return_type_from_args` and mark nullable if any of the input is nullable --- datafusion/functions/src/unicode/strpos.rs | 39 ++++++++++++++++++++-- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/datafusion/functions/src/unicode/strpos.rs b/datafusion/functions/src/unicode/strpos.rs index 19b82ccc23c2..e01aa7de83ce 100644 --- a/datafusion/functions/src/unicode/strpos.rs +++ b/datafusion/functions/src/unicode/strpos.rs @@ -23,7 +23,7 @@ use arrow::array::{ ArrayRef, ArrowPrimitiveType, AsArray, PrimitiveArray, StringArrayType, }; use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type}; -use datafusion_common::{exec_err, Result}; +use datafusion_common::{exec_err, internal_err, Result}; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; @@ -79,8 +79,17 @@ impl ScalarUDFImpl for StrposFunc { &self.signature } - fn return_type(&self, arg_types: &[DataType]) -> Result { - utf8_to_int_type(&arg_types[0], "strpos/instr/position") + fn return_type(&self, _arg_types: &[DataType]) -> Result { + internal_err!("return_type_from_args should be used instead") + } + + fn return_type_from_args( + &self, + args: datafusion_expr::ReturnTypeArgs, + ) -> Result { + utf8_to_int_type(&args.arg_types[0], "strpos/instr/position").map(|data_type| { + datafusion_expr::ReturnInfo::new(data_type, args.nullables.iter().any(|x| *x)) + }) } fn invoke_batch( @@ -202,6 +211,7 @@ mod tests { use arrow::array::{Array, Int32Array, Int64Array}; use arrow::datatypes::DataType::{Int32, Int64}; + use arrow::datatypes::DataType; use datafusion_common::{Result, ScalarValue}; use datafusion_expr::{ColumnarValue, ScalarUDFImpl}; @@ -289,4 +299,27 @@ mod tests { test_strpos!("", "" -> 1; Utf8View LargeUtf8 i32 Int32 Int32Array); test_strpos!("ДатаФусион数据融合📊🔥", "📊" -> 15; Utf8View LargeUtf8 i32 Int32 Int32Array); } + + #[test] + fn nullable_return_type() { + fn get_nullable(string_array_nullable: bool, substring_nullable: bool) -> bool { + let strpos = StrposFunc::new(); + let args = datafusion_expr::ReturnTypeArgs { + arg_types: &[DataType::Utf8, DataType::Utf8], + nullables: &[string_array_nullable, substring_nullable], + scalar_arguments: &[None::<&ScalarValue>, None::<&ScalarValue>], + }; + + let (_, nullable) = strpos.return_type_from_args(args).unwrap().into_parts(); + + nullable + } + + assert!(!get_nullable(false, false)); + + // If any of the arguments is nullable, the result is nullable + assert!(get_nullable(true, false)); + assert!(get_nullable(false, true)); + assert!(get_nullable(true, true)); + } } From b1b7aac15577a10f2dfd6c07f5e1da46e74d8e86 Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Sun, 23 Feb 2025 19:25:51 +0200 Subject: [PATCH 2/3] keep `return_type` function --- datafusion/functions/src/unicode/strpos.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion/functions/src/unicode/strpos.rs b/datafusion/functions/src/unicode/strpos.rs index e01aa7de83ce..f29cbaede209 100644 --- a/datafusion/functions/src/unicode/strpos.rs +++ b/datafusion/functions/src/unicode/strpos.rs @@ -23,7 +23,7 @@ use arrow::array::{ ArrayRef, ArrowPrimitiveType, AsArray, PrimitiveArray, StringArrayType, }; use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type}; -use datafusion_common::{exec_err, internal_err, Result}; +use datafusion_common::{exec_err, Result}; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; @@ -79,15 +79,15 @@ impl ScalarUDFImpl for StrposFunc { &self.signature } - fn return_type(&self, _arg_types: &[DataType]) -> Result { - internal_err!("return_type_from_args should be used instead") + fn return_type(&self, arg_types: &[DataType]) -> Result { + utf8_to_int_type(&arg_types[0], "strpos/instr/position") } fn return_type_from_args( &self, args: datafusion_expr::ReturnTypeArgs, ) -> Result { - utf8_to_int_type(&args.arg_types[0], "strpos/instr/position").map(|data_type| { + self.return_type(args.arg_types).map(|data_type| { datafusion_expr::ReturnInfo::new(data_type, args.nullables.iter().any(|x| *x)) }) } From 07b7946d3c089423508e839ca845c6684425bd15 Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Tue, 25 Feb 2025 20:43:08 +0200 Subject: [PATCH 3/3] Revert "keep `return_type` function" This reverts commit b1b7aac15577a10f2dfd6c07f5e1da46e74d8e86. --- datafusion/functions/src/unicode/strpos.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion/functions/src/unicode/strpos.rs b/datafusion/functions/src/unicode/strpos.rs index f29cbaede209..e01aa7de83ce 100644 --- a/datafusion/functions/src/unicode/strpos.rs +++ b/datafusion/functions/src/unicode/strpos.rs @@ -23,7 +23,7 @@ use arrow::array::{ ArrayRef, ArrowPrimitiveType, AsArray, PrimitiveArray, StringArrayType, }; use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type}; -use datafusion_common::{exec_err, Result}; +use datafusion_common::{exec_err, internal_err, Result}; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; @@ -79,15 +79,15 @@ impl ScalarUDFImpl for StrposFunc { &self.signature } - fn return_type(&self, arg_types: &[DataType]) -> Result { - utf8_to_int_type(&arg_types[0], "strpos/instr/position") + fn return_type(&self, _arg_types: &[DataType]) -> Result { + internal_err!("return_type_from_args should be used instead") } fn return_type_from_args( &self, args: datafusion_expr::ReturnTypeArgs, ) -> Result { - self.return_type(args.arg_types).map(|data_type| { + utf8_to_int_type(&args.arg_types[0], "strpos/instr/position").map(|data_type| { datafusion_expr::ReturnInfo::new(data_type, args.nullables.iter().any(|x| *x)) }) }