From 9290e3b8577ecb0df1433e791ff8ba9b7e330456 Mon Sep 17 00:00:00 2001 From: feniljain Date: Fri, 19 Dec 2025 23:19:41 +0530 Subject: [PATCH 1/4] fix(array_intersect, array_union): row converter panic --- datafusion/functions-nested/src/set_ops.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/functions-nested/src/set_ops.rs b/datafusion/functions-nested/src/set_ops.rs index a8dd857947b12..4e453e1adabd1 100644 --- a/datafusion/functions-nested/src/set_ops.rs +++ b/datafusion/functions-nested/src/set_ops.rs @@ -368,13 +368,13 @@ fn generic_set_lists( let l_values = if let Some(first_arr) = first_arr { converter.convert_columns(&[first_arr])? } else { - converter.convert_columns(&[])? + converter.empty_rows(0, 0) }; let r_values = if let Some(second_arr) = second_arr { converter.convert_columns(&[second_arr])? } else { - converter.convert_columns(&[])? + converter.empty_rows(0, 0) }; let l_iter = l_values.iter().sorted().dedup(); From 773c175a7d81fc7098f4f4703863298f2d653b2e Mon Sep 17 00:00:00 2001 From: feniljain Date: Fri, 19 Dec 2025 23:55:30 +0530 Subject: [PATCH 2/4] test: handling NULLs in array_intersect, array_union --- datafusion/sqllogictest/test_files/array.slt | 22 ++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 35e25a15bebba..b15bfc5a44383 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -503,6 +503,17 @@ AS FROM array_intersect_table_1D_UTF8 ; +statement ok +CREATE TABLE array_intersect_table_1D_NULL +AS VALUES + ([1, 2, 2, 3], [2, 3, 4]), + ([2, 3, 3], [3]), + ([3], [3, 3, 4]), + (null, [3, 4]), + ([1, 2], null), + (null, null) +; + statement ok CREATE TABLE array_intersect_table_2D AS VALUES @@ -6765,6 +6776,17 @@ from large_array_intersect_table_1D_UTF8; [bc] [arrow, rust] [] [] [arrow, datafusion, rust] [arrow, rust] +query ? 
+select array_intersect(column1, column2) +from array_intersect_table_1D_NULL; +---- +[2, 3] +[3] +[3] +[] +[] +[] + query ?? select array_intersect(column1, column2), array_intersect(column3, column4) From 031affc1685592585539449f07ddaa21704a36d0 Mon Sep 17 00:00:00 2001 From: feniljain Date: Sat, 20 Dec 2025 22:45:29 +0530 Subject: [PATCH 3/4] test: handling NULLs in array_union --- datafusion/sqllogictest/test_files/array.slt | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index b15bfc5a44383..b3ce1ae0428d4 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -4827,6 +4827,16 @@ select array_union(arrow_cast(['hello'], 'LargeList(Utf8)'), arrow_cast(['hello' ---- [hello, datafusion] +query ? +select array_union(column1, column2) +from array_intersect_table_1D_NULL; +---- +[1, 2, 3, 4] +[2, 3] +[3, 4] +[3, 4] +[1, 2] +[] # list_to_string scalar function #4 (function alias `array_to_string`) query TTT From a0ffb4b9f80a769350282ca93a6060b90a2d411d Mon Sep 17 00:00:00 2001 From: feniljain Date: Sat, 3 Jan 2026 23:14:48 +0530 Subject: [PATCH 4/4] feat: preserve nullability info for array_union and array_intersect --- datafusion/functions-nested/src/set_ops.rs | 15 +++++++++++++-- datafusion/sqllogictest/test_files/array.slt | 12 ++++++------ 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/datafusion/functions-nested/src/set_ops.rs b/datafusion/functions-nested/src/set_ops.rs index 4e453e1adabd1..69a220e125c04 100644 --- a/datafusion/functions-nested/src/set_ops.rs +++ b/datafusion/functions-nested/src/set_ops.rs @@ -22,7 +22,7 @@ use arrow::array::{ Array, ArrayRef, GenericListArray, LargeListArray, ListArray, OffsetSizeTrait, new_null_array, }; -use arrow::buffer::OffsetBuffer; +use arrow::buffer::{NullBuffer, OffsetBuffer}; use arrow::compute; use arrow::datatypes::DataType::{LargeList, 
List, Null}; use arrow::datatypes::{DataType, Field, FieldRef}; @@ -363,17 +363,22 @@ fn generic_set_lists( let mut offsets = vec![OffsetSize::usize_as(0)]; let mut new_arrays = vec![]; + let mut new_null_buf = vec![]; let converter = RowConverter::new(vec![SortField::new(l.value_type())])?; for (first_arr, second_arr) in l.iter().zip(r.iter()) { + let mut ele_should_be_null = false; + let l_values = if let Some(first_arr) = first_arr { converter.convert_columns(&[first_arr])? } else { + ele_should_be_null = true; converter.empty_rows(0, 0) }; let r_values = if let Some(second_arr) = second_arr { converter.convert_columns(&[second_arr])? } else { + ele_should_be_null = true; converter.empty_rows(0, 0) }; @@ -414,13 +419,19 @@ fn generic_set_lists( } }; + new_null_buf.push(!ele_should_be_null); new_arrays.push(array); } let offsets = OffsetBuffer::new(offsets.into()); let new_arrays_ref: Vec<_> = new_arrays.iter().map(|v| v.as_ref()).collect(); let values = compute::concat(&new_arrays_ref)?; - let arr = GenericListArray::<OffsetSize>::try_new(field, offsets, values, None)?; + let arr = GenericListArray::<OffsetSize>::try_new( + field, + offsets, + values, + Some(NullBuffer::new(new_null_buf.into())), + )?; Ok(Arc::new(arr)) } diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index b3ce1ae0428d4..c31f3d0702358 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -4834,9 +4834,9 @@ from array_intersect_table_1D_NULL; [1, 2, 3, 4] [2, 3] [3, 4] -[3, 4] -[1, 2] -[] +NULL +NULL +NULL # list_to_string scalar function #4 (function alias `array_to_string`) query TTT @@ -6793,9 +6793,9 @@ from array_intersect_table_1D_NULL; [2, 3] [3] [3] -[] -[] -[] +NULL +NULL +NULL query ?? select array_intersect(column1, column2),