From 28e3b99414ea7ddc126b41b6cf1fc7dd2b61e23d Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Fri, 22 Nov 2024 22:36:47 +0100 Subject: [PATCH 1/2] Fix panic when hashing empty FixedSizeList Array Previously it would panic due to division by zero. --- datafusion/common/src/hash_utils.rs | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/datafusion/common/src/hash_utils.rs b/datafusion/common/src/hash_utils.rs index e18d70844d32..f4cb497c5c27 100644 --- a/datafusion/common/src/hash_utils.rs +++ b/datafusion/common/src/hash_utils.rs @@ -322,7 +322,11 @@ fn hash_fixed_list_array( ) -> Result<()> { let values = Arc::clone(array.values()); let value_len = array.value_length(); - let offset_size = value_len as usize / array.len(); + let offset_size = if array.len() == 0 { + 0 + } else { + value_len as usize / array.len() + }; let nulls = array.nulls(); let mut values_hashes = vec![0u64; values.len()]; create_hashes(&[values], random_state, &mut values_hashes)?; @@ -454,6 +458,16 @@ mod tests { Ok(()) } + #[test] + fn create_hashes_for_empty_fixed_size_lit() -> Result<()> { + let empty_array = FixedSizeListBuilder::new(StringBuilder::new(), 1).finish(); + let random_state = RandomState::with_seeds(0, 0, 0, 0); + let hashes_buff = &mut vec![0; 0]; + let hashes = create_hashes(&[Arc::new(empty_array)], &random_state, hashes_buff)?; + assert_eq!(hashes, &Vec::::new()); + Ok(()) + } + #[test] fn create_hashes_for_float_arrays() -> Result<()> { let f32_arr = Arc::new(Float32Array::from(vec![0.12, 0.5, 1f32, 444.7])); From 4b46b8ac3ee5b87b5f5e603ea042b4c308003a96 Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Sat, 23 Nov 2024 19:00:08 +0100 Subject: [PATCH 2/2] simplify code --- datafusion/common/src/hash_utils.rs | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/datafusion/common/src/hash_utils.rs b/datafusion/common/src/hash_utils.rs index f4cb497c5c27..f1b3947f9976 100644 --- a/datafusion/common/src/hash_utils.rs +++ b/datafusion/common/src/hash_utils.rs @@ -321,12 +321,7 @@ fn hash_fixed_list_array( hashes_buffer: &mut [u64], ) -> Result<()> { let values = Arc::clone(array.values()); - let value_len = array.value_length(); - let offset_size = if array.len() == 0 { - 0 - } else { - value_len as usize / array.len() - }; + let value_length = array.value_length() as usize; let nulls = array.nulls(); let mut values_hashes = vec![0u64; values.len()]; create_hashes(&[values], random_state, &mut values_hashes)?; @@ -334,7 +329,8 @@ fn hash_fixed_list_array( for i in 0..array.len() { if nulls.is_valid(i) { let hash = &mut hashes_buffer[i]; - for values_hash in &values_hashes[i * offset_size..(i + 1) * offset_size] + for values_hash in + &values_hashes[i * value_length..(i + 1) * value_length] { *hash = combine_hashes(*hash, *values_hash); } @@ -343,7 +339,7 @@ fn hash_fixed_list_array( } else { for i in 0..array.len() { let hash = &mut hashes_buffer[i]; - for values_hash in &values_hashes[i * offset_size..(i + 1) * offset_size] { + for values_hash in &values_hashes[i * value_length..(i + 1) * value_length] { *hash = combine_hashes(*hash, *values_hash); } }