diff --git a/arrow/benches/partition_kernels.rs b/arrow/benches/partition_kernels.rs index ae55fbdad22c..cba5e5132f4c 100644 --- a/arrow/benches/partition_kernels.rs +++ b/arrow/benches/partition_kernels.rs @@ -22,12 +22,15 @@ use std::sync::Arc; extern crate arrow; use arrow::compute::kernels::partition::lexicographical_partition_ranges; use arrow::compute::kernels::sort::{lexsort, SortColumn}; +use arrow::datatypes::{ArrowDictionaryKeyType, ArrowNativeType, DataType, Int32Type}; use arrow::util::bench_util::*; +use arrow::util::test_util::seedable_rng; use arrow::{ array::*, datatypes::{ArrowPrimitiveType, Float64Type, UInt8Type}, }; use rand::distributions::{Distribution, Standard}; +use rand::Rng; use std::iter; fn create_array(size: usize, with_nulls: bool) -> ArrayRef @@ -39,6 +42,47 @@ where Arc::new(array) } +fn create_sorted_dictionary_data( + size: usize, + num_distinct_keys: usize, + mark_as_sorted: bool, +) -> Vec +where + Standard: Distribution, + T::Native: Ord, +{ + let mut rng = seedable_rng(); + + let mut keys = (0..size) + .map(|_| T::Native::from_usize(rng.gen_range(0..num_distinct_keys)).unwrap()) + .collect::>(); + keys.sort(); + let keys = PrimitiveArray::::from_iter_values(keys); + let mut values = (0..num_distinct_keys) + .map(|_| format!("{}", rng.gen_range(10_000_usize..1_000_000_000_000_usize))) + .collect::>(); + values.sort(); + let values = StringArray::from_iter_values(values); + + let data = ArrayData::try_new( + DataType::Dictionary(Box::new(T::DATA_TYPE), Box::new(DataType::Utf8)), + size, + keys.data().null_buffer().cloned(), + 0, + keys.data().buffers().to_vec(), + vec![values.data().clone()], + ) + .unwrap(); + + let mut dictionary_array = DictionaryArray::::from(data); + + if mark_as_sorted { + dictionary_array = dictionary_array.as_ordered(); + } + + vec![Arc::new(dictionary_array)] +} + fn bench_partition(sorted_columns: &[ArrayRef]) { let columns = sorted_columns .iter() @@ -140,6 +184,33 @@ fn add_benchmark(c: &mut Criterion) { "lexicographical_partition_ranges(low cardinality) 1024", |b| b.iter(|| bench_partition(&sorted_columns)), ); + + let sorted_columns = + create_sorted_dictionary_data::(16 * 1024, 100, false); + c.bench_function( + "lexicographical_partition_ranges(dictionary_values_low_cardinality)", + |b| b.iter(|| bench_partition(&sorted_columns)), + ); + + let sorted_columns = + create_sorted_dictionary_data::(16 * 1024, 1000, false); + c.bench_function( + "lexicographical_partition_ranges(dictionary_values_high_cardinality)", + |b| b.iter(|| bench_partition(&sorted_columns)), + ); + + let sorted_columns = create_sorted_dictionary_data::(16 * 1024, 100, true); + c.bench_function( + "lexicographical_partition_ranges(dictionary_keys_low_cardinality)", + |b| b.iter(|| bench_partition(&sorted_columns)), + ); + + let sorted_columns = + create_sorted_dictionary_data::(16 * 1024, 1000, true); + c.bench_function( + "lexicographical_partition_ranges(dictionary_keys_high_cardinality)", + |b| b.iter(|| bench_partition(&sorted_columns)), + ); } criterion_group!(benches, add_benchmark); diff --git a/arrow/benches/sort_kernel.rs b/arrow/benches/sort_kernel.rs index f9f5f24c15a6..55608d9c212b 100644 --- a/arrow/benches/sort_kernel.rs +++ b/arrow/benches/sort_kernel.rs @@ -19,12 +19,14 @@ extern crate criterion; use criterion::Criterion; +use rand::Rng; use std::sync::Arc; extern crate arrow; use arrow::compute::kernels::sort::{lexsort, SortColumn}; use arrow::util::bench_util::*; +use arrow::util::test_util::seedable_rng; use arrow::{array::*, datatypes::Float32Type}; fn create_f32_array(size: usize, with_nulls: bool) -> ArrayRef { @@ -40,6 +42,36 @@ fn create_bool_array(size: usize, with_nulls: bool) -> ArrayRef { Arc::new(array) } +fn create_string_array( + size: usize, + max_len: usize, + cardinality: usize, + with_nulls: bool, +) -> ArrayRef { + let null_density = if with_nulls { 0.5 } else { 0.0 }; + + let strings = create_string_array_with_len::(cardinality, 0.0, max_len); + let rng = &mut seedable_rng(); + + let values = (0..size) + .map(|_| { + if rng.gen_bool(null_density) { + None + } else { + let idx = rng.gen_range(0..strings.len()); + Some(strings.value(idx)) + } + }) + .collect::(); + + Arc::new(values) +} + +fn create_string_dict_array(string_array: &ArrayRef) -> ArrayRef { + let strings = string_array.as_any().downcast_ref::().unwrap(); + Arc::new(Int32DictionaryArray::from_iter(strings.into_iter())) +} + fn bench_sort(array_a: &ArrayRef, array_b: &ArrayRef, limit: Option) { let columns = vec![ SortColumn { @@ -92,6 +124,60 @@ fn add_benchmark(c: &mut Criterion) { b.iter(|| bench_sort(&arr_a, &arr_b, None)) }); + let arr_a: ArrayRef = create_string_array(2_usize.pow(12), 32, 32, true); + let arr_b: ArrayRef = create_string_array(2_usize.pow(12), 16, 64, true); + c.bench_function("string sort nulls 2^12", |b| { + b.iter(|| bench_sort(&arr_a, &arr_b, None)) + }); + + let arr_a = create_string_dict_array(&arr_a); + let arr_b = create_string_dict_array(&arr_b); + c.bench_function("dict string sort nulls 2^12", |b| { + b.iter(|| bench_sort(&arr_a, &arr_b, None)) + }); + + c.bench_function("make_ordered dict string sort nulls 2^12", |b| { + b.iter(|| { + let arr_a: ArrayRef = Arc::new( + arr_a + .as_any() + .downcast_ref::() + .unwrap() + .make_ordered() + .unwrap(), + ); + let arr_b: ArrayRef = Arc::new( + arr_b + .as_any() + .downcast_ref::() + .unwrap() + .make_ordered() + .unwrap(), + ); + bench_sort(&arr_a, &arr_b, None); + }); + }); + + let arr_a: ArrayRef = Arc::new( + arr_a + .as_any() + .downcast_ref::() + .unwrap() + .make_ordered() + .unwrap(), + ); + let arr_b: ArrayRef = Arc::new( + arr_b + .as_any() + .downcast_ref::() + .unwrap() + .make_ordered() + .unwrap(), + ); + c.bench_function("presorted dict string sort nulls 2^12", |b| { + b.iter(|| bench_sort(&arr_a, &arr_b, None)) + }); + // with limit { let arr_a = create_f32_array(2u64.pow(12) as usize, false); diff --git a/arrow/src/array/array_dictionary.rs b/arrow/src/array/array_dictionary.rs index b967b3abb49f..839141777c60 100644 --- a/arrow/src/array/array_dictionary.rs +++ b/arrow/src/array/array_dictionary.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +use crate::buffer::Buffer; +use crate::compute::{sort_to_indices, take, TakeOptions}; use std::any::Any; use std::fmt; use std::iter::IntoIterator; @@ -27,7 +29,7 @@ use super::{ use crate::datatypes::{ ArrowDictionaryKeyType, ArrowNativeType, ArrowPrimitiveType, DataType, }; -use crate::error::Result; +use crate::error::{ArrowError, Result}; /// A dictionary array where each element is a single value indexed by an integer key. /// This is mostly used to represent strings or a limited set of primitive types as integers, @@ -163,6 +165,92 @@ impl<'a, K: ArrowPrimitiveType> DictionaryArray { self.is_ordered } + /// Returns a DictionaryArray referencing the same data + /// with the [DictionaryArray::is_ordered] flag set to `true`. + /// Note that this does not actually reorder the values in the dictionary. + pub fn as_ordered(&self) -> Self { + Self { + data: self.data.clone(), + values: self.values.clone(), + keys: PrimitiveArray::::from(self.keys.data().clone()), + is_ordered: true, + } + } + + pub fn make_ordered(&self) -> Result { + let values = self.values(); + if self.is_ordered || values.is_empty() { + Ok(self.as_ordered()) + } else { + // validate up front that we can do conversions from/to usize for the whole range of keys + // this allows using faster unchecked conversions below + K::Native::from_usize(values.len()) + .ok_or(ArrowError::DictionaryKeyOverflowError)?; + // sort indices are u32 so we cannot sort larger dictionaries + u32::try_from(values.len()) + .map_err(|_| ArrowError::DictionaryKeyOverflowError)?; + + // sort the dictionary values + let sort_indices = sort_to_indices(values, None, None)?; + let sorted_dictionary = take( + values.as_ref(), + &sort_indices, + Some(TakeOptions { + check_bounds: false, + }), + )?; + + // build a lookup table from old to new key + let mut lookup = vec![0; sort_indices.len()]; + sort_indices + .values() + .iter() + .enumerate() + .for_each(|(i, idx)| { + lookup[*idx as usize] = i; + }); + + let mapped_keys_iter = self.keys_iter().map(|opt_key| { + if let Some(key) = opt_key { + // Safety: + // lookup has the same length as the dictionary values + // so if the keys were valid for values they will be valid indices into lookup + unsafe { + debug_assert!(key < lookup.len()); + let new_key = *lookup.get_unchecked(key); + debug_assert!(new_key < values.len()); + K::Native::from_usize(new_key).unwrap_unchecked() + } + } else { + K::default_value() + } + }); + + // Safety: + // PrimitiveIter has a trusted len + let new_key_buffer = + unsafe { Buffer::from_trusted_len_iter(mapped_keys_iter) }; + + // Safety: + // after remapping the keys will be in the same range as before + let new_data = unsafe { + ArrayData::new_unchecked( + self.data_type().clone(), + self.len(), + Some(self.data.null_count()), + self.data + .null_buffer() + .map(|b| b.bit_slice(self.data.offset(), self.len())), + 0, + vec![new_key_buffer], + vec![sorted_dictionary.data().clone()], + ) + }; + + Ok(DictionaryArray::from(new_data).as_ordered()) + } + } + /// Return an iterator over the keys (indexes into the dictionary) pub fn keys_iter(&self) -> impl Iterator> + '_ { self.keys @@ -485,6 +573,36 @@ mod tests { .expect("All null array has valid array data"); } + #[test] + fn test_dictionary_make_ordered() { + let test = vec![ + Some("b"), + Some("b"), + None, + Some("d"), + Some("d"), + Some("c"), + Some("a"), + ]; + let array: DictionaryArray = test.into_iter().collect(); + + let ordered = array.make_ordered().unwrap(); + let actual_keys = ordered.keys.iter().collect::>(); + + let expected_keys = + vec![Some(1), Some(1), None, Some(3), Some(3), Some(2), Some(0)]; + assert_eq!(&expected_keys, &actual_keys); + + let expected_values = StringArray::from(vec!["a", "b", "c", "d"]); + let actual_values = ordered + .values + .as_any() + .downcast_ref::() + .unwrap(); + + assert_eq!(&expected_values, actual_values); + } + #[test] fn test_dictionary_iter() { // Construct a value array diff --git a/arrow/src/array/ord.rs b/arrow/src/array/ord.rs index be910f96bd54..7302e403e57e 100644 --- a/arrow/src/array/ord.rs +++ b/arrow/src/array/ord.rs @@ -83,6 +83,7 @@ where fn compare_dict_string(left: &dyn Array, right: &dyn Array) -> DynComparator where T: ArrowDictionaryKeyType, + T::Native: Ord, { let left = left.as_any().downcast_ref::>().unwrap(); let right = right.as_any().downcast_ref::>().unwrap(); @@ -92,13 +93,22 @@ where let left_values = StringArray::from(left.values().data().clone()); let right_values = StringArray::from(right.values().data().clone()); - Box::new(move |i: usize, j: usize| { - let key_left = left_keys.value(i).to_usize().unwrap(); - let key_right = right_keys.value(j).to_usize().unwrap(); - let left = left_values.value(key_left); - let right = right_values.value(key_right); - left.cmp(right) - }) + // only compare by keys if both arrays actually point to the same value buffers + if left.is_ordered() && ArrayData::ptr_eq(left_values.data(), right_values.data()) { + Box::new(move |i: usize, j: usize| { + let key_left = left_keys.value(i); + let key_right = right_keys.value(j); + ::cmp(&key_left, &key_right) + }) + } else { + Box::new(move |i: usize, j: usize| { + let key_left = left_keys.value(i).to_usize().unwrap(); + let key_right = right_keys.value(j).to_usize().unwrap(); + let left = left_values.value(key_left); + let right = right_values.value(key_right); + left.cmp(right) + }) + } } /// returns a comparison function that compares two values at two different positions @@ -119,8 +129,9 @@ where /// # Ok(()) /// # } /// ``` +/// The returned comparator should only be called for non-null elements as the result is undefined otherwise. +/// The caller should check the validity of both indices and then decide whether NULLs should come first or last. // This is a factory of comparisons. -// The lifetime 'a enforces that we cannot use the closure beyond any of the array's lifetime. pub fn build_compare(left: &dyn Array, right: &dyn Array) -> Result { use DataType::*; use IntervalUnit::*; @@ -325,6 +336,22 @@ pub mod tests { Ok(()) } + #[test] + fn test_dict_keys() -> Result<()> { + let data = vec!["a", "b", "c", "a", "a", "c", "c"]; + let array = data + .into_iter() + .collect::>() + .as_ordered(); + + let cmp = build_compare(&array, &array)?; + + assert_eq!(Ordering::Less, (cmp)(0, 1)); + assert_eq!(Ordering::Equal, (cmp)(3, 4)); + assert_eq!(Ordering::Greater, (cmp)(2, 3)); + Ok(()) + } + #[test] fn test_multiple_dict() -> Result<()> { let d1 = vec!["a", "b", "c", "d"]; diff --git a/arrow/src/compute/kernels/sort.rs b/arrow/src/compute/kernels/sort.rs index 140a57f33ed5..fb5f9be7a949 100644 --- a/arrow/src/compute/kernels/sort.rs +++ b/arrow/src/compute/kernels/sort.rs @@ -22,6 +22,7 @@ use crate::buffer::MutableBuffer; use crate::compute::take; use crate::datatypes::*; use crate::error::{ArrowError, Result}; +use crate::util::bit_util::get_bit_raw; use std::cmp::Ordering; use TimeUnit::*; @@ -151,8 +152,21 @@ fn partition_validity(array: &ArrayRef) -> (Vec, Vec) { // faster path 0 => ((0..(array.len() as u32)).collect(), vec![]), _ => { - let indices = 0..(array.len() as u32); - indices.partition(|index| array.is_valid(*index as usize)) + let validity = array.data().null_buffer().unwrap(); + let offset = array.data().offset(); + let mut vecs = [ + Vec::with_capacity(array.null_count()), + Vec::with_capacity(array.len() - array.null_count()), + ]; + for i in 0..array.len() { + // Safety: + // Index is in bounds for this array and raw access to the validity needs to take offset into account + let bit = unsafe { get_bit_raw(validity.as_ptr(), offset + i) }; + vecs[bit as usize].push(i as u32); + } + let nulls = std::mem::take(&mut vecs[0]); + let valids = std::mem::take(&mut vecs[1]); + (valids, nulls) } } } @@ -327,42 +341,35 @@ pub fn sort_to_indices( ))); } }, - DataType::Dictionary(key_type, value_type) - if *value_type.as_ref() == DataType::Utf8 => - { - match key_type.as_ref() { - DataType::Int8 => { - sort_string_dictionary::(values, v, n, &options, limit) - } - DataType::Int16 => { - sort_string_dictionary::(values, v, n, &options, limit) - } - DataType::Int32 => { - sort_string_dictionary::(values, v, n, &options, limit) - } - DataType::Int64 => { - sort_string_dictionary::(values, v, n, &options, limit) - } - DataType::UInt8 => { - sort_string_dictionary::(values, v, n, &options, limit) - } - DataType::UInt16 => { - sort_string_dictionary::(values, v, n, &options, limit) - } - DataType::UInt32 => { - sort_string_dictionary::(values, v, n, &options, limit) - } - DataType::UInt64 => { - sort_string_dictionary::(values, v, n, &options, limit) - } - t => { - return Err(ArrowError::ComputeError(format!( - "Sort not supported for dictionary key type {:?}", - t - ))); - } + + DataType::Dictionary(key_type, _value_type) => match key_type.as_ref() { + DataType::Int8 => sort_dictionary::(values, v, n, &options, limit), + DataType::Int16 => { + sort_dictionary::(values, v, n, &options, limit) } - } + DataType::Int32 => { + sort_dictionary::(values, v, n, &options, limit) + } + DataType::Int64 => { + sort_dictionary::(values, v, n, &options, limit) + } + DataType::UInt8 => { + sort_dictionary::(values, v, n, &options, limit) + } + DataType::UInt16 => { + sort_dictionary::(values, v, n, &options, limit) + } + DataType::UInt32 => { + sort_dictionary::(values, v, n, &options, limit) + } + DataType::UInt64 => { + sort_dictionary::(values, v, n, &options, limit) + } + t => Err(ArrowError::ComputeError(format!( + "Sort not supported for dictionary key type {:?}", + t + ))), + }?, DataType::Binary | DataType::FixedSizeBinary(_) => { sort_binary::(values, v, n, &options, limit) } @@ -506,7 +513,7 @@ where .into_iter() .map(|index| (index, decimal_array.value(index as usize))) .collect::>(); - sort_primitive_inner(decimal_values, null_indices, cmp, options, limit, valids) + sort_primitive_inner(null_indices, cmp, options, limit, valids) } /// Sort primitive values @@ -531,12 +538,11 @@ where .map(|index| (index, values.value(index as usize))) .collect::>() }; - sort_primitive_inner(values, null_indices, cmp, options, limit, valids) + sort_primitive_inner(null_indices, cmp, options, limit, valids) } // sort is instantiated a lot so we only compile this inner version for each native type fn sort_primitive_inner( - values: &ArrayRef, null_indices: Vec, cmp: F, options: &SortOptions, @@ -552,7 +558,7 @@ where let valids_len = valids.len(); let nulls_len = nulls.len(); - let mut len = values.len(); + let mut len = valids_len + nulls_len; if let Some(limit) = limit { len = limit.min(len); @@ -637,31 +643,54 @@ fn sort_string( } /// Sort dictionary encoded strings -fn sort_string_dictionary( +fn sort_dictionary( values: &ArrayRef, value_indices: Vec, null_indices: Vec, options: &SortOptions, limit: Option, -) -> UInt32Array { - let values: &DictionaryArray = as_dictionary_array::(values); - - let keys: &PrimitiveArray = values.keys(); - - let dict = values.values(); - let dict: &StringArray = as_string_array(dict); +) -> Result +where + T::Native: Ord, +{ + let dictionary_array: &DictionaryArray = as_dictionary_array::(values); + let keys: &PrimitiveArray = dictionary_array.keys(); - sort_string_helper( - keys, - value_indices, - null_indices, - options, - limit, - |array: &PrimitiveArray, idx| -> &str { - let key: T::Native = array.value(idx as usize); - dict.value(key.to_usize().unwrap()) - }, - ) + if dictionary_array.is_ordered() { + // create tuples that are used for sorting + let valids = value_indices + .into_iter() + .map(|index| (index, keys.value(index as usize))) + .collect::>(); + + Ok(sort_primitive_inner( + null_indices, + cmp, + options, + limit, + valids, + )) + } else if dictionary_array.value_type() == DataType::Utf8 { + let dict = dictionary_array.values(); + let dict: &StringArray = as_string_array(dict); + + Ok(sort_string_helper( + keys, + value_indices, + null_indices, + options, + limit, + |array: &PrimitiveArray, idx| -> &str { + let key: T::Native = array.value(idx as usize); + dict.value(key.to_usize().unwrap()) + }, + )) + } else { + Err(ArrowError::ComputeError(format!( + "Sort not supported for dictionary values of data type {:?}", + dictionary_array.data_type() + ))) + } } /// shared implementation between dictionary encoded and plain string arrays @@ -1019,11 +1048,8 @@ impl LexicographicalComparator<'_> { // use ArrayData for is_valid checks later to avoid dynamic call let values = column.values.as_ref(); let data = values.data_ref(); - Ok(( - data, - build_compare(values, values)?, - column.options.unwrap_or_default(), - )) + let options = column.options.unwrap_or_default(); + Ok((data, build_compare(values, values)?, options)) }) .collect::>>()?; Ok(LexicographicalComparator { compare_items }) @@ -1210,10 +1236,16 @@ mod tests { fn test_sort_string_dict_arrays( data: Vec>, options: Option, + ordered: bool, limit: Option, expected_data: Vec>, ) { - let array = data.into_iter().collect::>(); + let mut array = data.into_iter().collect::>(); + + if ordered { + array = array.as_ordered(); + } + let array_values = array.values().clone(); let dict = array_values .as_any() @@ -2418,6 +2450,7 @@ mod tests { Some("-ad"), ], None, + false, None, vec![ None, @@ -2442,6 +2475,7 @@ mod tests { descending: true, nulls_first: false, }), + false, None, vec![ Some("sad"), @@ -2466,6 +2500,7 @@ mod tests { descending: false, nulls_first: true, }), + false, None, vec![ None, @@ -2490,6 +2525,7 @@ mod tests { descending: true, nulls_first: true, }), + false, None, vec![ None, @@ -2514,6 +2550,7 @@ mod tests { descending: true, nulls_first: true, }), + false, Some(3), vec![None, None, Some("sad")], ); @@ -2525,6 +2562,7 @@ mod tests { descending: false, nulls_first: false, }), + false, Some(3), vec![Some("abc"), Some("def"), None], ); @@ -2535,6 +2573,7 @@ mod tests { descending: false, nulls_first: true, }), + false, Some(3), vec![None, None, Some("abc")], ); @@ -2546,6 +2585,7 @@ mod tests { descending: false, nulls_first: true, }), + false, Some(2), vec![None, None], ); @@ -2556,11 +2596,120 @@ mod tests { descending: false, nulls_first: false, }), + false, Some(2), vec![Some("def"), None], ); } + #[test] + fn test_sort_dicts_by_key() { + // this test sorts the dictionary by its keys instead of values + // since we do not specify the keys here directly, + // they get assigned in the order of the values in the vector + // For example values of ["B", "A", "A"] result in the keys [0, 1, 1] + + test_sort_string_dict_arrays::( + vec![None, Some("B"), Some("A"), None, Some("C"), Some("A")], + Some(SortOptions { + ..Default::default() + }), + true, + None, + vec![None, None, Some("B"), Some("A"), Some("A"), Some("C")], + ); + + test_sort_string_dict_arrays::( + vec![None, Some("B"), Some("A"), None, Some("C"), Some("A")], + Some(SortOptions { + descending: true, + nulls_first: false, + }), + true, + None, + vec![Some("C"), Some("A"), Some("A"), Some("B"), None, None], + ); + + test_sort_string_dict_arrays::( + vec![None, Some("B"), Some("A"), None, Some("C"), Some("A")], + Some(SortOptions { + descending: false, + nulls_first: true, + }), + true, + None, + vec![None, None, Some("B"), Some("A"), Some("A"), Some("C")], + ); + + test_sort_string_dict_arrays::( + vec![None, Some("B"), Some("A"), None, Some("C"), Some("A")], + Some(SortOptions { + descending: true, + nulls_first: true, + }), + true, + None, + vec![None, None, Some("C"), Some("A"), Some("A"), Some("B")], + ); + + test_sort_string_dict_arrays::( + vec![None, Some("B"), Some("A"), None, Some("C"), Some("A")], + Some(SortOptions { + descending: true, + nulls_first: true, + }), + true, + Some(3), + vec![None, None, Some("C")], + ); + + // valid values less than limit with extra nulls + test_sort_string_dict_arrays::( + vec![Some("B"), None, None, Some("A")], + Some(SortOptions { + descending: false, + nulls_first: false, + }), + true, + Some(3), + vec![Some("B"), Some("A"), None], + ); + + test_sort_string_dict_arrays::( + vec![Some("B"), None, None, Some("A")], + Some(SortOptions { + descending: false, + nulls_first: true, + }), + true, + Some(3), + vec![None, None, Some("B")], + ); + + // more nulls than limit + test_sort_string_dict_arrays::( + vec![Some("A"), None, None, None], + Some(SortOptions { + descending: false, + nulls_first: true, + }), + true, + Some(2), + vec![None, None], + ); + + test_sort_string_dict_arrays::( + vec![Some("A"), None, None, None], + Some(SortOptions { + descending: false, + nulls_first: false, + }), + true, + Some(2), + vec![Some("A"), None], + ); + } + #[test] fn test_sort_list() { test_sort_list_arrays::(