diff --git a/.gitignore b/.gitignore index 5b3bf6c4a66e..e8d9955b6488 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,6 @@ target rusty-tags.vi .history .flatbuffers/ - +.idea/ .vscode venv/* diff --git a/arrow/src/compute/kernels/sort.rs b/arrow/src/compute/kernels/sort.rs index bf8eda353e6c..30341b6f63a6 100644 --- a/arrow/src/compute/kernels/sort.rs +++ b/arrow/src/compute/kernels/sort.rs @@ -257,7 +257,8 @@ pub fn sort_to_indices( values, v, n, cmp, &options, limit, ) } - DataType::Utf8 => sort_string(values, v, n, &options, limit), + DataType::Utf8 => sort_string::(values, v, n, &options, limit), + DataType::LargeUtf8 => sort_string::(values, v, n, &options, limit), DataType::List(field) => match field.data_type() { DataType::Int8 => sort_list::(values, v, n, &options, limit), DataType::Int16 => sort_list::(values, v, n, &options, limit), @@ -545,14 +546,17 @@ fn insert_valid_values(result_slice: &mut [u32], offset: usize, valids: &[(u3 } /// Sort strings -fn sort_string( +fn sort_string( values: &ArrayRef, value_indices: Vec, null_indices: Vec, options: &SortOptions, limit: Option, ) -> Result { - let values = as_string_array(values); + let values = values + .as_any() + .downcast_ref::>() + .unwrap(); sort_string_helper( values, @@ -958,14 +962,25 @@ mod tests { assert_eq!(output, expected) } + /// Tests both Utf8 and LargeUtf8 fn test_sort_string_arrays( data: Vec>, options: Option, limit: Option, expected_data: Vec>, ) { - let output = StringArray::from(data); - let expected = Arc::new(StringArray::from(expected_data)) as ArrayRef; + let output = StringArray::from(data.clone()); + let expected = Arc::new(StringArray::from(expected_data.clone())) as ArrayRef; + let output = match limit { + Some(_) => { + sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap() + } + _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(), + }; + assert_eq!(&output, &expected); + + let output = LargeStringArray::from(data); + let expected = Arc::new(LargeStringArray::from(expected_data)) as ArrayRef; let output = match limit { Some(_) => { sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap()