From cece9ebbcd95cdb8de3b290368c4392e9b1cb3af Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Sat, 27 Jan 2024 23:27:47 +0800 Subject: [PATCH 01/15] support array_reverse --- datafusion/expr/src/built_in_function.rs | 6 ++ datafusion/expr/src/expr_fn.rs | 6 ++ .../physical-expr/src/array_expressions.rs | 59 +++++++++++++++++++ datafusion/physical-expr/src/functions.rs | 3 + datafusion/proto/proto/datafusion.proto | 1 + datafusion/proto/src/generated/pbjson.rs | 3 + datafusion/proto/src/generated/prost.rs | 3 + .../proto/src/logical_plan/from_proto.rs | 2 + datafusion/proto/src/logical_plan/to_proto.rs | 1 + .../source/user-guide/sql/scalar_functions.md | 29 +++++++++ 10 files changed, 113 insertions(+) diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index f2eb82ebf9bd..b7bb17c86be7 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -171,6 +171,8 @@ pub enum BuiltinScalarFunction { ArrayReplaceN, /// array_replace_all ArrayReplaceAll, + /// array_reverse + ArrayReverse, /// array_slice ArraySlice, /// array_to_string @@ -427,6 +429,7 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayReplace => Volatility::Immutable, BuiltinScalarFunction::ArrayReplaceN => Volatility::Immutable, BuiltinScalarFunction::ArrayReplaceAll => Volatility::Immutable, + BuiltinScalarFunction::ArrayReverse => Volatility::Immutable, BuiltinScalarFunction::Flatten => Volatility::Immutable, BuiltinScalarFunction::ArraySlice => Volatility::Immutable, BuiltinScalarFunction::ArrayToString => Volatility::Immutable, @@ -622,6 +625,7 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayReplace => Ok(input_expr_types[0].clone()), BuiltinScalarFunction::ArrayReplaceN => Ok(input_expr_types[0].clone()), BuiltinScalarFunction::ArrayReplaceAll => Ok(input_expr_types[0].clone()), + BuiltinScalarFunction::ArrayReverse => Ok(input_expr_types[0].clone()), BuiltinScalarFunction::ArraySlice => Ok(input_expr_types[0].clone()), BuiltinScalarFunction::ArrayResize => Ok(input_expr_types[0].clone()), BuiltinScalarFunction::ArrayToString => Ok(Utf8), @@ -961,6 +965,7 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayReplaceAll => { Signature::any(3, self.volatility()) } + BuiltinScalarFunction::ArrayReverse => Signature::any(1, self.volatility()), BuiltinScalarFunction::ArraySlice => { Signature::variadic_any(self.volatility()) } @@ -1567,6 +1572,7 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayReplaceAll => { &["array_replace_all", "list_replace_all"] } + BuiltinScalarFunction::ArrayReverse => &["array_reverse", "list_reverse"], BuiltinScalarFunction::ArraySlice => &["array_slice", "list_slice"], BuiltinScalarFunction::ArrayToString => &[ "array_to_string", diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 4608badde231..877066aabfed 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -728,6 +728,12 @@ scalar_expr!( array from to, "replaces all occurrences of the specified element with another specified element." ); +scalar_expr!( + ArrayReverse, + array_reverse, + array, + "reverses the order of elements in the array." +); scalar_expr!( ArraySlice, array_slice, diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index a3dec2762c10..f7e2678fab08 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -2766,6 +2766,65 @@ where )?)) } +/// array_reverse SQL function +pub fn array_reverse(arg: &[ArrayRef]) -> Result { + if arg.len() != 1 { + return exec_err!("array_reverse needs one argument"); + } + + match &arg[0].data_type() { + DataType::List(field) => { + let array = as_list_array(&arg[0])?; + general_array_reverse::(array, &field) + } + DataType::LargeList(field) => { + let array = as_large_list_array(&arg[0])?; + general_array_reverse::(array, &field) + } + array_type => exec_err!("array_reverse does not support type '{array_type:?}'."), + } +} + +fn general_array_reverse( + array: &GenericListArray, + field: &FieldRef, +) -> Result +where + O: TryFrom, +{ + let values = array.values(); + let original_data = values.to_data(); + let capacity = Capacities::Array(original_data.len()); + let mut offsets = vec![O::usize_as(0)]; + let mut mutable = + MutableArrayData::with_capacities(vec![&original_data], false, capacity); + + for (row_index, offset_window) in array.offsets().windows(2).enumerate() { + let start = offset_window[0]; + let end = offset_window[1]; + + let mut index = end - O::one(); + let mut cnt = 0; + let stride: O = (-1 as i64).try_into().map_err(|_| { + internal_datafusion_err!("array_reverse: failed to convert size to i64") + })?; + while index >= start { + mutable.extend(0, index.to_usize().unwrap(), index.to_usize().unwrap() + 1); + index += stride; + cnt += 1; + } + offsets.push(offsets[row_index] + O::usize_as(cnt)); + } + + let data = mutable.freeze(); + Ok(Arc::new(GenericListArray::::try_new( + field.clone(), + OffsetBuffer::::new(offsets.into()), + arrow_array::make_array(data), + None, + )?)) +} + #[cfg(test)] mod tests { use super::*; diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs index cd4e6f96f0fe..21eaeab7213a 100644 --- a/datafusion/physical-expr/src/functions.rs +++ b/datafusion/physical-expr/src/functions.rs @@ -445,6 +445,9 @@ pub fn create_physical_fun( BuiltinScalarFunction::ArrayReplaceAll => Arc::new(|args| { make_scalar_function_inner(array_expressions::array_replace_all)(args) }), + BuiltinScalarFunction::ArrayReverse => Arc::new(|args| { + make_scalar_function_inner(array_expressions::array_reverse)(args) + }), BuiltinScalarFunction::ArraySlice => Arc::new(|args| { make_scalar_function_inner(array_expressions::array_slice)(args) }), diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index 0b93820db841..f2b5c5dd4239 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -670,6 +670,7 @@ enum ScalarFunction { EndsWith = 131; InStr = 132; MakeDate = 133; + ArrayReverse = 134; } message ScalarFunctionNode { diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 55e83a885382..b9a8c5fc0782 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -22422,6 +22422,7 @@ impl serde::Serialize for ScalarFunction { Self::EndsWith => "EndsWith", Self::InStr => "InStr", Self::MakeDate => "MakeDate", + Self::ArrayReverse => "ArrayReverse", }; serializer.serialize_str(variant) } @@ -22565,6 +22566,7 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "EndsWith", "InStr", "MakeDate", + "ArrayReverse", ]; struct GeneratedVisitor; @@ -22737,6 +22739,7 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "EndsWith" => Ok(ScalarFunction::EndsWith), "InStr" => Ok(ScalarFunction::InStr), "MakeDate" => Ok(ScalarFunction::MakeDate), + "ArrayReverse" => Ok(ScalarFunction::ArrayReverse), _ => Err(serde::de::Error::unknown_variant(value, FIELDS)), } } diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index b17bcd3a49d7..758ef2dcb5f3 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -2765,6 +2765,7 @@ pub enum ScalarFunction { EndsWith = 131, InStr = 132, MakeDate = 133, + ArrayReverse = 134, } impl ScalarFunction { /// String value of the enum field names used in the ProtoBuf definition. @@ -2905,6 +2906,7 @@ impl ScalarFunction { ScalarFunction::EndsWith => "EndsWith", ScalarFunction::InStr => "InStr", ScalarFunction::MakeDate => "MakeDate", + ScalarFunction::ArrayReverse => "ArrayReverse", } } /// Creates an enum from field names used in the ProtoBuf definition. @@ -3042,6 +3044,7 @@ impl ScalarFunction { "EndsWith" => Some(Self::EndsWith), "InStr" => Some(Self::InStr), "MakeDate" => Some(Self::MakeDate), + "ArrayReverse" => Some(Self::ArrayReverse), _ => None, } } diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index b025f79bd1d0..cabc68825fc0 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -501,6 +501,7 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { ScalarFunction::ArrayReplace => Self::ArrayReplace, ScalarFunction::ArrayReplaceN => Self::ArrayReplaceN, ScalarFunction::ArrayReplaceAll => Self::ArrayReplaceAll, + ScalarFunction::ArrayReverse => Self::ArrayReverse, ScalarFunction::ArraySlice => Self::ArraySlice, ScalarFunction::ArrayToString => Self::ArrayToString, ScalarFunction::ArrayIntersect => Self::ArrayIntersect, @@ -1458,6 +1459,7 @@ pub fn parse_expr( parse_expr(&args[1], registry)?, parse_expr(&args[2], registry)?, )), + ScalarFunction::ArrayReverse => parse_expr(&args[0], registry)?, ScalarFunction::ArraySlice => Ok(array_slice( parse_expr(&args[0], registry)?, parse_expr(&args[1], registry)?, diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index f7be15136bbb..b9a61445c88e 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -1500,6 +1500,7 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction { BuiltinScalarFunction::ArrayReplace => Self::ArrayReplace, BuiltinScalarFunction::ArrayReplaceN => Self::ArrayReplaceN, BuiltinScalarFunction::ArrayReplaceAll => Self::ArrayReplaceAll, + BuiltInWindowFunction::ArrayReverse => Self::ArrayReverse, BuiltinScalarFunction::ArraySlice => Self::ArraySlice, BuiltinScalarFunction::ArrayToString => Self::ArrayToString, BuiltinScalarFunction::ArrayIntersect => Self::ArrayIntersect, diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 7bec80b55e26..732eb3e4b5ef 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -1793,6 +1793,7 @@ from_unixtime(expression) - [array_replace](#array_replace) - [array_replace_n](#array_replace_n) - [array_replace_all](#array_replace_all) +- [array_reverse](#array_reverse) - [array_slice](#array_slice) - [array_to_string](#array_to_string) - [cardinality](#cardinality) @@ -2523,6 +2524,34 @@ array_replace_all(array, from, to) - list_replace_all +### `array_reverse` + +Returns the array with the order of the elements reversed. + +``` +array_reverse(array) +``` + +#### Arguments + +- **array**: Array expression. + Can be a constant, column, or function, and any combination of array operators. + +#### Example + +``` +❯ select array_reverse([1, 2, 3, 4]); ++------------------------------------------------------------+ +| array_reverse(List([1,2,2,3,2,1,4])) | ++------------------------------------------------------------+ +| [4, 3, 2, 1] | ++------------------------------------------------------------+ +``` + +#### Aliases + +- list_reverse + ### `array_slice` Returns a slice of the array based on 1-indexed start and end positions. From 2ca5a74a15151dd262becfa908418578e55bc9ab Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Sat, 27 Jan 2024 23:32:35 +0800 Subject: [PATCH 02/15] fix typo --- docs/source/user-guide/sql/scalar_functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 732eb3e4b5ef..fee50eb2a9b8 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -2542,7 +2542,7 @@ array_reverse(array) ``` ❯ select array_reverse([1, 2, 3, 4]); +------------------------------------------------------------+ -| array_reverse(List([1,2,2,3,2,1,4])) | +| array_reverse(List([1, 2, 3, 4])) | +------------------------------------------------------------+ | [4, 3, 2, 1] | +------------------------------------------------------------+ From 705f07a50568a6c65cd44abccce8edabc06c94b2 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Sat, 27 Jan 2024 23:38:29 +0800 Subject: [PATCH 03/15] add test --- datafusion/sqllogictest/test_files/array.slt | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index e072e4146f13..802b600a66e3 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -5054,6 +5054,22 @@ select array_resize(arrow_cast([[1], [2], [3]], 'LargeList(List(Int64))'), 10, [ ---- [[1], [2], [3], [5], [5], [5], [5], [5], [5], [5]] +## array_reverse +query ?? +select array_reverse(make_array(1, 2, 3)), array_reverse(make_array(1)); +---- +[3, 2, 1] [1] + +query ?? +select array_reverse(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), array_reverse(arrow_cast(make_array(1), 'LargeList(Int64)')); +---- +[3, 2, 1] [1] + +query ? +select array_reverse(NULL); +---- +NULL + ### Delete tables statement ok From 319c402856ee0c551ae38c97051b4a9414140bfb Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Sat, 27 Jan 2024 23:40:48 +0800 Subject: [PATCH 04/15] fix NULL --- datafusion/physical-expr/src/array_expressions.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index f7e2678fab08..bad3ba4be1f8 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -2781,6 +2781,7 @@ pub fn array_reverse(arg: &[ArrayRef]) -> Result { let array = as_large_list_array(&arg[0])?; general_array_reverse::(array, &field) } + DataType::Null => Ok(arg[0].clone()), array_type => exec_err!("array_reverse does not support type '{array_type:?}'."), } } From bebe35f37919151d93144da2fd2a68e35db17df8 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Sat, 27 Jan 2024 23:41:43 +0800 Subject: [PATCH 05/15] fix parse_expr --- datafusion/proto/src/logical_plan/from_proto.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index cabc68825fc0..e27d68350b41 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -1459,7 +1459,7 @@ pub fn parse_expr( parse_expr(&args[1], registry)?, parse_expr(&args[2], registry)?, )), - ScalarFunction::ArrayReverse => parse_expr(&args[0], registry)?, + ScalarFunction::ArrayReverse => Ok(parse_expr(&args[0], registry)?), ScalarFunction::ArraySlice => Ok(array_slice( parse_expr(&args[0], registry)?, parse_expr(&args[1], registry)?, From 1918812862e744f7b283c0176fda78b7c57336cf Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Sat, 27 Jan 2024 23:45:43 +0800 Subject: [PATCH 06/15] fix typo --- datafusion/proto/src/logical_plan/to_proto.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index b9a61445c88e..e094994840b2 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -1500,7 +1500,7 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction { BuiltinScalarFunction::ArrayReplace => Self::ArrayReplace, BuiltinScalarFunction::ArrayReplaceN => Self::ArrayReplaceN, BuiltinScalarFunction::ArrayReplaceAll => Self::ArrayReplaceAll, - BuiltInWindowFunction::ArrayReverse => Self::ArrayReverse, + BuiltinScalarFunction::ArrayReverse => Self::ArrayReverse, BuiltinScalarFunction::ArraySlice => Self::ArraySlice, BuiltinScalarFunction::ArrayToString => Self::ArrayToString, BuiltinScalarFunction::ArrayIntersect => Self::ArrayIntersect, From 001b4360a41feda035a97d82415f5ce9e3719421 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Sat, 27 Jan 2024 23:54:46 +0800 Subject: [PATCH 07/15] fix null in column --- datafusion/physical-expr/src/array_expressions.rs | 8 +++++++- datafusion/sqllogictest/test_files/array.slt | 12 ++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index bad3ba4be1f8..21f66764a786 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -2797,6 +2797,7 @@ where let original_data = values.to_data(); let capacity = Capacities::Array(original_data.len()); let mut offsets = vec![O::usize_as(0)]; + let mut nulls = vec![]; let mut mutable = MutableArrayData::with_capacities(vec![&original_data], false, capacity); @@ -2815,6 +2816,11 @@ where cnt += 1; } offsets.push(offsets[row_index] + O::usize_as(cnt)); + if cnt == 0 { + nulls.push(false); + } else { + nulls.push(true); + } } let data = mutable.freeze(); @@ -2822,7 +2828,7 @@ where field.clone(), OffsetBuffer::::new(offsets.into()), arrow_array::make_array(data), - None, + Some(nulls.into()), )?)) } diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 802b600a66e3..8aed72309c60 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -5070,6 +5070,18 @@ select array_reverse(NULL); ---- NULL +query ?? +select array_reverse(column1), column1 from arrays_values; +---- +[10, 9, 8, 7, 6, 5, 4, 3, 2, ] [, 2, 3, 4, 5, 6, 7, 8, 9, 10] +[20, , 18, 17, 16, 15, 14, 13, 12, 11] [11, 12, 13, 14, 15, 16, 17, 18, , 20] +[30, 29, 28, 27, 26, 25, , 23, 22, 21] [21, 22, 23, , 25, 26, 27, 28, 29, 30] +[40, 39, 38, 37, , 35, 34, 33, 32, 31] [31, 32, 33, 34, 35, , 37, 38, 39, 40] +NULL NULL +[50, 49, 48, 47, 46, 45, 44, 43, 42, 41] [41, 42, 43, 44, 45, 46, 47, 48, 49, 50] +[60, 59, 58, 57, 56, 55, 54, , 52, 51] [51, 52, , 54, 55, 56, 57, 58, 59, 60] +[70, 69, 68, 67, 66, 65, 64, 63, 62, 61] [61, 62, 63, 64, 65, 66, 67, 68, 69, 70] + ### Delete tables statement ok From b9d10129098df014380ca6474acff9737a9b1d5e Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Sun, 28 Jan 2024 10:19:47 +0800 Subject: [PATCH 08/15] fix null --- datafusion/physical-expr/src/array_expressions.rs | 3 ++- datafusion/sqllogictest/test_files/array.slt | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index 21f66764a786..6f4d26d384ba 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -2816,7 +2816,8 @@ where cnt += 1; } offsets.push(offsets[row_index] + O::usize_as(cnt)); - if cnt == 0 { + + if array.is_null(row_index) { nulls.push(false); } else { nulls.push(true); diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 8aed72309c60..eff22f94e576 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -5065,10 +5065,10 @@ select array_reverse(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), array ---- [3, 2, 1] [1] -query ? -select array_reverse(NULL); +query ?? +select array_reverse(NULL), array_reverse([]); ---- -NULL +NULL [] query ?? select array_reverse(column1), column1 from arrays_values; From f4621627c1c76d369a03a167948f895401a4861c Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Sun, 28 Jan 2024 10:19:54 +0800 Subject: [PATCH 09/15] add md --- docs/source/user-guide/sql/scalar_functions.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index fee50eb2a9b8..ba69b53e69af 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -2852,6 +2852,10 @@ _Alias of [array_replace_n](#array_replace_n)._ _Alias of [array_replace_all](#array_replace_all)._ +### `list_reverse` + +_Alias of [array_reverse](#array_reverse)._ + ### `list_slice` _Alias of [array_slice](#array_slice)._ From e5834a3005d170862b4ece55d71ed4ea38a67738 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Sun, 28 Jan 2024 11:02:23 +0800 Subject: [PATCH 10/15] fix ci --- datafusion/proto/src/logical_plan/from_proto.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index e27d68350b41..bdbc2ce7e497 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -44,7 +44,7 @@ use datafusion_common::{ Constraints, DFField, DFSchema, DFSchemaRef, DataFusionError, OwnedTableReference, Result, ScalarValue, }; -use datafusion_expr::expr::{Alias, Placeholder}; +use datafusion_expr::{array_reverse, expr::{Alias, Placeholder}}; use datafusion_expr::window_frame::{check_window_frame, regularize_window_order_by}; use datafusion_expr::{ abs, acos, acosh, array, array_append, array_concat, array_dims, array_distinct, @@ -1459,7 +1459,9 @@ pub fn parse_expr( parse_expr(&args[1], registry)?, parse_expr(&args[2], registry)?, )), - ScalarFunction::ArrayReverse => Ok(parse_expr(&args[0], registry)?), + ScalarFunction::ArrayReverse => { + Ok(array_reverse(parse_expr(&args[0], registry)?)) + } ScalarFunction::ArraySlice => Ok(array_slice( parse_expr(&args[0], registry)?, parse_expr(&args[1], registry)?, From a0ff22d0de24e8ad6ef7b78e6fa30529899dc0fb Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Sun, 28 Jan 2024 11:04:49 +0800 Subject: [PATCH 11/15] add test for fixedsizelist --- datafusion/sqllogictest/test_files/array.slt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index eff22f94e576..e6a8181be1ac 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -5065,6 +5065,12 @@ select array_reverse(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), array ---- [3, 2, 1] [1] +#TODO: support after FixedSizeList type coercion +#query ?? +#select array_reverse(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), array_reverse(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')); +#---- +#[3, 2, 1] [1] + query ?? select array_reverse(NULL), array_reverse([]); ---- From 7903931858b5da14bfdb301a8360fa65f6f95ed5 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Sun, 28 Jan 2024 11:11:41 +0800 Subject: [PATCH 12/15] skip null and speed up --- .../physical-expr/src/array_expressions.rs | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index 6f4d26d384ba..504d2b24e149 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -2802,6 +2802,16 @@ where MutableArrayData::with_capacities(vec![&original_data], false, capacity); for (row_index, offset_window) in array.offsets().windows(2).enumerate() { + // skip the null value + if array.is_null(row_index) { + nulls.push(false); + offsets.push(offsets[row_index] + O::one()); + mutable.extend(0, 0, 1); + continue; + } else { + nulls.push(true); + } + let start = offset_window[0]; let end = offset_window[1]; @@ -2816,12 +2826,6 @@ where cnt += 1; } offsets.push(offsets[row_index] + O::usize_as(cnt)); - - if array.is_null(row_index) { - nulls.push(false); - } else { - nulls.push(true); - } } let data = mutable.freeze(); From ce05a4fd9ba57d0cbfe9645c541362cc73427af7 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Sun, 28 Jan 2024 11:14:50 +0800 Subject: [PATCH 13/15] fix fmt --- datafusion/proto/src/logical_plan/from_proto.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index bdbc2ce7e497..decf3b18745f 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -44,7 +44,6 @@ use datafusion_common::{ Constraints, DFField, DFSchema, DFSchemaRef, DataFusionError, OwnedTableReference, Result, ScalarValue, }; -use datafusion_expr::{array_reverse, expr::{Alias, Placeholder}}; use datafusion_expr::window_frame::{check_window_frame, regularize_window_order_by}; use datafusion_expr::{ abs, acos, acosh, array, array_append, array_concat, array_dims, array_distinct, @@ -72,6 +71,10 @@ use datafusion_expr::{ JoinConstraint, JoinType, Like, Operator, TryCast, WindowFrame, WindowFrameBound, WindowFrameUnits, }; +use datafusion_expr::{ + array_reverse, + expr::{Alias, Placeholder}, +}; #[derive(Debug)] pub enum Error { From 45330a265b3a243ba27a03465e46d1d4d61b4306 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Sun, 28 Jan 2024 11:28:02 +0800 Subject: [PATCH 14/15] fix clippy --- datafusion/physical-expr/src/array_expressions.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index 504d2b24e149..d304c80021c0 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -2775,11 +2775,11 @@ pub fn array_reverse(arg: &[ArrayRef]) -> Result { match &arg[0].data_type() { DataType::List(field) => { let array = as_list_array(&arg[0])?; - general_array_reverse::(array, &field) + general_array_reverse::(array, field) } DataType::LargeList(field) => { let array = as_large_list_array(&arg[0])?; - general_array_reverse::(array, &field) + general_array_reverse::(array, field) } DataType::Null => Ok(arg[0].clone()), array_type => exec_err!("array_reverse does not support type '{array_type:?}'."), @@ -2817,7 +2817,7 @@ where let mut index = end - O::one(); let mut cnt = 0; - let stride: O = (-1 as i64).try_into().map_err(|_| { + let stride: O = (-1_i64).try_into().map_err(|_| { internal_datafusion_err!("array_reverse: failed to convert size to i64") })?; while index >= start { From f69c29e4677a0f2b078387d38d1ea0695910f512 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Sun, 28 Jan 2024 17:26:26 +0800 Subject: [PATCH 15/15] reduce code complex --- datafusion/physical-expr/src/array_expressions.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index d304c80021c0..844dae0917c7 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -2817,12 +2817,10 @@ where let mut index = end - O::one(); let mut cnt = 0; - let stride: O = (-1_i64).try_into().map_err(|_| { - internal_datafusion_err!("array_reverse: failed to convert size to i64") - })?; + while index >= start { mutable.extend(0, index.to_usize().unwrap(), index.to_usize().unwrap() + 1); - index += stride; + index = index - O::one(); cnt += 1; } offsets.push(offsets[row_index] + O::usize_as(cnt));