Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: support array_reverse #9023

Merged
merged 15 commits into from
Jan 31, 2024
6 changes: 6 additions & 0 deletions datafusion/expr/src/built_in_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,8 @@ pub enum BuiltinScalarFunction {
ArrayReplaceN,
/// array_replace_all
ArrayReplaceAll,
/// array_reverse
ArrayReverse,
/// array_slice
ArraySlice,
/// array_to_string
Expand Down Expand Up @@ -427,6 +429,7 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayReplace => Volatility::Immutable,
BuiltinScalarFunction::ArrayReplaceN => Volatility::Immutable,
BuiltinScalarFunction::ArrayReplaceAll => Volatility::Immutable,
BuiltinScalarFunction::ArrayReverse => Volatility::Immutable,
BuiltinScalarFunction::Flatten => Volatility::Immutable,
BuiltinScalarFunction::ArraySlice => Volatility::Immutable,
BuiltinScalarFunction::ArrayToString => Volatility::Immutable,
Expand Down Expand Up @@ -622,6 +625,7 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayReplace => Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArrayReplaceN => Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArrayReplaceAll => Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArrayReverse => Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArraySlice => Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArrayResize => Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArrayToString => Ok(Utf8),
Expand Down Expand Up @@ -961,6 +965,7 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayReplaceAll => {
Signature::any(3, self.volatility())
}
BuiltinScalarFunction::ArrayReverse => Signature::any(1, self.volatility()),
BuiltinScalarFunction::ArraySlice => {
Signature::variadic_any(self.volatility())
}
Expand Down Expand Up @@ -1567,6 +1572,7 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayReplaceAll => {
&["array_replace_all", "list_replace_all"]
}
BuiltinScalarFunction::ArrayReverse => &["array_reverse", "list_reverse"],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

clickhouse support reverse(arr), I think we can have it too.

Copy link
Member Author

@Weijun-H Weijun-H Jan 29, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But reverse exists for Reverses the order of the characters in the string.
https://arrow.apache.org/datafusion/user-guide/sql/scalar_functions.html#reverse

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I didn't notice that.

BuiltinScalarFunction::ArraySlice => &["array_slice", "list_slice"],
BuiltinScalarFunction::ArrayToString => &[
"array_to_string",
Expand Down
6 changes: 6 additions & 0 deletions datafusion/expr/src/expr_fn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -728,6 +728,12 @@ scalar_expr!(
array from to,
"replaces all occurrences of the specified element with another specified element."
);
scalar_expr!(
ArrayReverse,
array_reverse,
array,
"reverses the order of elements in the array."
);
scalar_expr!(
ArraySlice,
array_slice,
Expand Down
69 changes: 69 additions & 0 deletions datafusion/physical-expr/src/array_expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2766,6 +2766,75 @@ where
)?))
}

/// array_reverse SQL function
pub fn array_reverse(arg: &[ArrayRef]) -> Result<ArrayRef> {
if arg.len() != 1 {
return exec_err!("array_reverse needs one argument");
}

match &arg[0].data_type() {
DataType::List(field) => {
let array = as_list_array(&arg[0])?;
general_array_reverse::<i32>(array, field)
}
DataType::LargeList(field) => {
let array = as_large_list_array(&arg[0])?;
general_array_reverse::<i64>(array, field)
}
DataType::Null => Ok(arg[0].clone()),
array_type => exec_err!("array_reverse does not support type '{array_type:?}'."),
}
}

fn general_array_reverse<O: OffsetSizeTrait>(
array: &GenericListArray<O>,
field: &FieldRef,
) -> Result<ArrayRef>
where
O: TryFrom<i64>,
{
let values = array.values();
let original_data = values.to_data();
let capacity = Capacities::Array(original_data.len());
let mut offsets = vec![O::usize_as(0)];
let mut nulls = vec![];
let mut mutable =
MutableArrayData::with_capacities(vec![&original_data], false, capacity);

for (row_index, offset_window) in array.offsets().windows(2).enumerate() {
// skip the null value
if array.is_null(row_index) {
nulls.push(false);
offsets.push(offsets[row_index] + O::one());
mutable.extend(0, 0, 1);
continue;
} else {
nulls.push(true);
}

let start = offset_window[0];
let end = offset_window[1];

let mut index = end - O::one();
let mut cnt = 0;

while index >= start {
mutable.extend(0, index.to_usize().unwrap(), index.to_usize().unwrap() + 1);
index = index - O::one();
cnt += 1;
}
offsets.push(offsets[row_index] + O::usize_as(cnt));
}

let data = mutable.freeze();
Ok(Arc::new(GenericListArray::<O>::try_new(
field.clone(),
OffsetBuffer::<O>::new(offsets.into()),
arrow_array::make_array(data),
Some(nulls.into()),
)?))
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
3 changes: 3 additions & 0 deletions datafusion/physical-expr/src/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,9 @@ pub fn create_physical_fun(
BuiltinScalarFunction::ArrayReplaceAll => Arc::new(|args| {
make_scalar_function_inner(array_expressions::array_replace_all)(args)
}),
BuiltinScalarFunction::ArrayReverse => Arc::new(|args| {
make_scalar_function_inner(array_expressions::array_reverse)(args)
}),
BuiltinScalarFunction::ArraySlice => Arc::new(|args| {
make_scalar_function_inner(array_expressions::array_slice)(args)
}),
Expand Down
1 change: 1 addition & 0 deletions datafusion/proto/proto/datafusion.proto
Original file line number Diff line number Diff line change
Expand Up @@ -670,6 +670,7 @@ enum ScalarFunction {
EndsWith = 131;
InStr = 132;
MakeDate = 133;
ArrayReverse = 134;
}

message ScalarFunctionNode {
Expand Down
3 changes: 3 additions & 0 deletions datafusion/proto/src/generated/pbjson.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions datafusion/proto/src/generated/prost.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 8 additions & 1 deletion datafusion/proto/src/logical_plan/from_proto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ use datafusion_common::{
Constraints, DFField, DFSchema, DFSchemaRef, DataFusionError, OwnedTableReference,
Result, ScalarValue,
};
use datafusion_expr::expr::{Alias, Placeholder};
use datafusion_expr::window_frame::{check_window_frame, regularize_window_order_by};
use datafusion_expr::{
abs, acos, acosh, array, array_append, array_concat, array_dims, array_distinct,
Expand Down Expand Up @@ -72,6 +71,10 @@ use datafusion_expr::{
JoinConstraint, JoinType, Like, Operator, TryCast, WindowFrame, WindowFrameBound,
WindowFrameUnits,
};
use datafusion_expr::{
array_reverse,
expr::{Alias, Placeholder},
};

#[derive(Debug)]
pub enum Error {
Expand Down Expand Up @@ -501,6 +504,7 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction {
ScalarFunction::ArrayReplace => Self::ArrayReplace,
ScalarFunction::ArrayReplaceN => Self::ArrayReplaceN,
ScalarFunction::ArrayReplaceAll => Self::ArrayReplaceAll,
ScalarFunction::ArrayReverse => Self::ArrayReverse,
ScalarFunction::ArraySlice => Self::ArraySlice,
ScalarFunction::ArrayToString => Self::ArrayToString,
ScalarFunction::ArrayIntersect => Self::ArrayIntersect,
Expand Down Expand Up @@ -1458,6 +1462,9 @@ pub fn parse_expr(
parse_expr(&args[1], registry)?,
parse_expr(&args[2], registry)?,
)),
ScalarFunction::ArrayReverse => {
Ok(array_reverse(parse_expr(&args[0], registry)?))
}
ScalarFunction::ArraySlice => Ok(array_slice(
parse_expr(&args[0], registry)?,
parse_expr(&args[1], registry)?,
Expand Down
1 change: 1 addition & 0 deletions datafusion/proto/src/logical_plan/to_proto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1500,6 +1500,7 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction {
BuiltinScalarFunction::ArrayReplace => Self::ArrayReplace,
BuiltinScalarFunction::ArrayReplaceN => Self::ArrayReplaceN,
BuiltinScalarFunction::ArrayReplaceAll => Self::ArrayReplaceAll,
BuiltinScalarFunction::ArrayReverse => Self::ArrayReverse,
BuiltinScalarFunction::ArraySlice => Self::ArraySlice,
BuiltinScalarFunction::ArrayToString => Self::ArrayToString,
BuiltinScalarFunction::ArrayIntersect => Self::ArrayIntersect,
Expand Down
34 changes: 34 additions & 0 deletions datafusion/sqllogictest/test_files/array.slt
Original file line number Diff line number Diff line change
Expand Up @@ -5054,6 +5054,40 @@ select array_resize(arrow_cast([[1], [2], [3]], 'LargeList(List(Int64))'), 10, [
----
[[1], [2], [3], [5], [5], [5], [5], [5], [5], [5]]

## array_reverse
query ??
select array_reverse(make_array(1, 2, 3)), array_reverse(make_array(1));
----
[3, 2, 1] [1]

query ??
select array_reverse(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), array_reverse(arrow_cast(make_array(1), 'LargeList(Int64)'));
----
[3, 2, 1] [1]

#TODO: support after FixedSizeList type coercion
#query ??
#select array_reverse(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), array_reverse(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)'));
#----
#[3, 2, 1] [1]

query ??
select array_reverse(NULL), array_reverse([]);
----
NULL []

query ??
select array_reverse(column1), column1 from arrays_values;
----
[10, 9, 8, 7, 6, 5, 4, 3, 2, ] [, 2, 3, 4, 5, 6, 7, 8, 9, 10]
[20, , 18, 17, 16, 15, 14, 13, 12, 11] [11, 12, 13, 14, 15, 16, 17, 18, , 20]
[30, 29, 28, 27, 26, 25, , 23, 22, 21] [21, 22, 23, , 25, 26, 27, 28, 29, 30]
[40, 39, 38, 37, , 35, 34, 33, 32, 31] [31, 32, 33, 34, 35, , 37, 38, 39, 40]
NULL NULL
[50, 49, 48, 47, 46, 45, 44, 43, 42, 41] [41, 42, 43, 44, 45, 46, 47, 48, 49, 50]
[60, 59, 58, 57, 56, 55, 54, , 52, 51] [51, 52, , 54, 55, 56, 57, 58, 59, 60]
[70, 69, 68, 67, 66, 65, 64, 63, 62, 61] [61, 62, 63, 64, 65, 66, 67, 68, 69, 70]

### Delete tables

statement ok
Expand Down
33 changes: 33 additions & 0 deletions docs/source/user-guide/sql/scalar_functions.md
Original file line number Diff line number Diff line change
Expand Up @@ -1793,6 +1793,7 @@ from_unixtime(expression)
- [array_replace](#array_replace)
- [array_replace_n](#array_replace_n)
- [array_replace_all](#array_replace_all)
- [array_reverse](#array_reverse)
- [array_slice](#array_slice)
- [array_to_string](#array_to_string)
- [cardinality](#cardinality)
Expand Down Expand Up @@ -2523,6 +2524,34 @@ array_replace_all(array, from, to)

- list_replace_all

### `array_reverse`

Returns the array with the order of the elements reversed.

```
array_reverse(array)
```

#### Arguments

- **array**: Array expression.
Can be a constant, column, or function, and any combination of array operators.

#### Example

```
❯ select array_reverse([1, 2, 3, 4]);
+------------------------------------------------------------+
| array_reverse(List([1, 2, 3, 4])) |
+------------------------------------------------------------+
| [4, 3, 2, 1] |
+------------------------------------------------------------+
```

#### Aliases

- list_reverse

### `array_slice`

Returns a slice of the array based on 1-indexed start and end positions.
Expand Down Expand Up @@ -2823,6 +2852,10 @@ _Alias of [array_replace_n](#array_replace_n)._

_Alias of [array_replace_all](#array_replace_all)._

### `list_reverse`

_Alias of [array_reverse](#array_reverse)._

### `list_slice`

_Alias of [array_slice](#array_slice)._
Expand Down
Loading