Skip to content

Commit 28c0673

Browse files
LiaCastanedanuno-fariafindepialamb
authored
[branch-49] fix: string_agg not respecting ORDER BY (apache#17058) (#41)
* fix: string_agg not respecting ORDER BY * Fix equality of parametrizable ArrayAgg function (apache#17065) The `ArrayAgg` struct is stateful, therefore it must implement `AggregateUDFImpl::equals` and `hash_value` functions. * Implement AggregateUDFImpl::equals and AggregateUDFImpl::hash_value for ArrayAgg * Implement alternative fix * Remove 'use std::any::Any' * Add sqllogictest for string_agg plan * Revert as_any to their original implementations --------- (cherry picked from commit f05b128) Co-authored-by: Nuno Faria <nunofpfaria@gmail.com> Co-authored-by: Piotr Findeisen <piotr.findeisen@gmail.com> Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
1 parent 025ddde commit 28c0673

File tree

2 files changed

+104
-0
lines changed

2 files changed

+104
-0
lines changed

datafusion/functions-aggregate/src/string_agg.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,10 @@ impl AggregateUDFImpl for StringAgg {
178178
)))
179179
}
180180

181+
fn reverse_expr(&self) -> datafusion_expr::ReversedUDAF {
182+
datafusion_expr::ReversedUDAF::Reversed(string_agg_udaf())
183+
}
184+
181185
fn documentation(&self) -> Option<&Documentation> {
182186
self.doc()
183187
}

datafusion/sqllogictest/test_files/aggregate.slt

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6028,6 +6028,106 @@ GROUP BY dummy
60286028
----
60296029
text1
60306030

6031+
6032+
# Test string_agg with ORDER BY clasuses (issue #17011)
6033+
statement ok
6034+
create table t (k varchar, v int);
6035+
6036+
statement ok
6037+
insert into t values ('a', 2), ('b', 3), ('c', 1), ('d', null);
6038+
6039+
query T
6040+
select string_agg(k, ',' order by k) from t;
6041+
----
6042+
a,b,c,d
6043+
6044+
query T
6045+
select string_agg(k, ',' order by k desc) from t;
6046+
----
6047+
d,c,b,a
6048+
6049+
query T
6050+
select string_agg(k, ',' order by v) from t;
6051+
----
6052+
c,a,b,d
6053+
6054+
query T
6055+
select string_agg(k, ',' order by v nulls first) from t;
6056+
----
6057+
d,c,a,b
6058+
6059+
query T
6060+
select string_agg(k, ',' order by v desc) from t;
6061+
----
6062+
d,b,a,c
6063+
6064+
query T
6065+
select string_agg(k, ',' order by v desc nulls last) from t;
6066+
----
6067+
b,a,c,d
6068+
6069+
query T
6070+
-- odd indexes should appear first, ties solved by v
6071+
select string_agg(k, ',' order by v % 2 == 0, v) from t;
6072+
----
6073+
c,b,a,d
6074+
6075+
query T
6076+
-- odd indexes should appear first, ties solved by v desc
6077+
select string_agg(k, ',' order by v % 2 == 0, v desc) from t;
6078+
----
6079+
b,c,a,d
6080+
6081+
query T
6082+
select string_agg(k, ',' order by
6083+
case
6084+
when k = 'a' then 3
6085+
when k = 'b' then 0
6086+
when k = 'c' then 2
6087+
when k = 'd' then 1
6088+
end)
6089+
from t;
6090+
----
6091+
b,d,c,a
6092+
6093+
query T
6094+
select string_agg(k, ',' order by
6095+
case
6096+
when k = 'a' then 3
6097+
when k = 'b' then 0
6098+
when k = 'c' then 2
6099+
when k = 'd' then 1
6100+
end desc)
6101+
from t;
6102+
----
6103+
a,c,d,b
6104+
6105+
query TT
6106+
explain select string_agg(k, ',' order by v) from t;
6107+
----
6108+
logical_plan
6109+
01)Aggregate: groupBy=[[]], aggr=[[string_agg(t.k, Utf8(",")) ORDER BY [t.v ASC NULLS LAST]]]
6110+
02)--TableScan: t projection=[k, v]
6111+
physical_plan
6112+
01)AggregateExec: mode=Single, gby=[], aggr=[string_agg(t.k,Utf8(",")) ORDER BY [t.v ASC NULLS LAST]]
6113+
02)--SortExec: expr=[v@1 ASC NULLS LAST], preserve_partitioning=[false]
6114+
03)----DataSourceExec: partitions=1, partition_sizes=[1]
6115+
6116+
query TT
6117+
explain select string_agg(k, ',' order by v desc) from t;
6118+
----
6119+
logical_plan
6120+
01)Aggregate: groupBy=[[]], aggr=[[string_agg(t.k, Utf8(",")) ORDER BY [t.v DESC NULLS FIRST]]]
6121+
02)--TableScan: t projection=[k, v]
6122+
physical_plan
6123+
01)AggregateExec: mode=Single, gby=[], aggr=[string_agg(t.k,Utf8(",")) ORDER BY [t.v DESC NULLS FIRST]]
6124+
02)--SortExec: expr=[v@1 DESC], preserve_partitioning=[false]
6125+
03)----DataSourceExec: partitions=1, partition_sizes=[1]
6126+
6127+
statement ok
6128+
drop table t;
6129+
6130+
60316131
# Tests for aggregating with NaN values
60326132
statement ok
60336133
CREATE TABLE float_table (

0 commit comments

Comments
 (0)