Skip to content

Commit 2a6665e

Browse files
committed
Improve performance of `last_value` by implementing a specialized `GroupsAccumulator`
1 parent d056fb5 commit 2a6665e

File tree

4 files changed

+336
-42
lines changed

4 files changed

+336
-42
lines changed

datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,32 @@ async fn test_first_val() {
113113
.await;
114114
}
115115

116+
/// Fuzz test for the `last_value` aggregate function.
///
/// Starts from `baseline_config()`, sets the max-distinct count to
/// `usize::MAX` on every column that does not already have one, then runs an
/// `AggregationFuzzerBuilder` with a single query builder that passes all
/// columns to both `with_aggregate_arguments` and `set_group_by_columns`.
#[tokio::test(flavor = "multi_thread")]
117+
async fn test_last_val() {
118+
let mut data_gen_config = baseline_config();
119+
120+
// Only columns without an existing max-distinct setting are adjusted.
for i in 0..data_gen_config.columns.len() {
121+
if data_gen_config.columns[i].get_max_num_distinct().is_none() {
122+
data_gen_config.columns[i] = data_gen_config.columns[i]
123+
.clone()
124+
// Minimize the chance of identical values in the order by columns to make the test more stable
125+
.with_max_num_distinct(usize::MAX);
126+
}
127+
}
128+
129+
// Query against "fuzz_table" using the `last_value` aggregate.
let query_builder = QueryBuilder::new()
130+
.with_table_name("fuzz_table")
131+
.with_aggregate_function("last_value")
132+
.with_aggregate_arguments(data_gen_config.all_columns())
133+
.set_group_by_columns(data_gen_config.all_columns());
134+
135+
AggregationFuzzerBuilder::from(data_gen_config)
136+
.add_query_builder(query_builder)
137+
.build()
138+
.run()
139+
.await;
140+
}
141+
116142
#[tokio::test(flavor = "multi_thread")]
117143
async fn test_max() {
118144
let data_gen_config = baseline_config();

datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -503,7 +503,9 @@ impl QueryBuilder {
503503
let distinct = if *is_distinct { "DISTINCT " } else { "" };
504504
alias_gen += 1;
505505

506-
let (order_by, null_opt) = if function_name.eq("first_value") {
506+
let (order_by, null_opt) = if function_name.eq("first_value")
507+
|| function_name.eq("last_value")
508+
{
507509
(
508510
self.order_by(&order_by_black_list), /* Among the order by columns, at most one group by column can be included to avoid all order by column values being identical */
509511
self.null_opt(),

0 commit comments

Comments
 (0)