Skip to content

Commit cd593b2

Browse files
committed
revert: Support WITHIN GROUP syntax to standardize certain existing aggregate functions (apache#13511)
This reverts commit e41c02c.
1 parent 3670b67 commit cd593b2

File tree

12 files changed

+138
-357
lines changed

12 files changed

+138
-357
lines changed

datafusion/core/benches/aggregate_query_sql.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ fn criterion_benchmark(c: &mut Criterion) {
158158
query(
159159
ctx.clone(),
160160
&rt,
161-
"SELECT utf8, approx_percentile_cont(0.5, 2500) WITHIN GROUP (ORDER BY u64_wide) \
161+
"SELECT utf8, approx_percentile_cont(u64_wide, 0.5, 2500) \
162162
FROM t GROUP BY utf8",
163163
)
164164
})
@@ -169,7 +169,7 @@ fn criterion_benchmark(c: &mut Criterion) {
169169
query(
170170
ctx.clone(),
171171
&rt,
172-
"SELECT utf8, approx_percentile_cont(0.5, 2500) WITHIN GROUP (ORDER BY f32) \
172+
"SELECT utf8, approx_percentile_cont(f32, 0.5, 2500) \
173173
FROM t GROUP BY utf8",
174174
)
175175
})

datafusion/core/tests/dataframe/dataframe_functions.rs

Lines changed: 18 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -384,34 +384,19 @@ async fn test_fn_approx_median() -> Result<()> {
384384

385385
#[tokio::test]
386386
async fn test_fn_approx_percentile_cont() -> Result<()> {
387-
let expr = approx_percentile_cont(col("b").sort(true, false), lit(0.5), None);
387+
let expr = approx_percentile_cont(col("b"), lit(0.5), None);
388388

389389
let df = create_test_table().await?;
390390
let batches = df.aggregate(vec![], vec![expr]).unwrap().collect().await?;
391391

392392
assert_snapshot!(
393393
batches_to_string(&batches),
394394
@r"
395-
+---------------------------------------------------------------------------+
396-
| approx_percentile_cont(Float64(0.5)) WITHIN GROUP [test.b ASC NULLS LAST] |
397-
+---------------------------------------------------------------------------+
398-
| 10 |
399-
+---------------------------------------------------------------------------+
400-
");
401-
402-
let expr = approx_percentile_cont(col("b").sort(false, false), lit(0.1), None);
403-
404-
let df = create_test_table().await?;
405-
let batches = df.aggregate(vec![], vec![expr]).unwrap().collect().await?;
406-
407-
assert_snapshot!(
408-
batches_to_string(&batches),
409-
@r"
410-
+----------------------------------------------------------------------------+
411-
| approx_percentile_cont(Float64(0.1)) WITHIN GROUP [test.b DESC NULLS LAST] |
412-
+----------------------------------------------------------------------------+
413-
| 100 |
414-
+----------------------------------------------------------------------------+
395+
+---------------------------------------------+
396+
| approx_percentile_cont(test.b,Float64(0.5)) |
397+
+---------------------------------------------+
398+
| 10 |
399+
+---------------------------------------------+
415400
");
416401

417402
// the arg2 parameter is a complex expr, but it can be evaluated to the literal value
@@ -420,71 +405,35 @@ async fn test_fn_approx_percentile_cont() -> Result<()> {
420405
None::<&str>,
421406
"arg_2".to_string(),
422407
));
423-
let expr = approx_percentile_cont(col("b").sort(true, false), alias_expr, None);
408+
let expr = approx_percentile_cont(col("b"), alias_expr, None);
424409
let df = create_test_table().await?;
425410
let batches = df.aggregate(vec![], vec![expr]).unwrap().collect().await?;
426411

427412
assert_snapshot!(
428413
batches_to_string(&batches),
429414
@r"
430-
+--------------------------------------------------------------------+
431-
| approx_percentile_cont(arg_2) WITHIN GROUP [test.b ASC NULLS LAST] |
432-
+--------------------------------------------------------------------+
433-
| 10 |
434-
+--------------------------------------------------------------------+
435-
"
436-
);
437-
438-
let alias_expr = Expr::Alias(Alias::new(
439-
cast(lit(0.1), DataType::Float32),
440-
None::<&str>,
441-
"arg_2".to_string(),
442-
));
443-
let expr = approx_percentile_cont(col("b").sort(false, false), alias_expr, None);
444-
let df = create_test_table().await?;
445-
let batches = df.aggregate(vec![], vec![expr]).unwrap().collect().await?;
446-
447-
assert_snapshot!(
448-
batches_to_string(&batches),
449-
@r"
450-
+---------------------------------------------------------------------+
451-
| approx_percentile_cont(arg_2) WITHIN GROUP [test.b DESC NULLS LAST] |
452-
+---------------------------------------------------------------------+
453-
| 100 |
454-
+---------------------------------------------------------------------+
415+
+--------------------------------------+
416+
| approx_percentile_cont(test.b,arg_2) |
417+
+--------------------------------------+
418+
| 10 |
419+
+--------------------------------------+
455420
"
456421
);
457422

458423
// with number of centroids set
459-
let expr = approx_percentile_cont(col("b").sort(true, false), lit(0.5), Some(lit(2)));
460-
461-
let df = create_test_table().await?;
462-
let batches = df.aggregate(vec![], vec![expr]).unwrap().collect().await?;
463-
464-
assert_snapshot!(
465-
batches_to_string(&batches),
466-
@r"
467-
+------------------------------------------------------------------------------------+
468-
| approx_percentile_cont(Float64(0.5),Int32(2)) WITHIN GROUP [test.b ASC NULLS LAST] |
469-
+------------------------------------------------------------------------------------+
470-
| 30 |
471-
+------------------------------------------------------------------------------------+
472-
");
473-
474-
let expr =
475-
approx_percentile_cont(col("b").sort(false, false), lit(0.1), Some(lit(2)));
424+
let expr = approx_percentile_cont(col("b"), lit(0.5), Some(lit(2)));
476425

477426
let df = create_test_table().await?;
478427
let batches = df.aggregate(vec![], vec![expr]).unwrap().collect().await?;
479428

480429
assert_snapshot!(
481430
batches_to_string(&batches),
482431
@r"
483-
+-------------------------------------------------------------------------------------+
484-
| approx_percentile_cont(Float64(0.1),Int32(2)) WITHIN GROUP [test.b DESC NULLS LAST] |
485-
+-------------------------------------------------------------------------------------+
486-
| 69 |
487-
+-------------------------------------------------------------------------------------+
432+
+------------------------------------------------------+
433+
| approx_percentile_cont(test.b,Float64(0.5),Int32(2)) |
434+
+------------------------------------------------------+
435+
| 30 |
436+
+------------------------------------------------------+
488437
");
489438

490439
Ok(())

datafusion/expr/src/udaf.rs

Lines changed: 1 addition & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -322,16 +322,6 @@ impl AggregateUDF {
322322
self.inner.default_value(data_type)
323323
}
324324

325-
/// See [`AggregateUDFImpl::supports_null_handling_clause`] for more details.
326-
pub fn supports_null_handling_clause(&self) -> bool {
327-
self.inner.supports_null_handling_clause()
328-
}
329-
330-
/// See [`AggregateUDFImpl::is_ordered_set_aggregate`] for more details.
331-
pub fn is_ordered_set_aggregate(&self) -> bool {
332-
self.inner.is_ordered_set_aggregate()
333-
}
334-
335325
/// Returns the documentation for this Aggregate UDF.
336326
///
337327
/// Documentation can be accessed programmatically as well as
@@ -449,14 +439,6 @@ pub trait AggregateUDFImpl: Debug + Send + Sync {
449439
null_treatment,
450440
} = params;
451441

452-
// exclude the first function argument(= column) in ordered set aggregate function,
453-
// because it is duplicated with the WITHIN GROUP clause in schema name.
454-
let args = if self.is_ordered_set_aggregate() {
455-
&args[1..]
456-
} else {
457-
&args[..]
458-
};
459-
460442
let mut schema_name = String::new();
461443

462444
schema_name.write_fmt(format_args!(
@@ -475,14 +457,8 @@ pub trait AggregateUDFImpl: Debug + Send + Sync {
475457
};
476458

477459
if let Some(order_by) = order_by {
478-
let clause = match self.is_ordered_set_aggregate() {
479-
true => "WITHIN GROUP",
480-
false => "ORDER BY",
481-
};
482-
483460
schema_name.write_fmt(format_args!(
484-
" {} [{}]",
485-
clause,
461+
" ORDER BY [{}]",
486462
schema_name_from_sorts(order_by)?
487463
))?;
488464
};
@@ -952,18 +928,6 @@ pub trait AggregateUDFImpl: Debug + Send + Sync {
952928
ScalarValue::try_from(data_type)
953929
}
954930

955-
/// If this function supports `[IGNORE NULLS | RESPECT NULLS]` clause, return true
956-
/// If the function does not, return false
957-
fn supports_null_handling_clause(&self) -> bool {
958-
true
959-
}
960-
961-
/// If this function is ordered-set aggregate function, return true
962-
/// If the function is not, return false
963-
fn is_ordered_set_aggregate(&self) -> bool {
964-
false
965-
}
966-
967931
/// Returns the documentation for this Aggregate UDF.
968932
///
969933
/// Documentation can be accessed programmatically as well as

datafusion/functions-aggregate/src/approx_median.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ make_udaf_expr_and_func!(
4545
/// APPROX_MEDIAN aggregate expression
4646
#[user_doc(
4747
doc_section(label = "Approximate Functions"),
48-
description = "Returns the approximate median (50th percentile) of input values. It is an alias of `approx_percentile_cont(0.5) WITHIN GROUP (ORDER BY x)`.",
48+
description = "Returns the approximate median (50th percentile) of input values. It is an alias of `approx_percentile_cont(x, 0.5)`.",
4949
syntax_example = "approx_median(expression)",
5050
sql_example = r#"```sql
5151
> SELECT approx_median(column_name) FROM table_name;

datafusion/functions-aggregate/src/approx_percentile_cont.rs

Lines changed: 11 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ use datafusion_common::{
3434
downcast_value, internal_err, not_impl_datafusion_err, not_impl_err, plan_err,
3535
Result, ScalarValue,
3636
};
37-
use datafusion_expr::expr::{AggregateFunction, Sort};
3837
use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
3938
use datafusion_expr::type_coercion::aggregates::{INTEGERS, NUMERICS};
4039
use datafusion_expr::utils::format_state_name;
@@ -52,39 +51,29 @@ create_func!(ApproxPercentileCont, approx_percentile_cont_udaf);
5251

5352
/// Computes the approximate percentile continuous of a set of numbers
5453
pub fn approx_percentile_cont(
55-
order_by: Sort,
54+
expression: Expr,
5655
percentile: Expr,
5756
centroids: Option<Expr>,
5857
) -> Expr {
59-
let expr = order_by.expr.clone();
60-
6158
let args = if let Some(centroids) = centroids {
62-
vec![expr, percentile, centroids]
59+
vec![expression, percentile, centroids]
6360
} else {
64-
vec![expr, percentile]
61+
vec![expression, percentile]
6562
};
66-
67-
Expr::AggregateFunction(AggregateFunction::new_udf(
68-
approx_percentile_cont_udaf(),
69-
args,
70-
false,
71-
None,
72-
Some(vec![order_by]),
73-
None,
74-
))
63+
approx_percentile_cont_udaf().call(args)
7564
}
7665

7766
#[user_doc(
7867
doc_section(label = "Approximate Functions"),
7968
description = "Returns the approximate percentile of input values using the t-digest algorithm.",
80-
syntax_example = "approx_percentile_cont(percentile, centroids) WITHIN GROUP (ORDER BY expression)",
69+
syntax_example = "approx_percentile_cont(expression, percentile, centroids)",
8170
sql_example = r#"```sql
82-
> SELECT approx_percentile_cont(0.75, 100) WITHIN GROUP (ORDER BY column_name) FROM table_name;
83-
+-----------------------------------------------------------------------+
84-
| approx_percentile_cont(0.75, 100) WITHIN GROUP (ORDER BY column_name) |
85-
+-----------------------------------------------------------------------+
86-
| 65.0 |
87-
+-----------------------------------------------------------------------+
71+
> SELECT approx_percentile_cont(column_name, 0.75, 100) FROM table_name;
72+
+-------------------------------------------------+
73+
| approx_percentile_cont(column_name, 0.75, 100) |
74+
+-------------------------------------------------+
75+
| 65.0 |
76+
+-------------------------------------------------+
8877
```"#,
8978
standard_argument(name = "expression",),
9079
argument(
@@ -141,19 +130,6 @@ impl ApproxPercentileCont {
141130
args: AccumulatorArgs,
142131
) -> Result<ApproxPercentileAccumulator> {
143132
let percentile = validate_input_percentile_expr(&args.exprs[1])?;
144-
145-
let is_descending = args
146-
.ordering_req
147-
.first()
148-
.map(|sort_expr| sort_expr.options.descending)
149-
.unwrap_or(false);
150-
151-
let percentile = if is_descending {
152-
1.0 - percentile
153-
} else {
154-
percentile
155-
};
156-
157133
let tdigest_max_size = if args.exprs.len() == 3 {
158134
Some(validate_input_max_size_expr(&args.exprs[2])?)
159135
} else {
@@ -319,14 +295,6 @@ impl AggregateUDFImpl for ApproxPercentileCont {
319295
Ok(arg_types[0].clone())
320296
}
321297

322-
fn supports_null_handling_clause(&self) -> bool {
323-
false
324-
}
325-
326-
fn is_ordered_set_aggregate(&self) -> bool {
327-
true
328-
}
329-
330298
fn documentation(&self) -> Option<&Documentation> {
331299
self.doc()
332300
}

datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -49,14 +49,14 @@ make_udaf_expr_and_func!(
4949
#[user_doc(
5050
doc_section(label = "Approximate Functions"),
5151
description = "Returns the weighted approximate percentile of input values using the t-digest algorithm.",
52-
syntax_example = "approx_percentile_cont_with_weight(weight, percentile) WITHIN GROUP (ORDER BY expression)",
52+
syntax_example = "approx_percentile_cont_with_weight(expression, weight, percentile)",
5353
sql_example = r#"```sql
54-
> SELECT approx_percentile_cont_with_weight(weight_column, 0.90) WITHIN GROUP (ORDER BY column_name) FROM table_name;
55-
+---------------------------------------------------------------------------------------------+
56-
| approx_percentile_cont_with_weight(weight_column, 0.90) WITHIN GROUP (ORDER BY column_name) |
57-
+---------------------------------------------------------------------------------------------+
58-
| 78.5 |
59-
+---------------------------------------------------------------------------------------------+
54+
> SELECT approx_percentile_cont_with_weight(column_name, weight_column, 0.90) FROM table_name;
55+
+----------------------------------------------------------------------+
56+
| approx_percentile_cont_with_weight(column_name, weight_column, 0.90) |
57+
+----------------------------------------------------------------------+
58+
| 78.5 |
59+
+----------------------------------------------------------------------+
6060
```"#,
6161
standard_argument(name = "expression", prefix = "The"),
6262
argument(
@@ -175,14 +175,6 @@ impl AggregateUDFImpl for ApproxPercentileContWithWeight {
175175
self.approx_percentile_cont.state_fields(args)
176176
}
177177

178-
fn supports_null_handling_clause(&self) -> bool {
179-
false
180-
}
181-
182-
fn is_ordered_set_aggregate(&self) -> bool {
183-
true
184-
}
185-
186178
fn documentation(&self) -> Option<&Documentation> {
187179
self.doc()
188180
}

datafusion/proto/tests/cases/roundtrip_logical_plan.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -979,8 +979,8 @@ async fn roundtrip_expr_api() -> Result<()> {
979979
stddev_pop(lit(2.2)),
980980
approx_distinct(lit(2)),
981981
approx_median(lit(2)),
982-
approx_percentile_cont(lit(2).sort(true, false), lit(0.5), None),
983-
approx_percentile_cont(lit(2).sort(true, false), lit(0.5), Some(lit(50))),
982+
approx_percentile_cont(lit(2), lit(0.5), None),
983+
approx_percentile_cont(lit(2), lit(0.5), Some(lit(50))),
984984
approx_percentile_cont_with_weight(lit(2), lit(1), lit(0.5)),
985985
grouping(lit(1)),
986986
bit_and(lit(2)),

datafusion/proto/tests/cases/roundtrip_physical_plan.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -505,7 +505,7 @@ fn rountrip_aggregate_with_approx_pencentile_cont() -> Result<()> {
505505
vec![col("b", &schema)?, lit(0.5)],
506506
)
507507
.schema(Arc::clone(&schema))
508-
.alias("APPROX_PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY b)")
508+
.alias("APPROX_PERCENTILE_CONT(b, 0.5)")
509509
.build()
510510
.map(Arc::new)?];
511511

0 commit comments

Comments
 (0)