Skip to content

Commit e242eb6

Browse files
author
Andras Palinkas
authored
SQL: Add method args to PERCENTILE/PERCENTILE_RANK (#65026)
* Adds the capability to have functions with two optional arguments.
* Adds two new optional arguments to the `PERCENTILE()` and `PERCENTILE_RANK()` functions, namely `method` and `method_parameter`, which can be either: 1) `tdigest` with a double `compression` parameter, or 2) `hdr` with an integer `number_of_significant_value_digits` parameter.
* Integration tests.
* Documentation updates.

Closes #63567
1 parent 350c2b9 commit e242eb6

File tree

17 files changed

+632
-172
lines changed

17 files changed

+632
-172
lines changed

docs/reference/sql/functions/aggs.asciidoc

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -477,14 +477,18 @@ include-tagged::{sql-specs}/docs/docs.csv-spec[aggMadScalars]
477477
[source, sql]
478478
--------------------------------------------------
479479
PERCENTILE(
480-
field_name, <1>
481-
numeric_exp) <2>
480+
field_name, <1>
481+
percentile[, <2>
482+
method[, <3>
483+
method_parameter]]) <4>
482484
--------------------------------------------------
483485

484486
*Input*:
485487

486488
<1> a numeric field
487489
<2> a numeric expression (must be a constant and not based on a field)
490+
<3> optional string literal for the <<search-aggregations-metrics-percentile-aggregation-approximation,percentile algorithm>>. Possible values: `tdigest` or `hdr`. Defaults to `tdigest`.
491+
<4> optional numeric literal that configures the <<search-aggregations-metrics-percentile-aggregation-approximation,percentile algorithm>>. Configures `compression` for `tdigest` or `number_of_significant_value_digits` for `hdr`. The default is the same as that of the backing algorithm.
488492

489493
*Output*: `double` numeric value
490494

@@ -503,21 +507,31 @@ include-tagged::{sql-specs}/docs/docs.csv-spec[aggPercentile]
503507
include-tagged::{sql-specs}/docs/docs.csv-spec[aggPercentileScalars]
504508
--------------------------------------------------
505509

510+
["source","sql",subs="attributes,macros"]
511+
--------------------------------------------------
512+
include-tagged::{sql-specs}/docs/docs.csv-spec[aggPercentileWithPercentileConfig]
513+
--------------------------------------------------
514+
506515
[[sql-functions-aggs-percentile-rank]]
507516
==== `PERCENTILE_RANK`
508517

509518
.Synopsis:
510519
[source, sql]
511520
--------------------------------------------------
512521
PERCENTILE_RANK(
513-
field_name, <1>
514-
numeric_exp) <2>
522+
field_name, <1>
523+
value[, <2>
524+
method[, <3>
525+
method_parameter]]) <4>
515526
--------------------------------------------------
516527

517528
*Input*:
518529

519530
<1> a numeric field
520531
<2> a numeric expression (must be a constant and not based on a field)
532+
<3> optional string literal for the <<search-aggregations-metrics-percentile-aggregation-approximation,percentile algorithm>>. Possible values: `tdigest` or `hdr`. Defaults to `tdigest`.
533+
<4> optional numeric literal that configures the <<search-aggregations-metrics-percentile-aggregation-approximation,percentile algorithm>>. Configures `compression` for `tdigest` or `number_of_significant_value_digits` for `hdr`. The default is the same as that of the backing algorithm.
534+
521535

522536
*Output*: `double` numeric value
523537

@@ -536,6 +550,11 @@ include-tagged::{sql-specs}/docs/docs.csv-spec[aggPercentileRank]
536550
include-tagged::{sql-specs}/docs/docs.csv-spec[aggPercentileRankScalars]
537551
--------------------------------------------------
538552

553+
["source","sql",subs="attributes,macros"]
554+
--------------------------------------------------
555+
include-tagged::{sql-specs}/docs/docs.csv-spec[aggPercentileRankWithPercentileConfig]
556+
--------------------------------------------------
557+
539558
[[sql-functions-aggs-skewness]]
540559
==== `SKEWNESS`
541560

x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/expression/function/FunctionRegistry.java

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -420,16 +420,23 @@ protected interface ScalarTriFunctionConfigurationAwareBuilder<T> {
420420
public static <T extends Function> FunctionDefinition def(Class<T> function,
421421
FourParametersFunctionBuilder<T> ctorRef, String... names) {
422422
FunctionBuilder builder = (source, children, distinct, cfg) -> {
423-
boolean hasMinimumThree = OptionalArgument.class.isAssignableFrom(function);
424-
if (hasMinimumThree && (children.size() > 4 || children.size() < 3)) {
425-
throw new QlIllegalArgumentException("expects three or four arguments");
426-
} else if (!hasMinimumThree && children.size() != 4) {
423+
if (OptionalArgument.class.isAssignableFrom(function)) {
424+
if (children.size() > 4 || children.size() < 3) {
425+
throw new QlIllegalArgumentException("expects three or four arguments");
426+
}
427+
} else if (TwoOptionalArguments.class.isAssignableFrom(function)) {
428+
if (children.size() > 4 || children.size() < 2) {
429+
throw new QlIllegalArgumentException("expects minimum two, maximum four arguments");
430+
}
431+
} else if (children.size() != 4) {
427432
throw new QlIllegalArgumentException("expects exactly four arguments");
428433
}
429434
if (distinct) {
430435
throw new QlIllegalArgumentException("does not support DISTINCT yet it was specified");
431436
}
432-
return ctorRef.build(source, children.get(0), children.get(1), children.get(2), children.size() == 4 ? children.get(3) : null);
437+
return ctorRef.build(source, children.get(0), children.get(1),
438+
children.size() > 2 ? children.get(2) : null,
439+
children.size() > 3 ? children.get(3) : null);
433440
};
434441
return def(function, builder, false, names);
435442
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
7+
package org.elasticsearch.xpack.ql.expression.function;
8+
9+
/**
10+
* Marker interface indicating that a function accepts two optional arguments (the last two).
11+
* The {@link FunctionRegistry} uses it to validate the number of arguments passed to the function.
12+
*/
13+
public interface TwoOptionalArguments {
14+
15+
}

x-pack/plugin/sql/qa/server/src/main/resources/agg.csv-spec

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,42 @@ F |10099.7608
2222
M |10096.2232
2323
;
2424

25+
singlePercentileWithCommaTDigestSpecified
26+
SELECT gender, PERCENTILE(emp_no, 97.76, 'tdigest') p1 FROM test_emp GROUP BY gender;
27+
28+
gender:s | p1:d
29+
null |10019.0
30+
F |10099.7608
31+
M |10096.2232
32+
;
33+
34+
singlePercentileWithCommaTDigestWithCompressionSpecified
35+
SELECT gender, PERCENTILE(emp_no, 97.76, 'tdigest', 50 + 0.2) p1 FROM test_emp GROUP BY gender;
36+
37+
gender:s | p1:d
38+
null |10019.0
39+
F |10099.7608
40+
M |10096.2232
41+
;
42+
43+
singlePercentileWithCommaHDRSpecified
44+
SELECT gender, PERCENTILE(emp_no, 97.76, 'hdr') p1 FROM test_emp GROUP BY gender;
45+
46+
gender:s | p1:d
47+
null |10016.0
48+
F |10096.0
49+
M |10096.0
50+
;
51+
52+
singlePercentileWithCommaHDRWithDigitsSpecified
53+
SELECT gender, PERCENTILE(emp_no, 97.76, 'hdr', 1+1) p1 FROM test_emp GROUP BY gender;
54+
55+
gender:s | p1:d
56+
null |9984.0
57+
F |10048.0
58+
M |10048.0
59+
;
60+
2561
multiplePercentilesOneWithCommaOneWithout
2662
SELECT gender, PERCENTILE(emp_no, 92.45) p1, PERCENTILE(emp_no, 91) p2 FROM test_emp GROUP BY gender;
2763

@@ -58,6 +94,24 @@ F |17.424242424242426
5894
M |15.350877192982457
5995
;
6096

97+
singlePercentileRankWithHDRSpecified
98+
SELECT gender, PERCENTILE_RANK(emp_no, 10000 + 25, 'hdr') p1 FROM test_emp GROUP BY gender;
99+
100+
gender:s | p1:d
101+
null |100.0
102+
F |21.21212121212121
103+
M |24.56140350877193
104+
;
105+
106+
singlePercentileRankHDRWithDigitsSpecified
107+
SELECT gender, PERCENTILE_RANK(emp_no, 10000 + 25, 'hdr', 4+1) p1 FROM test_emp GROUP BY gender;
108+
109+
gender:s | p1:d
110+
null |100.0
111+
F |18.181818181818183
112+
M |15.789473684210526
113+
;
114+
61115
multiplePercentileRanks
62116
SELECT gender, PERCENTILE_RANK(emp_no, 10030.0) rank1, PERCENTILE_RANK(emp_no, 10025) rank2 FROM test_emp GROUP BY gender;
63117

x-pack/plugin/sql/qa/server/src/main/resources/docs/docs.csv-spec

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1511,6 +1511,26 @@ null |6249.916666666667
15111511
// end::aggPercentileScalars
15121512
;
15131513

1514+
aggPercentileWithPercentileConfig
1515+
// tag::aggPercentileWithPercentileConfig
1516+
SELECT
1517+
languages,
1518+
PERCENTILE(salary, 97.3, 'tdigest', 100.0) AS "97.3_TDigest",
1519+
PERCENTILE(salary, 97.3, 'hdr', 3) AS "97.3_HDR"
1520+
FROM emp
1521+
GROUP BY languages;
1522+
1523+
languages | 97.3_TDigest | 97.3_HDR
1524+
---------------+---------------+---------------
1525+
null |74999.0 |74992.0
1526+
1 |73717.0 |73712.0
1527+
2 |73530.238 |69936.0
1528+
3 |74970.0 |74992.0
1529+
4 |74572.0 |74608.0
1530+
5 |66117.118 |56368.0
1531+
// end::aggPercentileWithPercentileConfig
1532+
;
1533+
15141534
aggPercentileRank
15151535
// tag::aggPercentileRank
15161536
SELECT languages, PERCENTILE_RANK(salary, 65000) AS rank FROM emp GROUP BY languages;
@@ -1542,6 +1562,26 @@ null |66.91240875912409
15421562
// end::aggPercentileRankScalars
15431563
;
15441564

1565+
aggPercentileRankWithPercentileConfig
1566+
// tag::aggPercentileRankWithPercentileConfig
1567+
SELECT
1568+
languages,
1569+
ROUND(PERCENTILE_RANK(salary, 65000, 'tdigest', 100.0), 2) AS "rank_TDigest",
1570+
ROUND(PERCENTILE_RANK(salary, 65000, 'hdr', 3), 2) AS "rank_HDR"
1571+
FROM emp
1572+
GROUP BY languages;
1573+
1574+
languages | rank_TDigest | rank_HDR
1575+
---------------+---------------+---------------
1576+
null |73.66 |80.0
1577+
1 |73.73 |73.33
1578+
2 |88.88 |89.47
1579+
3 |79.44 |76.47
1580+
4 |85.7 |83.33
1581+
5 |100.0 |95.24
1582+
// end::aggPercentileRankWithPercentileConfig
1583+
;
1584+
15451585
aggSkewness
15461586
// tag::aggSkewness
15471587
SELECT MIN(salary) AS min, MAX(salary) AS max, SKEWNESS(salary) AS s FROM emp;

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Percentile.java

Lines changed: 6 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -6,70 +6,31 @@
66
package org.elasticsearch.xpack.sql.expression.function.aggregate;
77

88
import org.elasticsearch.xpack.ql.expression.Expression;
9-
import org.elasticsearch.xpack.ql.expression.Expressions.ParamOrdinal;
10-
import org.elasticsearch.xpack.ql.expression.Foldables;
11-
import org.elasticsearch.xpack.ql.expression.function.aggregate.EnclosedAgg;
129
import org.elasticsearch.xpack.ql.tree.NodeInfo;
1310
import org.elasticsearch.xpack.ql.tree.Source;
14-
import org.elasticsearch.xpack.ql.type.DataType;
15-
import org.elasticsearch.xpack.ql.type.DataTypes;
16-
import org.elasticsearch.xpack.sql.type.SqlDataTypeConverter;
1711

1812
import java.util.List;
1913

20-
import static java.util.Collections.singletonList;
21-
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isFoldable;
22-
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isNumeric;
14+
public class Percentile extends PercentileAggregate {
2315

24-
public class Percentile extends NumericAggregate implements EnclosedAgg {
25-
26-
private final Expression percent;
27-
28-
public Percentile(Source source, Expression field, Expression percent) {
29-
super(source, field, singletonList(percent));
30-
this.percent = percent;
16+
public Percentile(Source source, Expression field, Expression percent, Expression method, Expression methodParameter) {
17+
super(source, field, percent, method, methodParameter);
3118
}
3219

3320
@Override
3421
protected NodeInfo<Percentile> info() {
35-
return NodeInfo.create(this, Percentile::new, field(), percent);
22+
return NodeInfo.create(this, Percentile::new, field(), percent(), method(), methodParameter());
3623
}
3724

3825
@Override
3926
public Percentile replaceChildren(List<Expression> newChildren) {
4027
if (newChildren.size() != 2) {
4128
throw new IllegalArgumentException("expected [2] children but received [" + newChildren.size() + "]");
4229
}
43-
return new Percentile(source(), newChildren.get(0), newChildren.get(1));
44-
}
45-
46-
@Override
47-
protected TypeResolution resolveType() {
48-
TypeResolution resolution = isFoldable(percent, sourceText(), ParamOrdinal.SECOND);
49-
if (resolution.unresolved()) {
50-
return resolution;
51-
}
52-
53-
resolution = super.resolveType();
54-
if (resolution.unresolved()) {
55-
return resolution;
56-
}
57-
58-
return isNumeric(percent, sourceText(), ParamOrdinal.DEFAULT);
30+
return new Percentile(source(), newChildren.get(0), newChildren.get(1), method(), methodParameter());
5931
}
6032

6133
public Expression percent() {
62-
return percent;
63-
}
64-
65-
@Override
66-
public DataType dataType() {
67-
return DataTypes.DOUBLE;
68-
}
69-
70-
@Override
71-
public String innerName() {
72-
Double value = (Double) SqlDataTypeConverter.convert(Foldables.valueOf(percent), DataTypes.DOUBLE);
73-
return Double.toString(value);
34+
return parameter();
7435
}
7536
}

0 commit comments

Comments
 (0)