From da78135f0529fa8fdd60d7f724039916f37071dd Mon Sep 17 00:00:00 2001 From: Aaron Alvarez Date: Tue, 2 Sep 2025 16:56:02 -0700 Subject: [PATCH 1/6] First revision Signed-off-by: Aaron Alvarez --- .../function/BuiltinFunctionName.java | 2 ++ .../expression/function/PPLFuncImpTable.java | 20 +++++++++++++++++++ docs/user/dql/aggregations.rst | 20 +++++++++++++++++++ .../remote/CalcitePPLAggregationIT.java | 8 ++++++++ ppl/src/main/antlr/OpenSearchPPLParser.g4 | 1 + .../calcite/CalcitePPLAggregationTest.java | 15 ++++++++++++++ .../ppl/parser/AstExpressionBuilderTest.java | 12 +++++++++++ 7 files changed, 78 insertions(+) diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index db7919e4de2..f260bfb3b4e 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -198,6 +198,7 @@ public enum BuiltinFunctionName { TAKE(FunctionName.of("take")), // t-digest percentile which is used in OpenSearch core by default. 
PERCENTILE_APPROX(FunctionName.of("percentile_approx")), + MEDIAN(FunctionName.of("median")), EARLIEST(FunctionName.of("earliest")), LATEST(FunctionName.of("latest")), DISTINCT_COUNT_APPROX(FunctionName.of("distinct_count_approx")), @@ -344,6 +345,7 @@ public enum BuiltinFunctionName { .put("take", BuiltinFunctionName.TAKE) .put("percentile", BuiltinFunctionName.PERCENTILE_APPROX) .put("percentile_approx", BuiltinFunctionName.PERCENTILE_APPROX) + .put("median", BuiltinFunctionName.MEDIAN) .put("earliest", BuiltinFunctionName.EARLIEST) .put("latest", BuiltinFunctionName.LATEST) .put("distinct_count_approx", BuiltinFunctionName.DISTINCT_COUNT_APPROX) diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index bbccafc8dd8..92a5da29ab5 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -123,6 +123,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.MATCH_PHRASE_PREFIX; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MAX; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MD5; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MEDIAN; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MICROSECOND; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MIN; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MINUTE; @@ -253,6 +254,7 @@ import org.apache.logging.log4j.Logger; import org.opensearch.sql.calcite.CalcitePlanContext; import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; +import org.opensearch.sql.calcite.utils.PPLOperandTypes; import org.opensearch.sql.calcite.utils.PlanUtils; import org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils; import 
org.opensearch.sql.exception.ExpressionEvaluationException; @@ -1116,6 +1118,24 @@ void populate() { PERCENTILE_APPROX.name(), false)); + // Register MEDIAN as alias for PERCENTILE_APPROX with 50th percentile + register( + MEDIAN, + (distinct, field, argList, ctx) -> { + List medianArgList = new ArrayList<>(); + medianArgList.add( + ctx.rexBuilder.makeExactLiteral( + BigDecimal.valueOf(50.0))); // hardcoded 50th percentile + medianArgList.add(ctx.rexBuilder.makeFlag(field.getType().getSqlTypeName())); + return UserDefinedFunctionUtils.makeAggregateCall( + PPLBuiltinOperators.PERCENTILE_APPROX, + List.of(field), + medianArgList, + ctx.relBuilder); + }, + wrapSqlOperandTypeChecker( + PPLOperandTypes.NUMERIC.getInnerTypeChecker(), MEDIAN.name(), false)); + register( EARLIEST, (distinct, field, argList, ctx) -> { diff --git a/docs/user/dql/aggregations.rst b/docs/user/dql/aggregations.rst index 84043c7ead3..69ce997b375 100644 --- a/docs/user/dql/aggregations.rst +++ b/docs/user/dql/aggregations.rst @@ -419,6 +419,26 @@ Example:: | 32 | +---------+ +MEDIAN +------ + +Description +>>>>>>>>>>> + +Usage: MEDIAN(expr). Returns the median (50th percentile) value of `expr`. This is equivalent to ``PERCENTILE(expr, 50)``. + +Note: This function is only available when Calcite is enabled. 
+ +Example:: + + ppl> source=accounts | stats median(age); + fetched rows / total rows = 1/1 + +-------------+ + | median(age) | + |-------------| + | 32 | + +-------------+ + HAVING Clause ============= diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java index 26df53cbcdd..58ff065c587 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java @@ -969,4 +969,12 @@ public void testStatsCountAliasByGroupWithSort() throws IOException { rows(1, "VA"), rows(1, "WA")); } + + @Test + public void testMedian() throws IOException { + JSONObject actual = + executeQuery(String.format("source=%s | stats median(balance)", TEST_INDEX_BANK)); + verifySchema(actual, schema("median(balance)", "bigint")); + verifyDataRows(actual, rows(32838)); + } } diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 5f1da7a4801..dd622c77e1f 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -512,6 +512,7 @@ statsFunctionName | STDDEV_POP | PERCENTILE | PERCENTILE_APPROX + | MEDIAN ; earliestLatestFunction diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java index a7e81e1446e..1a67fb7101d 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java @@ -696,4 +696,19 @@ public void testPercentileShortcutInvalidDecimalValueAbove100() { String ppl = "source=EMP | stats perc100.1(SAL)"; getRelNode(ppl); } + + @Test + public void testMedian() { + String ppl = "source=EMP | stats median(SAL)"; + 
RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalAggregate(group=[{}], median(SAL)=[percentile_approx($0, $1, $2)])\n" + + " LogicalProject(SAL=[$5], $f1=[50.0:DECIMAL(3, 1)], $f2=[FLAG(DECIMAL)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `percentile_approx`(`SAL`, 50.0, DECIMAL) `median(SAL)`\n" + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java index 230593541fd..c9a3028b826 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java @@ -1298,4 +1298,16 @@ public void testPercentileShortcutFunctionInvalidDecimalValueAbove100() { SyntaxCheckException.class, () -> assertEqual("source=t | stats perc100.1(a)", (Node) null)); } + + @Test + public void testMedianAggFuncExpr() { + assertEqual( + "source=t | stats median(a)", + agg( + relation("t"), + exprList(alias("median(a)", aggregate("median", field("a")))), + emptyList(), + emptyList(), + defaultStatsArgs())); + } } From 3990b293902e8d0cf1cc001e40d3689b6a871461 Mon Sep 17 00:00:00 2001 From: Aaron Alvarez Date: Thu, 4 Sep 2025 23:08:36 -0700 Subject: [PATCH 2/6] Fixing documentation Signed-off-by: Aaron Alvarez --- docs/user/dql/aggregations.rst | 20 -------------------- docs/user/ppl/cmd/stats.rst | 23 ++++++++++++++++++++++- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/docs/user/dql/aggregations.rst b/docs/user/dql/aggregations.rst index 69ce997b375..84043c7ead3 100644 --- a/docs/user/dql/aggregations.rst +++ b/docs/user/dql/aggregations.rst @@ -419,26 +419,6 @@ Example:: | 32 | +---------+ -MEDIAN ------- - -Description ->>>>>>>>>>> - -Usage: MEDIAN(expr). 
Returns the median (50th percentile) value of `expr`. This is equivalent to ``PERCENTILE(expr, 50)``. - -Note: This function is only available when Calcite is enabled. - -Example:: - - ppl> source=accounts | stats median(age); - fetched rows / total rows = 1/1 - +-------------+ - | median(age) | - |-------------| - | 32 | - +-------------+ - HAVING Clause ============= diff --git a/docs/user/ppl/cmd/stats.rst b/docs/user/ppl/cmd/stats.rst index 074a9d54fb3..8b38d91b8e1 100644 --- a/docs/user/ppl/cmd/stats.rst +++ b/docs/user/ppl/cmd/stats.rst @@ -51,6 +51,7 @@ stats ... [by-clause] * Description: The unit of the interval expression is the natural unit by default. If the field is a date and time type field, and the interval is in date/time units, you will need to specify the unit in the interval expression. For example, to split the field ``age`` into buckets by 10 years, it looks like ``span(age, 10)``. And here is another example of time span, the span to split a ``timestamp`` field into hourly intervals, it looks like ``span(timestamp, 1h)``. * Available time unit: + +----------------------------+ | Span Interval Units | +============================+ @@ -273,7 +274,7 @@ Example:: +--------------------+ DISTINCT_COUNT_APPROX ----------- +--------------------- Description >>>>>>>>>>> @@ -336,6 +337,26 @@ Example:: | 36 | M | +---------------------+--------+ +MEDIAN +------ + +Description +>>>>>>>>>>> + +Usage: MEDIAN(expr). Returns the median (50th percentile) value of `expr`. This is equivalent to ``PERCENTILE(expr, 50)``. + +Note: This function requires Calcite to be enabled (see `Configuration`_ section above). 
+ +Example:: + + os> source=accounts | stats median(age); + fetched rows / total rows = 1/1 + +-------------+ + | median(age) | + |-------------| + | 32 | + +-------------+ + EARLIEST -------- From 78f850bc2d4e152a93484cd449a521768ebb6dcd Mon Sep 17 00:00:00 2001 From: Aaron Alvarez Date: Thu, 4 Sep 2025 23:34:43 -0700 Subject: [PATCH 3/6] Removing unnecessary comments Signed-off-by: Aaron Alvarez --- .../opensearch/sql/expression/function/PPLFuncImpTable.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index 3ddb48f48ff..2f3570b2e2b 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -1120,14 +1120,11 @@ void populate() { PERCENTILE_APPROX.name(), false)); - // Register MEDIAN as alias for PERCENTILE_APPROX with 50th percentile register( MEDIAN, (distinct, field, argList, ctx) -> { List medianArgList = new ArrayList<>(); - medianArgList.add( - ctx.rexBuilder.makeExactLiteral( - BigDecimal.valueOf(50.0))); // hardcoded 50th percentile + medianArgList.add(ctx.rexBuilder.makeExactLiteral(BigDecimal.valueOf(50.0))); medianArgList.add(ctx.rexBuilder.makeFlag(field.getType().getSqlTypeName())); return UserDefinedFunctionUtils.makeAggregateCall( PPLBuiltinOperators.PERCENTILE_APPROX, From f937d7ef83a5db90172680a9785cac726a676972 Mon Sep 17 00:00:00 2001 From: Aaron Alvarez Date: Thu, 4 Sep 2025 23:59:31 -0700 Subject: [PATCH 4/6] Fixing stats.rst documentation Signed-off-by: Aaron Alvarez --- docs/user/dql/aggregations.rst | 28 ---------------------------- docs/user/ppl/cmd/stats.rst | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 28 deletions(-) diff --git a/docs/user/dql/aggregations.rst b/docs/user/dql/aggregations.rst index 
84043c7ead3..4b8ca57a32d 100644 --- a/docs/user/dql/aggregations.rst +++ b/docs/user/dql/aggregations.rst @@ -391,34 +391,6 @@ Example:: | M | 36 | +--------+-----+ -Percentile Shortcut Functions ->>>>>>>>>>>>>>>>>>>>>>>>>>>>> - -For convenience, OpenSearch PPL provides shortcut functions for common percentiles: - -- ``PERC<percent>(expr)`` - Equivalent to ``PERCENTILE(expr, <percent>)`` -- ``P<percent>(expr)`` - Equivalent to ``PERCENTILE(expr, <percent>)`` - -Both integer and decimal percentiles from 0 to 100 are supported (e.g., ``PERC95``, ``P99.5``). - -Example:: - - ppl> source=accounts | stats perc99.5(age); - fetched rows / total rows = 1/1 - +---------------+ - | perc99.5(age) | - |---------------| - | 36 | - +---------------+ - - ppl> source=accounts | stats p50(age); - fetched rows / total rows = 1/1 - +---------+ - | p50(age) | - |---------| - | 32 | - +---------+ - HAVING Clause ============= diff --git a/docs/user/ppl/cmd/stats.rst b/docs/user/ppl/cmd/stats.rst index 8b38d91b8e1..359ace107bf 100644 --- a/docs/user/ppl/cmd/stats.rst +++ b/docs/user/ppl/cmd/stats.rst @@ -337,12 +337,44 @@ Example:: | 36 | M | +---------------------+--------+ +Percentile Shortcut Functions +>>>>>>>>>>>>>>>>>>>>>>>>>>>>> + +Version: 3.3.0 + +For convenience, OpenSearch PPL provides shortcut functions for common percentiles: + +- ``PERC<percent>(expr)`` - Equivalent to ``PERCENTILE(expr, <percent>)`` +- ``P<percent>(expr)`` - Equivalent to ``PERCENTILE(expr, <percent>)`` + +Both integer and decimal percentiles from 0 to 100 are supported (e.g., ``PERC95``, ``P99.5``). + +Example:: + + ppl> source=accounts | stats perc99.5(age); + fetched rows / total rows = 1/1 + +---------------+ + | perc99.5(age) | + |---------------| + | 36 | + +---------------+ + + ppl> source=accounts | stats p50(age); + fetched rows / total rows = 1/1 + +---------+ + | p50(age) | + |---------| + | 32 | + +---------+ + MEDIAN ------ Description >>>>>>>>>>> +Version: 3.3.0 + Usage: MEDIAN(expr). Returns the median (50th percentile) value of `expr`. 
This is equivalent to ``PERCENTILE(expr, 50)``. Note: This function requires Calcite to be enabled (see `Configuration`_ section above). From 0eb097ebefb0fdaf45df5b6a1edd3712fd47ca44 Mon Sep 17 00:00:00 2001 From: Aaron Alvarez Date: Mon, 8 Sep 2025 10:25:59 -0700 Subject: [PATCH 5/6] Fixing documentation Signed-off-by: Aaron Alvarez --- docs/user/ppl/cmd/stats.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user/ppl/cmd/stats.rst b/docs/user/ppl/cmd/stats.rst index 359ace107bf..7577e16f2fd 100644 --- a/docs/user/ppl/cmd/stats.rst +++ b/docs/user/ppl/cmd/stats.rst @@ -386,7 +386,7 @@ Example:: +-------------+ | median(age) | |-------------| - | 32 | + | 33 | +-------------+ EARLIEST From 13d6eae7a10d8c4c0f3950e93308f65bba07ccd4 Mon Sep 17 00:00:00 2001 From: Aaron Alvarez Date: Mon, 8 Sep 2025 13:45:15 -0700 Subject: [PATCH 6/6] Addressing comments Signed-off-by: Aaron Alvarez --- .../expression/function/PPLFuncImpTable.java | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index 2f3570b2e2b..063b2058677 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -1035,6 +1035,7 @@ void register( } private static class AggBuilder { + private static final double MEDIAN_PERCENTILE = 50.0; private final Map> map = new HashMap<>(); @@ -1109,6 +1110,9 @@ void populate() { register( PERCENTILE_APPROX, (distinct, field, argList, ctx) -> { + if (field.getType() == null) { + throw new IllegalArgumentException("Field type cannot be null"); + } List newArgList = argList.stream().map(PlanUtils::derefMapCall).collect(Collectors.toList()); newArgList.add(ctx.rexBuilder.makeFlag(field.getType().getSqlTypeName())); @@ -1123,9 +1127,19 @@ void 
populate() { register( MEDIAN, (distinct, field, argList, ctx) -> { - List medianArgList = new ArrayList<>(); - medianArgList.add(ctx.rexBuilder.makeExactLiteral(BigDecimal.valueOf(50.0))); - medianArgList.add(ctx.rexBuilder.makeFlag(field.getType().getSqlTypeName())); + if (distinct) { + throw new IllegalArgumentException("MEDIAN does not support DISTINCT"); + } + if (!argList.isEmpty()) { + throw new IllegalArgumentException("MEDIAN takes no additional arguments"); + } + if (field.getType() == null) { + throw new IllegalArgumentException("Field type cannot be null"); + } + List medianArgList = + List.of( + ctx.rexBuilder.makeExactLiteral(BigDecimal.valueOf(MEDIAN_PERCENTILE)), + ctx.rexBuilder.makeFlag(field.getType().getSqlTypeName())); return UserDefinedFunctionUtils.makeAggregateCall( PPLBuiltinOperators.PERCENTILE_APPROX, List.of(field),