diff --git a/core/src/main/java/org/opensearch/sql/data/utils/MixedTypeComparator.java b/core/src/main/java/org/opensearch/sql/data/utils/MixedTypeComparator.java new file mode 100644 index 00000000000..9d8fc5def0f --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/data/utils/MixedTypeComparator.java @@ -0,0 +1,38 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.data.utils; + +import java.util.Comparator; + +/** Comparator for mixed-type values. */ +public class MixedTypeComparator implements Comparator { + + public static final MixedTypeComparator INSTANCE = new MixedTypeComparator(); + + private MixedTypeComparator() {} + + @Override + public int compare(Object a, Object b) { + boolean aIsNumeric = isNumeric(a); + boolean bIsNumeric = isNumeric(b); + + // For same types compare directly + if (aIsNumeric == bIsNumeric) { + if (aIsNumeric) { + return Double.compare(((Number) a).doubleValue(), ((Number) b).doubleValue()); + } else { + return Integer.compare(a.toString().compareTo(b.toString()), 0); + } + } + // For mixed types, strings are considered larger than numbers (non-numeric values are treated + // as strings) + return aIsNumeric ? 
-1 : 1; + } + + private static boolean isNumeric(Object obj) { + return obj instanceof Number; + } +} diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java index a4d0d5dbdfd..1097a9a5b0d 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java @@ -94,6 +94,8 @@ import org.opensearch.sql.expression.function.udf.math.ConvFunction; import org.opensearch.sql.expression.function.udf.math.DivideFunction; import org.opensearch.sql.expression.function.udf.math.EulerFunction; +import org.opensearch.sql.expression.function.udf.math.MaxFunction; +import org.opensearch.sql.expression.function.udf.math.MinFunction; import org.opensearch.sql.expression.function.udf.math.ModFunction; import org.opensearch.sql.expression.function.udf.math.NumberToStringFunction; @@ -124,6 +126,8 @@ public class PPLBuiltinOperators extends ReflectiveSqlOperatorTable { public static final SqlOperator DIVIDE = new DivideFunction().toUDF("DIVIDE"); public static final SqlOperator SHA2 = CryptographicFunction.sha2().toUDF("SHA2"); public static final SqlOperator CIDRMATCH = new CidrMatchFunction().toUDF("CIDRMATCH"); + public static final SqlOperator MAX = new MaxFunction().toUDF("MAX"); + public static final SqlOperator MIN = new MinFunction().toUDF("MIN"); public static final SqlOperator COSH = adaptMathFunctionToUDF( diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index 0a483df3ff3..eeeed029b69 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -717,6 +717,10 @@ void populate() { 
registerOperator(INTERNAL_REGEXP_REPLACE_5, SqlLibraryOperators.REGEXP_REPLACE_5); registerOperator(INTERNAL_TRANSLATE3, SqlLibraryOperators.TRANSLATE3); + // Register eval functions for PPL max() and min() calls + registerOperator(MAX, PPLBuiltinOperators.MAX); + registerOperator(MIN, PPLBuiltinOperators.MIN); + // Register PPL UDF operator registerOperator(COSH, PPLBuiltinOperators.COSH); registerOperator(SINH, PPLBuiltinOperators.SINH); diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/math/MaxFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/math/MaxFunction.java new file mode 100644 index 00000000000..70e17cd13b4 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/math/MaxFunction.java @@ -0,0 +1,67 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.function.udf.math; + +import java.util.Arrays; +import java.util.List; +import java.util.Objects; +import org.apache.calcite.adapter.enumerable.NotNullImplementor; +import org.apache.calcite.adapter.enumerable.NullPolicy; +import org.apache.calcite.adapter.enumerable.RexToLixTranslator; +import org.apache.calcite.linq4j.tree.Expression; +import org.apache.calcite.linq4j.tree.Expressions; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.sql.type.SqlReturnTypeInference; +import org.apache.calcite.sql.type.SqlTypeName; +import org.opensearch.sql.data.utils.MixedTypeComparator; +import org.opensearch.sql.expression.function.ImplementorUDF; +import org.opensearch.sql.expression.function.UDFOperandMetadata; + +/** + * MAX(value1, value2, ...) returns the maximum value from the arguments. For mixed types, strings + * have higher precedence than numbers. 
+ */ +public class MaxFunction extends ImplementorUDF { + + public MaxFunction() { + super(new MaxImplementor(), NullPolicy.ALL); + } + + @Override + public SqlReturnTypeInference getReturnTypeInference() { + return opBinding -> opBinding.getTypeFactory().createSqlType(SqlTypeName.ANY); + } + + @Override + public UDFOperandMetadata getOperandMetadata() { + return null; + } + + public static class MaxImplementor implements NotNullImplementor { + + @Override + public Expression implement( + RexToLixTranslator translator, RexCall call, List translatedOperands) { + return Expressions.call( + MaxImplementor.class, "max", Expressions.newArrayInit(Object.class, translatedOperands)); + } + + public static Object max(Object[] args) { + return findMax(args); + } + + private static Object findMax(Object[] args) { + if (args == null) { + return null; + } + + return Arrays.stream(args) + .filter(Objects::nonNull) + .max(MixedTypeComparator.INSTANCE) + .orElse(null); + } + } +} diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/math/MinFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/math/MinFunction.java new file mode 100644 index 00000000000..2bb00ce05e6 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/math/MinFunction.java @@ -0,0 +1,67 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.function.udf.math; + +import java.util.Arrays; +import java.util.List; +import java.util.Objects; +import org.apache.calcite.adapter.enumerable.NotNullImplementor; +import org.apache.calcite.adapter.enumerable.NullPolicy; +import org.apache.calcite.adapter.enumerable.RexToLixTranslator; +import org.apache.calcite.linq4j.tree.Expression; +import org.apache.calcite.linq4j.tree.Expressions; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.sql.type.SqlReturnTypeInference; +import 
org.apache.calcite.sql.type.SqlTypeName; +import org.opensearch.sql.data.utils.MixedTypeComparator; +import org.opensearch.sql.expression.function.ImplementorUDF; +import org.opensearch.sql.expression.function.UDFOperandMetadata; + +/** + * MIN(value1, value2, ...) returns the minimum value from the arguments. For mixed types, numbers + * have higher precedence than strings. + */ +public class MinFunction extends ImplementorUDF { + + public MinFunction() { + super(new MinImplementor(), NullPolicy.ALL); + } + + @Override + public SqlReturnTypeInference getReturnTypeInference() { + return opBinding -> opBinding.getTypeFactory().createSqlType(SqlTypeName.ANY); + } + + @Override + public UDFOperandMetadata getOperandMetadata() { + return null; + } + + public static class MinImplementor implements NotNullImplementor { + + @Override + public Expression implement( + RexToLixTranslator translator, RexCall call, List translatedOperands) { + return Expressions.call( + MinImplementor.class, "min", Expressions.newArrayInit(Object.class, translatedOperands)); + } + + public static Object min(Object[] args) { + return findMin(args); + } + + private static Object findMin(Object[] args) { + if (args == null) { + return null; + } + + return Arrays.stream(args) + .filter(Objects::nonNull) + .min(MixedTypeComparator.INSTANCE) + .orElse(null); + } + } +} diff --git a/core/src/test/java/org/opensearch/sql/data/utils/MixedTypeComparatorTest.java b/core/src/test/java/org/opensearch/sql/data/utils/MixedTypeComparatorTest.java new file mode 100644 index 00000000000..de656c50960 --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/data/utils/MixedTypeComparatorTest.java @@ -0,0 +1,48 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.data.utils; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.math.BigDecimal; +import org.junit.jupiter.api.Test; + +class MixedTypeComparatorTest { + + 
private final MixedTypeComparator comparator = MixedTypeComparator.INSTANCE; + + @Test + public void testNumericComparison() { + assertEquals(-1, comparator.compare(1, 2)); + assertEquals(1, comparator.compare(2, 1)); + assertEquals(0, comparator.compare(5, 5)); + + // Different numeric types + assertEquals(-1, comparator.compare(1, 2.5)); + assertEquals(1, comparator.compare(3.14, 2)); + assertEquals(0, comparator.compare(4, 4.0)); + assertEquals(-1, comparator.compare(10L, new BigDecimal("20"))); + } + + @Test + public void testStringComparison() { + assertEquals(-1, comparator.compare("apple", "banana")); + assertEquals(1, comparator.compare("zebra", "apple")); + assertEquals(0, comparator.compare("test", "test")); + assertEquals(-1, comparator.compare("ABC", "abc")); // + assertEquals(1, comparator.compare("hello", "HELLO")); + } + + @Test + public void testMixedTypeComparison() { + assertEquals(-1, comparator.compare(42, "apple")); + assertEquals(1, comparator.compare("apple", 42)); + assertEquals(-1, comparator.compare(3.14, "hello")); + assertEquals(1, comparator.compare("world", 100L)); + assertEquals(-1, comparator.compare(0, "0")); + assertEquals(1, comparator.compare("123", 456)); + } +} diff --git a/docs/category.json b/docs/category.json index aed8a9c8d67..160e47810a3 100644 --- a/docs/category.json +++ b/docs/category.json @@ -49,6 +49,8 @@ "user/ppl/cmd/subquery.rst", "user/ppl/cmd/syntax.rst", "user/ppl/cmd/timechart.rst", + "user/ppl/cmd/search.rst", + "user/ppl/functions/statistical.rst", "user/ppl/cmd/top.rst", "user/ppl/cmd/trendline.rst", "user/ppl/cmd/where.rst", diff --git a/docs/user/ppl/functions/statistical.rst b/docs/user/ppl/functions/statistical.rst new file mode 100644 index 00000000000..f87cc104872 --- /dev/null +++ b/docs/user/ppl/functions/statistical.rst @@ -0,0 +1,109 @@ +====================== +Statistical Functions +====================== + +.. rubric:: Table of contents + +.. 
contents:: + :local: + :depth: 1 + + +MAX +--- + +Description +>>>>>>>>>>> + +Usage: max(x, y, ...) returns the maximum value from all provided arguments. Strings are treated as greater than numbers, so if both strings and numbers are provided, it returns the lexicographically greatest string. + +Note: This function is only available in the eval command context and requires the Calcite engine to be enabled. + +Argument type: Variable number of INTEGER/LONG/FLOAT/DOUBLE/STRING arguments + +Return type: Type of the selected argument + +Example:: + + os> source=accounts | eval max_val = MAX(age, 30) | fields age, max_val + fetched rows / total rows = 4/4 + +-----+---------+ + | age | max_val | + |-----+---------| + | 32 | 32 | + | 36 | 36 | + | 28 | 30 | + | 33 | 33 | + +-----+---------+ + + os> source=accounts | eval result = MAX(firstname, 'John') | fields firstname, result + fetched rows / total rows = 4/4 + +-----------+---------+ + | firstname | result | + |-----------+---------| + | Amber | John | + | Hattie | John | + | Nanette | Nanette | + | Dale | John | + +-----------+---------+ + + os> source=accounts | eval result = MAX(age, 35, 'John', firstname) | fields age, firstname, result + fetched rows / total rows = 4/4 + +-----+-----------+---------+ + | age | firstname | result | + |-----+-----------+---------| + | 32 | Amber | John | + | 36 | Hattie | John | + | 28 | Nanette | Nanette | + | 33 | Dale | John | + +-----+-----------+---------+ + + +MIN +--- + +Description +>>>>>>>>>>> + +Usage: min(x, y, ...) returns the minimum value from all provided arguments. Strings are treated as greater than numbers, so if both strings and numbers are provided, it returns the smallest numeric value. + +Note: This function is only available in the eval command context and requires the Calcite engine to be enabled.
+ +Argument type: Variable number of INTEGER/LONG/FLOAT/DOUBLE/STRING arguments + +Return type: Type of the selected argument + +Example:: + + os> source=accounts | eval min_val = MIN(age, 30) | fields age, min_val + fetched rows / total rows = 4/4 + +-----+---------+ + | age | min_val | + |-----+---------| + | 32 | 30 | + | 36 | 30 | + | 28 | 28 | + | 33 | 30 | + +-----+---------+ + + os> source=accounts | eval result = MIN(firstname, 'John') | fields firstname, result + fetched rows / total rows = 4/4 + +-----------+--------+ + | firstname | result | + |-----------+--------| + | Amber | Amber | + | Hattie | Hattie | + | Nanette | John | + | Dale | Dale | + +-----------+--------+ + + os> source=accounts | eval result = MIN(age, 35, firstname) | fields age, firstname, result + fetched rows / total rows = 4/4 + +-----+-----------+--------+ + | age | firstname | result | + |-----+-----------+--------| + | 32 | Amber | 32 | + | 36 | Hattie | 35 | + | 28 | Nanette | 28 | + | 33 | Dale | 33 | + +-----+-----------+--------+ \ No newline at end of file diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index fd6ffbb70fa..279cfd94b3c 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -843,6 +843,24 @@ public void testExplainSortOnMetricsNoBucketNullable() throws IOException { + " gender, state | sort `count()`")); } + @Test + public void testExplainEvalMax() throws IOException { + String expected = loadExpectedPlan("explain_eval_max.json"); + assertJsonEqualsIgnoreId( + expected, + explainQueryToString( + "source=opensearch-sql_test_index_account | eval new = max(1, 2, 3, age, 'banana')")); + } + + @Test + public void testExplainEvalMin() throws IOException { + String expected = 
loadExpectedPlan("explain_eval_min.json"); + assertJsonEqualsIgnoreId( + expected, + explainQueryToString( + "source=opensearch-sql_test_index_account | eval new = min(1, 2, 3, age, 'banana')")); + } + /** * Executes the PPL query and returns the result as a string with windows-style line breaks * replaced with Unix-style ones. diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLEvalMaxMinFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLEvalMaxMinFunctionIT.java new file mode 100644 index 00000000000..4eaeb2b86e2 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLEvalMaxMinFunctionIT.java @@ -0,0 +1,137 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.remote; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_DOG; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NULL_MISSING; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; + +import org.json.JSONObject; +import org.junit.Test; +import org.opensearch.sql.ppl.PPLIntegTestCase; + +public class CalcitePPLEvalMaxMinFunctionIT extends PPLIntegTestCase { + @Override + public void init() throws Exception { + super.init(); + enableCalcite(); + + loadIndex(Index.DOG); + loadIndex(Index.NULL_MISSING); + } + + @Test + public void testEvalMaxNumeric() throws Exception { + JSONObject result = + executeQuery( + String.format( + "source=%s | eval new = max(1, 3, age) | fields age, new", TEST_INDEX_DOG)); + verifySchema(result, schema("age", "bigint"), schema("new", "int")); + verifyDataRows(result, rows(2, 3), rows(4, 4)); + } + + @Test + public void testEvalMaxString() throws Exception { + JSONObject result = + 
executeQuery( + String.format( + "source=%s | eval new = max('apple', 'sam', dog_name) | fields dog_name, new", + TEST_INDEX_DOG)); + verifySchema(result, schema("dog_name", "string"), schema("new", "string")); + verifyDataRows(result, rows("rex", "sam"), rows("snoopy", "snoopy")); + } + + @Test + public void testEvalMaxNumericAndString() throws Exception { + JSONObject result = + executeQuery( + String.format( + "source=%s | eval new = max(14, age, 'Fred', holdersName) | fields age," + + " holdersName, new", + TEST_INDEX_DOG)); + verifySchema( + result, schema("holdersName", "string"), schema("age", "bigint"), schema("new", "string")); + verifyDataRows(result, rows(2, "Daenerys", "Fred"), rows(4, "Hattie", "Hattie")); + } + + @Test + public void testEvalMinNumeric() throws Exception { + JSONObject result = + executeQuery( + String.format( + "source=%s | eval new = min(14, 3, age) | fields age, new", TEST_INDEX_DOG)); + verifySchema(result, schema("age", "bigint"), schema("new", "bigint")); + verifyDataRows(result, rows(2, 2), rows(4, 3)); + } + + @Test + public void testEvalMinString() throws Exception { + JSONObject result = + executeQuery( + String.format( + "source=%s | eval new = min('apple', 'sam', dog_name) | fields dog_name, new", + TEST_INDEX_DOG)); + verifySchema(result, schema("dog_name", "string"), schema("new", "string")); + verifyDataRows(result, rows("rex", "apple"), rows("snoopy", "apple")); + } + + @Test + public void testEvalMinNumericAndString() throws Exception { + JSONObject result = + executeQuery( + String.format( + "source=%s | eval new = min(14, age, 'sam', holdersName) | fields age, holdersName," + + " new", + TEST_INDEX_DOG)); + verifySchema( + result, schema("holdersName", "string"), schema("age", "bigint"), schema("new", "bigint")); + verifyDataRows(result, rows(2, "Daenerys", 2), rows(4, "Hattie", 4)); + } + + @Test + public void testEvalMaxIgnoresNulls() throws Exception { + JSONObject result = + executeQuery( + String.format( + 
"source=%s | eval new = max(`int`, 3) | fields `int`, new", + TEST_INDEX_NULL_MISSING)); + verifySchema(result, schema("int", "int"), schema("new", "int")); + verifyDataRows( + result, + rows(42, 42), + rows(null, 3), + rows(null, 3), + rows(null, 3), + rows(null, 3), + rows(null, 3), + rows(null, 3), + rows(null, 3), + rows(null, 3)); + } + + @Test + public void testEvalMinIgnoresNulls() throws Exception { + JSONObject result = + executeQuery( + String.format( + "source=%s | eval new = min(dbl, 5) | fields dbl, new", TEST_INDEX_NULL_MISSING)); + verifySchema(result, schema("dbl", "double"), schema("new", "double")); + verifyDataRows( + result, + rows(3.1415, 3.1415), + rows(null, 5), + rows(null, 5), + rows(null, 5), + rows(null, 5), + rows(null, 5), + rows(null, 5), + rows(null, 5), + rows(null, 5)); + } +} diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_eval_max.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_eval_max.json new file mode 100644 index 00000000000..6140bebcc40 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_eval_max.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], new=[MAX(1, 2, 3, $8, 'banana':VARCHAR)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "EnumerableCalc(expr#0..10=[{inputs}], expr#11=[1], expr#12=[2], expr#13=[3], expr#14=['banana':VARCHAR], expr#15=[MAX($t11, $t12, $t13, $t8, $t14)], proj#0..10=[{exprs}], $f11=[$t15])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + } +} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_eval_min.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_eval_min.json new file mode 100644 index 00000000000..b5e85f0eb58 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_eval_min.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], new=[MIN(1, 2, 3, $8, 'banana':VARCHAR)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "EnumerableCalc(expr#0..10=[{inputs}], expr#11=[1], expr#12=[2], expr#13=[3], expr#14=['banana':VARCHAR], expr#15=[MIN($t11, $t12, $t13, $t8, $t14)], proj#0..10=[{exprs}], $f11=[$t15])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + } +} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_eval_max.json 
b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_eval_max.json new file mode 100644 index 00000000000..5c366156f17 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_eval_max.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], new=[MAX(1, 2, 3, $8, 'banana':VARCHAR)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..16=[{inputs}], expr#17=[1], expr#18=[2], expr#19=[3], expr#20=['banana':VARCHAR], expr#21=[MAX($t17, $t18, $t19, $t8, $t20)], proj#0..10=[{exprs}], new=[$t21])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" + } +} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_eval_min.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_eval_min.json new file mode 100644 index 00000000000..5d6872ca060 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_eval_min.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], new=[MIN(1, 2, 3, $8, 'banana':VARCHAR)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..16=[{inputs}], expr#17=[1], expr#18=[2], expr#19=[3], expr#20=['banana':VARCHAR], expr#21=[MIN($t17, $t18, $t19, $t8, $t20)], proj#0..10=[{exprs}], new=[$t21])\n 
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" + } +} \ No newline at end of file diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 0bc7b784338..e6b7d41ac80 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -970,6 +970,8 @@ mathematicalFunctionName | SIGNUM | SUM | AVG + | MAX + | MIN | trigonometricFunctionName ; diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEvalTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEvalTest.java index 285c084e936..95605b3a303 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEvalTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEvalTest.java @@ -517,4 +517,106 @@ public void testValuesAggregationWithGroupBy() { + "GROUP BY `DEPTNO`"; verifyPPLToSparkSQL(root, expectedSparkSql); } + + @Test + public void testEvalMaxOnNumbers() { + String ppl = "source=EMP | eval a = max(5, 30, DEPTNO)"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], a=[MAX(5, 30, $7)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, MAX(5, 30," + + " `DEPTNO`) `a`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testEvalMaxOnStrings() { + String ppl = "source=EMP | eval a = max('banana', 'Door', ENAME)"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], a=[MAX('banana':VARCHAR, 'Door':VARCHAR, $1)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + 
verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, MAX('banana'," + + " 'Door', `ENAME`) `a`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testEvalMaxOnNumericAndString() { + String ppl = "source=EMP | eval a = max(5, 30, DEPTNO, 'banana', 'Door', ENAME)"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], a=[MAX(5, 30, $7, 'banana':VARCHAR, 'Door':VARCHAR, $1)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, MAX(5, 30," + + " `DEPTNO`, 'banana', 'Door', `ENAME`) `a`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testEvalMinOnNumbers() { + String ppl = "source=EMP | eval a = min(5, 30, DEPTNO)"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], a=[MIN(5, 30, $7)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, MIN(5, 30," + + " `DEPTNO`) `a`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testEvalMinOnStrings() { + String ppl = "source=EMP | eval a = min('banana', 'Door', ENAME)"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], a=[MIN('banana':VARCHAR, 'Door':VARCHAR, $1)])\n" + + " LogicalTableScan(table=[[scott, 
EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, MIN('banana'," + + " 'Door', `ENAME`) `a`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testEvalMinOnNumericAndString() { + String ppl = "source=EMP | eval a = min(5, 30, DEPTNO, 'banana', 'Door', ENAME)"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], a=[MIN(5, 30, $7, 'banana':VARCHAR, 'Door':VARCHAR, $1)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, MIN(5, 30," + + " `DEPTNO`, 'banana', 'Door', `ENAME`) `a`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } }