Skip to content
Merged
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.data.utils;

import java.util.Comparator;

/**
 * Comparator that orders values of mixed types (numbers and non-numbers).
 *
 * <p>Ordering rules:
 *
 * <ul>
 *   <li>Two {@link Number} values are compared by their {@code double} values.
 *   <li>Two non-numeric values are compared by the lexicographic order of their {@code
 *       toString()} representations. Numeric-looking strings are not parsed, so {@code "9"}
 *       sorts after {@code "21"}.
 *   <li>A {@link Number} always sorts before a non-numeric value.
 * </ul>
 *
 * <p>Null operands are not supported: comparing {@code null} with a non-numeric value throws
 * {@link NullPointerException}, so callers should filter nulls out first.
 */
public class MixedTypeComparator implements Comparator<Object> {

  /** Shared instance; the comparator holds no state. */
  public static final MixedTypeComparator INSTANCE = new MixedTypeComparator();

  private MixedTypeComparator() {}

  /**
   * Compares two values according to the rules described on the class.
   *
   * @param a the first value
   * @param b the second value
   * @return a negative integer, zero, or a positive integer as {@code a} sorts before, equal to,
   *     or after {@code b}; string comparisons are normalized to -1, 0 or 1
   */
  @Override
  public int compare(Object a, Object b) {
    boolean aIsNumeric = isNumeric(a);
    boolean bIsNumeric = isNumeric(b);

    // Mixed types: the numeric side is always the smaller one.
    if (aIsNumeric != bIsNumeric) {
      return aIsNumeric ? -1 : 1;
    }

    if (aIsNumeric) {
      // NOTE(review): going through double loses precision for long values beyond 2^53 and for
      // high-precision BigDecimal values, which may therefore compare as equal.
      return Double.compare(((Number) a).doubleValue(), ((Number) b).doubleValue());
    }

    // String.compareTo may return any magnitude (e.g. 25 for "zebra" vs "apple"); collapse it to
    // -1 / 0 / 1 so results are uniform with the other branches.
    return Integer.signum(a.toString().compareTo(b.toString()));
  }

  /** Returns true only for {@link Number} instances; strings such as {@code "1"} are not numeric. */
  private static boolean isNumeric(Object obj) {
    return obj instanceof Number;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@
import org.opensearch.sql.expression.function.udf.math.ConvFunction;
import org.opensearch.sql.expression.function.udf.math.DivideFunction;
import org.opensearch.sql.expression.function.udf.math.EulerFunction;
import org.opensearch.sql.expression.function.udf.math.MaxFunction;
import org.opensearch.sql.expression.function.udf.math.MinFunction;
import org.opensearch.sql.expression.function.udf.math.ModFunction;
import org.opensearch.sql.expression.function.udf.math.NumberToStringFunction;

Expand Down Expand Up @@ -124,6 +126,8 @@ public class PPLBuiltinOperators extends ReflectiveSqlOperatorTable {
public static final SqlOperator DIVIDE = new DivideFunction().toUDF("DIVIDE");
public static final SqlOperator SHA2 = CryptographicFunction.sha2().toUDF("SHA2");
public static final SqlOperator CIDRMATCH = new CidrMatchFunction().toUDF("CIDRMATCH");
// Eval-level max()/min() functions, exposed as UDFs named "MAX" and "MIN".
public static final SqlOperator MAX = new MaxFunction().toUDF("MAX");
public static final SqlOperator MIN = new MinFunction().toUDF("MIN");

public static final SqlOperator COSH =
adaptMathFunctionToUDF(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -717,6 +717,10 @@ void populate() {
registerOperator(INTERNAL_REGEXP_REPLACE_5, SqlLibraryOperators.REGEXP_REPLACE_5);
registerOperator(INTERNAL_TRANSLATE3, SqlLibraryOperators.TRANSLATE3);

// Register eval functions for PPL max() and min() calls
registerOperator(MAX, PPLBuiltinOperators.MAX);
registerOperator(MIN, PPLBuiltinOperators.MIN);

// Register PPL UDF operator
registerOperator(COSH, PPLBuiltinOperators.COSH);
registerOperator(SINH, PPLBuiltinOperators.SINH);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.expression.function.udf.math;

import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import org.apache.calcite.adapter.enumerable.NotNullImplementor;
import org.apache.calcite.adapter.enumerable.NullPolicy;
import org.apache.calcite.adapter.enumerable.RexToLixTranslator;
import org.apache.calcite.linq4j.tree.Expression;
import org.apache.calcite.linq4j.tree.Expressions;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.sql.type.SqlReturnTypeInference;
import org.apache.calcite.sql.type.SqlTypeName;
import org.opensearch.sql.data.utils.MixedTypeComparator;
import org.opensearch.sql.expression.function.ImplementorUDF;
import org.opensearch.sql.expression.function.UDFOperandMetadata;

/**
 * PPL {@code MAX(value1, value2, ...)}: picks the largest of its arguments.
 *
 * <p>Ordering is delegated to {@link MixedTypeComparator}, under which any non-numeric value
 * (treated as a string) ranks above every number.
 */
public class MaxFunction extends ImplementorUDF {

  public MaxFunction() {
    super(new MaxImplementor(), NullPolicy.ALL);
  }

  /** The result is typed as {@code ANY} because the winner may be any of the argument types. */
  @Override
  public SqlReturnTypeInference getReturnTypeInference() {
    return binding -> binding.getTypeFactory().createSqlType(SqlTypeName.ANY);
  }

  /**
   * No operand metadata is declared.
   *
   * <p>NOTE(review): presumably this leaves operand types unchecked so mixed-type arguments are
   * accepted; confirm against {@code ImplementorUDF}.
   */
  @Override
  public UDFOperandMetadata getOperandMetadata() {
    return null;
  }

  /** Generates a call to {@link #max(Object[])} with all operands packed into one array. */
  public static class MaxImplementor implements NotNullImplementor {

    @Override
    public Expression implement(
        RexToLixTranslator translator, RexCall call, List<Expression> translatedOperands) {
      // Box every translated operand into a single Object[] argument.
      Expression packedOperands = Expressions.newArrayInit(Object.class, translatedOperands);
      return Expressions.call(MaxImplementor.class, "max", packedOperands);
    }

    /**
     * Returns the largest non-null element of {@code args}.
     *
     * @param args candidate values; may be null or contain nulls
     * @return the maximum under {@link MixedTypeComparator}, or null when {@code args} is null or
     *     holds no non-null element
     */
    public static Object max(Object[] args) {
      return args == null
          ? null
          : Arrays.stream(args)
              .filter(Objects::nonNull)
              .max(MixedTypeComparator.INSTANCE)
              .orElse(null);
    }
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.expression.function.udf.math;

import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import org.apache.calcite.adapter.enumerable.NotNullImplementor;
import org.apache.calcite.adapter.enumerable.NullPolicy;
import org.apache.calcite.adapter.enumerable.RexToLixTranslator;
import org.apache.calcite.linq4j.tree.Expression;
import org.apache.calcite.linq4j.tree.Expressions;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.sql.type.SqlReturnTypeInference;
import org.apache.calcite.sql.type.SqlTypeName;
import org.opensearch.sql.data.utils.MixedTypeComparator;
import org.opensearch.sql.expression.function.ImplementorUDF;
import org.opensearch.sql.expression.function.UDFOperandMetadata;

/**
 * PPL {@code MIN(value1, value2, ...)}: picks the smallest of its arguments.
 *
 * <p>Ordering is delegated to {@link MixedTypeComparator}, under which every number ranks below
 * any non-numeric value (treated as a string).
 */
public class MinFunction extends ImplementorUDF {

  public MinFunction() {
    super(new MinImplementor(), NullPolicy.ALL);
  }

  /** The result is typed as {@code ANY} because the winner may be any of the argument types. */
  @Override
  public SqlReturnTypeInference getReturnTypeInference() {
    return binding -> binding.getTypeFactory().createSqlType(SqlTypeName.ANY);
  }

  /**
   * No operand metadata is declared.
   *
   * <p>NOTE(review): presumably this leaves operand types unchecked so mixed-type arguments are
   * accepted; confirm against {@code ImplementorUDF}.
   */
  @Override
  public UDFOperandMetadata getOperandMetadata() {
    return null;
  }

  /** Generates a call to {@link #min(Object[])} with all operands packed into one array. */
  public static class MinImplementor implements NotNullImplementor {

    @Override
    public Expression implement(
        RexToLixTranslator translator, RexCall call, List<Expression> translatedOperands) {
      // Box every translated operand into a single Object[] argument.
      Expression packedOperands = Expressions.newArrayInit(Object.class, translatedOperands);
      return Expressions.call(MinImplementor.class, "min", packedOperands);
    }

    /**
     * Returns the smallest non-null element of {@code args}.
     *
     * @param args candidate values; may be null or contain nulls
     * @return the minimum under {@link MixedTypeComparator}, or null when {@code args} is null or
     *     holds no non-null element
     */
    public static Object min(Object[] args) {
      return args == null
          ? null
          : Arrays.stream(args)
              .filter(Objects::nonNull)
              .min(MixedTypeComparator.INSTANCE)
              .orElse(null);
    }
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.data.utils;

import static org.junit.jupiter.api.Assertions.assertEquals;

import java.math.BigDecimal;
import org.junit.jupiter.api.Test;

/** Unit tests pinning the ordering rules of {@link MixedTypeComparator}. */
class MixedTypeComparatorTest {

  private final MixedTypeComparator underTest = MixedTypeComparator.INSTANCE;

  /** Numbers are ordered by value, regardless of their concrete numeric type. */
  @Test
  public void testNumericComparison() {
    // Same numeric type
    assertEquals(-1, underTest.compare(1, 2));
    assertEquals(1, underTest.compare(2, 1));
    assertEquals(0, underTest.compare(5, 5));

    // Mixed numeric types: int, double, long, BigDecimal
    assertEquals(-1, underTest.compare(1, 2.5));
    assertEquals(1, underTest.compare(3.14, 2));
    assertEquals(0, underTest.compare(4, 4.0));
    assertEquals(-1, underTest.compare(10L, new BigDecimal("20")));
  }

  /** Strings are ordered lexicographically and the result is normalized to -1, 0 or 1. */
  @Test
  public void testStringComparison() {
    assertEquals(-1, underTest.compare("apple", "banana"));
    assertEquals(1, underTest.compare("zebra", "apple"));
    assertEquals(0, underTest.compare("test", "test"));

    // Uppercase letters sort before lowercase ones.
    assertEquals(-1, underTest.compare("ABC", "abc"));
    assertEquals(1, underTest.compare("hello", "HELLO"));
  }

  /** A number always sorts before a string, even when the string looks numeric. */
  @Test
  public void testMixedTypeComparison() {
    assertEquals(-1, underTest.compare(42, "apple"));
    assertEquals(1, underTest.compare("apple", 42));
    assertEquals(-1, underTest.compare(3.14, "hello"));
    assertEquals(1, underTest.compare("world", 100L));

    // Numeric-looking strings are still strings.
    assertEquals(-1, underTest.compare(0, "0"));
    assertEquals(1, underTest.compare("123", 456));
  }
}
2 changes: 2 additions & 0 deletions docs/category.json
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@
"user/ppl/cmd/subquery.rst",
"user/ppl/cmd/syntax.rst",
"user/ppl/cmd/timechart.rst",
"user/ppl/cmd/search.rst",
"user/ppl/functions/statistical.rst",
"user/ppl/cmd/top.rst",
"user/ppl/cmd/trendline.rst",
"user/ppl/cmd/where.rst",
Expand Down
109 changes: 109 additions & 0 deletions docs/user/ppl/functions/statistical.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
======================
Statistical Functions
======================

.. rubric:: Table of contents

.. contents::
:local:
:depth: 1


MAX
---

Description
>>>>>>>>>>>

Usage: max(x, y, ...) returns the maximum value from all provided arguments. Strings are treated as greater than numbers, so when both strings and numbers are provided, it returns the maximum string value (in lexicographic order). Strings that look like numbers are still compared as strings, e.g. max('9', '21') returns '9'.

Note: This function is only available in the eval command context and requires Calcite engine to be enabled.

Argument type: Variable number of INTEGER/LONG/FLOAT/DOUBLE/STRING arguments

Return type: Type of the selected argument

Example::

os> source=accounts | eval max_val = MAX(age, 30) | fields age, max_val
fetched rows / total rows = 4/4
+-----+---------+
| age | max_val |
|-----+---------|
| 32 | 32 |
| 36 | 36 |
| 28 | 30 |
| 33 | 33 |
+-----+---------+

os> source=accounts | eval result = MAX(firstname, 'John') | fields firstname, result
fetched rows / total rows = 4/4
+-----------+---------+
| firstname | result |
|-----------+---------|
| Amber | John |
| Hattie | John |
| Nanette | Nanette |
| Dale | John |
+-----------+---------+

os> source=accounts | eval result = MAX(age, 35, 'John', firstname) | fields age, firstname, result
fetched rows / total rows = 4/4
+-----+-----------+---------+
| age | firstname | result |
|-----+-----------+---------|
| 32 | Amber | John |
| 36 | Hattie | John |
| 28 | Nanette | Nanette |
| 33 | Dale | John |
+-----+-----------+---------+


MIN
---

Description
>>>>>>>>>>>

Usage: min(x, y, ...) returns the minimum value from all provided arguments. Strings are treated as greater than numbers, so when both strings and numbers are provided, it returns the minimum numeric value.

Note: This function is only available in the eval command context and requires Calcite engine to be enabled.

Argument type: Variable number of INTEGER/LONG/FLOAT/DOUBLE/STRING arguments

Return type: Type of the selected argument

Example::

os> source=accounts | eval min_val = MIN(age, 30) | fields age, min_val
fetched rows / total rows = 4/4
+-----+---------+
| age | min_val |
|-----+---------|
| 32 | 30 |
| 36 | 30 |
| 28 | 28 |
| 33 | 30 |
+-----+---------+

os> source=accounts | eval result = MIN(firstname, 'John') | fields firstname, result
fetched rows / total rows = 4/4
+-----------+--------+
| firstname | result |
|-----------+--------|
| Amber | Amber |
| Hattie | Hattie |
| Nanette | John |
| Dale | Dale |
+-----------+--------+

os> source=accounts | eval result = MIN(age, 35, firstname) | fields age, firstname, result
fetched rows / total rows = 4/4
+-----+-----------+--------+
| age | firstname | result |
|-----+-----------+--------|
| 32 | Amber | 32 |
| 36 | Hattie | 35 |
| 28 | Nanette | 28 |
| 33 | Dale | 33 |
+-----+-----------+--------+
Original file line number Diff line number Diff line change
Expand Up @@ -843,6 +843,24 @@ public void testExplainSortOnMetricsNoBucketNullable() throws IOException {
+ " gender, state | sort `count()`"));
}

@Test
public void testExplainEvalMax() throws IOException {
  // Explain an eval that calls max() with integer literals, a field and a string literal.
  String ppl =
      "source=opensearch-sql_test_index_account | eval new = max(1, 2, 3, age, 'banana')";
  String expectedPlan = loadExpectedPlan("explain_eval_max.json");
  assertJsonEqualsIgnoreId(expectedPlan, explainQueryToString(ppl));
}

@Test
public void testExplainEvalMin() throws IOException {
  // Explain an eval that calls min() with integer literals, a field and a string literal.
  String ppl =
      "source=opensearch-sql_test_index_account | eval new = min(1, 2, 3, age, 'banana')";
  String expectedPlan = loadExpectedPlan("explain_eval_min.json");
  assertJsonEqualsIgnoreId(expectedPlan, explainQueryToString(ppl));
}

/**
* Executes the PPL query and returns the result as a string with windows-style line breaks
* replaced with Unix-style ones.
Expand Down
Loading
Loading