diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index 7777a2ac034..e80010fd2d5 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -75,6 +75,7 @@ public enum BuiltinFunctionName { MVAPPEND(FunctionName.of("mvappend")), MVJOIN(FunctionName.of("mvjoin")), MVINDEX(FunctionName.of("mvindex")), + MVFIND(FunctionName.of("mvfind")), MVZIP(FunctionName.of("mvzip")), SPLIT(FunctionName.of("split")), MVDEDUP(FunctionName.of("mvdedup")), diff --git a/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVFindFunctionImpl.java b/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVFindFunctionImpl.java new file mode 100644 index 00000000000..9c189bf2ff5 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVFindFunctionImpl.java @@ -0,0 +1,168 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.function.CollectionUDF; + +import java.util.List; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; +import org.apache.calcite.adapter.enumerable.NotNullImplementor; +import org.apache.calcite.adapter.enumerable.NullPolicy; +import org.apache.calcite.adapter.enumerable.RexToLixTranslator; +import org.apache.calcite.linq4j.tree.Expression; +import org.apache.calcite.linq4j.tree.Expressions; +import org.apache.calcite.linq4j.tree.Types; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.sql.type.OperandTypes; +import org.apache.calcite.sql.type.ReturnTypes; +import org.apache.calcite.sql.type.SqlReturnTypeInference; +import org.apache.calcite.sql.type.SqlTypeFamily; +import 
org.opensearch.sql.expression.function.ImplementorUDF; +import org.opensearch.sql.expression.function.UDFOperandMetadata; + +/** + * MVFIND function implementation that finds the index of the first element in a multivalue array + * that matches a regular expression. + * + *

<p>Usage: mvfind(array, regex) + * + *

<p>Returns the 0-based index of the first array element that contains a regex match (partial + * matches count, as with {@code Matcher.find()}), or NULL if no element matches. + * + *

Example: mvfind(array('apple', 'banana', 'apricot'), 'ban.*') returns 1 + */ +public class MVFindFunctionImpl extends ImplementorUDF { + public MVFindFunctionImpl() { + super(new MVFindImplementor(), NullPolicy.ANY); + } + + @Override + public SqlReturnTypeInference getReturnTypeInference() { + return ReturnTypes.INTEGER_NULLABLE; + } + + @Override + public UDFOperandMetadata getOperandMetadata() { + // Accept ARRAY and STRING for the regex pattern + return UDFOperandMetadata.wrap( + OperandTypes.family(SqlTypeFamily.ARRAY, SqlTypeFamily.CHARACTER)); + } + + public static class MVFindImplementor implements NotNullImplementor { + @Override + public Expression implement( + RexToLixTranslator translator, RexCall call, List translatedOperands) { + Expression arrayExpr = translatedOperands.get(0); + Expression patternExpr = translatedOperands.get(1); + + // Check if regex pattern is a literal - compile at planning time + if (call.operands.size() >= 2 && call.operands.get(1) instanceof RexLiteral) { + RexLiteral patternLiteral = (RexLiteral) call.operands.get(1); + Expression literalPatternExpr = tryCompileLiteralPattern(patternLiteral, arrayExpr); + if (literalPatternExpr != null) { + return literalPatternExpr; + } + } + + // For dynamic patterns, use evalWithString + return Expressions.call( + Types.lookupMethod(MVFindFunctionImpl.class, "evalWithString", List.class, String.class), + arrayExpr, + patternExpr); + } + + private static Expression tryCompileLiteralPattern( + RexLiteral patternLiteral, Expression arrayExpr) { + // Use getValueAs(String.class) to correctly unwrap Calcite NlsString + String patternString = patternLiteral.getValueAs(String.class); + if (patternString == null) { + return null; + } + try { + // Compile pattern at planning time and validate + Pattern compiledPattern = Pattern.compile(patternString); + // Generate code that uses the pre-compiled pattern + return Expressions.call( + Types.lookupMethod( + MVFindFunctionImpl.class, "evalWithPattern", 
List.class, Pattern.class), + arrayExpr, + Expressions.constant(compiledPattern, Pattern.class)); + } catch (PatternSyntaxException e) { + // Convert to IllegalArgumentException so it's treated as a client error (400) + throw new IllegalArgumentException( + String.format("Invalid regex pattern '%s': %s", patternString, e.getDescription()), e); + } + } + } + + private static Integer mvfindCore(List array, Pattern pattern) { + for (int i = 0; i < array.size(); i++) { + Object element = array.get(i); + if (element != null) { + String strValue = element.toString(); + if (pattern.matcher(strValue).find()) { + return i; // Return 0-based index + } + } + } + return null; // No match found + } + + /** + * Evaluates mvfind with a pre-compiled Pattern (for literal patterns compiled at planning time). + * Any runtime exceptions from mvfindCore will propagate unchanged. + * + * @param array The array to search + * @param pattern The pre-compiled regex pattern + * @return The 0-based index of the first matching element, or null if no match + */ + public static Integer evalWithPattern(List array, Pattern pattern) { + if (array == null || pattern == null) { + return null; + } + return mvfindCore(array, pattern); + } + + /** + * Evaluates mvfind with a string pattern (for dynamic patterns at runtime). + * + * @param array The array to search + * @param regex The regex pattern string + * @return The 0-based index of the first matching element, or null if no match + */ + public static Integer evalWithString(List array, String regex) { + if (array == null || regex == null) { + return null; + } + return mvfind(array, regex); + } + + /** + * Evaluates mvfind with a String pattern. Compiles the regex pattern and executes search. Throws + * IllegalArgumentException for invalid regex patterns; other runtime exceptions propagate + * unchanged. 
+ * + * @param array The array to search + * @param regex The regex pattern string + * @return The 0-based index of the first matching element, or null if no match + * @throws IllegalArgumentException if the regex pattern is invalid + */ + private static Integer mvfind(List array, String regex) { + if (array == null || regex == null) { + return null; + } + + Pattern pattern; + try { + pattern = Pattern.compile(regex); + } catch (PatternSyntaxException e) { + // Invalid regex is a client error (400) + throw new IllegalArgumentException( + String.format("Invalid regex pattern '%s': %s", regex, e.getDescription()), e); + } + return mvfindCore(array, pattern); + } +} diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java index 68ae5b2067c..2d769194924 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java @@ -47,6 +47,7 @@ import org.opensearch.sql.expression.function.CollectionUDF.FilterFunctionImpl; import org.opensearch.sql.expression.function.CollectionUDF.ForallFunctionImpl; import org.opensearch.sql.expression.function.CollectionUDF.MVAppendFunctionImpl; +import org.opensearch.sql.expression.function.CollectionUDF.MVFindFunctionImpl; import org.opensearch.sql.expression.function.CollectionUDF.MVZipFunctionImpl; import org.opensearch.sql.expression.function.CollectionUDF.MapAppendFunctionImpl; import org.opensearch.sql.expression.function.CollectionUDF.MapRemoveFunctionImpl; @@ -394,6 +395,7 @@ public class PPLBuiltinOperators extends ReflectiveSqlOperatorTable { public static final SqlOperator MAP_REMOVE = new MapRemoveFunctionImpl().toUDF("MAP_REMOVE"); public static final SqlOperator MVAPPEND = new MVAppendFunctionImpl().toUDF("mvappend"); public static final SqlOperator MVZIP = new 
MVZipFunctionImpl().toUDF("mvzip"); + public static final SqlOperator MVFIND = new MVFindFunctionImpl().toUDF("mvfind"); public static final SqlOperator FILTER = new FilterFunctionImpl().toUDF("filter"); public static final SqlOperator TRANSFORM = new TransformFunctionImpl().toUDF("transform"); public static final SqlOperator REDUCE = new ReduceFunctionImpl().toUDF("reduce"); diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index 3cce7e34082..200639abdf6 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -152,6 +152,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTI_MATCH; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MVAPPEND; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MVDEDUP; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MVFIND; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MVINDEX; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MVJOIN; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MVZIP; @@ -1036,6 +1037,7 @@ void populate() { registerOperator(ARRAY, PPLBuiltinOperators.ARRAY); registerOperator(MVAPPEND, PPLBuiltinOperators.MVAPPEND); registerOperator(MVDEDUP, SqlLibraryOperators.ARRAY_DISTINCT); + registerOperator(MVFIND, PPLBuiltinOperators.MVFIND); registerOperator(MVZIP, PPLBuiltinOperators.MVZIP); registerOperator(MAP_APPEND, PPLBuiltinOperators.MAP_APPEND); registerOperator(MAP_CONCAT, SqlLibraryOperators.MAP_CONCAT); diff --git a/core/src/test/java/org/opensearch/sql/expression/function/CollectionUDF/MVFindFunctionImplTest.java 
b/core/src/test/java/org/opensearch/sql/expression/function/CollectionUDF/MVFindFunctionImplTest.java new file mode 100644 index 00000000000..d8258b06387 --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/expression/function/CollectionUDF/MVFindFunctionImplTest.java @@ -0,0 +1,216 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.function.CollectionUDF; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import org.junit.jupiter.api.Test; + +public class MVFindFunctionImplTest { + + // Basic functionality tests + + @Test + public void testMvfindWithSimpleMatch() { + List array = Arrays.asList("apple", "banana", "cherry"); + Object result = MVFindFunctionImpl.evalWithString(array, "banana"); + assertEquals(1, result); + } + + @Test + public void testMvfindWithNoMatch() { + List array = Arrays.asList("apple", "banana", "cherry"); + Object result = MVFindFunctionImpl.evalWithString(array, "orange"); + assertNull(result); + } + + @Test + public void testMvfindWithFirstElementMatch() { + List array = Arrays.asList("apple", "banana", "cherry"); + Object result = MVFindFunctionImpl.evalWithString(array, "apple"); + assertEquals(0, result); + } + + @Test + public void testMvfindWithLastElementMatch() { + List array = Arrays.asList("apple", "banana", "cherry"); + Object result = MVFindFunctionImpl.evalWithString(array, "cherry"); + assertEquals(2, result); + } + + @Test + public void testMvfindReturnsFirstMatch() { + List array = Arrays.asList("test1", "test2", "test3"); + Object result = MVFindFunctionImpl.evalWithString(array, "test"); + assertEquals(0, result); // Returns first match, not all + } + + // Null 
handling tests + + @Test + public void testMvfindWithNullArray() { + Object result = MVFindFunctionImpl.evalWithString(null, "pattern"); + assertNull(result); + } + + @Test + public void testMvfindWithNullRegex() { + List array = Arrays.asList("apple", "banana"); + Object result = MVFindFunctionImpl.evalWithString(array, null); + assertNull(result); + } + + @Test + public void testMvfindWithBothArgsNull() { + Object result = MVFindFunctionImpl.evalWithString(null, null); + assertNull(result); + } + + @Test + public void testMvfindWithNullElementInArray() { + List array = Arrays.asList("apple", null, "banana"); + Object result = MVFindFunctionImpl.evalWithString(array, "banana"); + assertEquals(2, result); + } + + // Edge cases + + @Test + public void testMvfindWithEmptyArray() { + List array = Collections.emptyList(); + Object result = MVFindFunctionImpl.evalWithString(array, "pattern"); + assertNull(result); + } + + @Test + public void testMvfindWithEmptyStringPattern() { + List array = Arrays.asList("apple", "banana"); + Object result = MVFindFunctionImpl.evalWithString(array, ""); + assertEquals(0, result); // Empty pattern matches first element + } + + @Test + public void testMvfindWithSingleElementArray() { + List array = Collections.singletonList("apple"); + Object result = MVFindFunctionImpl.evalWithString(array, "app"); + assertEquals(0, result); + } + + // Regex pattern tests + + @Test + public void testMvfindWithWildcardPattern() { + List array = Arrays.asList("apple", "banana", "apricot"); + Object result = MVFindFunctionImpl.evalWithString(array, "ban.*"); + assertEquals(1, result); + } + + @Test + public void testMvfindWithCharacterClass() { + List array = Arrays.asList("error123", "info", "error456"); + Object result = MVFindFunctionImpl.evalWithString(array, "error[0-9]+"); + assertEquals(0, result); + } + + @Test + public void testMvfindWithDigitClass() { + List array = Arrays.asList("abc", "def123", "ghi"); + Object result = 
MVFindFunctionImpl.evalWithString(array, "\\d+"); + assertEquals(1, result); + } + + @Test + public void testMvfindWithCaseInsensitiveFlag() { + List array = Arrays.asList("Apple", "Banana", "Cherry"); + Object result = MVFindFunctionImpl.evalWithString(array, "(?i)banana"); + assertEquals(1, result); + } + + @Test + public void testMvfindWithAnchorStart() { + List array = Arrays.asList("hello", "say hello", "hello world"); + Object result = MVFindFunctionImpl.evalWithString(array, "^hello"); + assertEquals(0, result); + } + + @Test + public void testMvfindWithAnchorEnd() { + List array = Arrays.asList("world", "hello world", "world!"); + Object result = MVFindFunctionImpl.evalWithString(array, "world$"); + assertEquals(0, result); + } + + // Case sensitivity + + @Test + public void testMvfindIsCaseSensitiveByDefault() { + List array = Arrays.asList("Apple", "banana", "Cherry"); + Object result = MVFindFunctionImpl.evalWithString(array, "apple"); + assertNull(result); // No match because case-sensitive + } + + // Invalid regex patterns + + @Test + public void testMvfindWithInvalidRegex() { + List array = Arrays.asList("test"); + // Invalid regex should throw IllegalArgumentException (client error - 400) + IllegalArgumentException exception = + assertThrows( + IllegalArgumentException.class, + () -> MVFindFunctionImpl.evalWithString(array, "[invalid")); + // Verify error message contains pattern details + assertTrue(exception.getMessage().contains("Invalid regex pattern")); + assertTrue(exception.getMessage().contains("[invalid")); + } + + // Type conversion tests + + @Test + public void testMvfindWithNumericElements() { + List array = Arrays.asList(123, 456, 789); + Object result = MVFindFunctionImpl.evalWithString(array, "456"); + assertEquals(1, result); + } + + @Test + public void testMvfindWithMixedTypes() { + List array = Arrays.asList("text", 123, "more text"); + Object result = MVFindFunctionImpl.evalWithString(array, "123"); + assertEquals(1, result); + } + 
+ @Test + public void testMvfindWithBooleanElements() { + List array = Arrays.asList(true, false, true); + Object result = MVFindFunctionImpl.evalWithString(array, "false"); + assertEquals(1, result); + } + + // Type coercion tests (numeric patterns) + + @Test + public void testMvfindWithNumericPatternAsString() { + List array = Arrays.asList("apple", "404", "banana"); + // When called with string pattern + Object result = MVFindFunctionImpl.evalWithString(array, "404"); + assertEquals(1, result); + } + + @Test + public void testMvfindWithNumericPatternMatchingNumber() { + List array = Arrays.asList("error", 404, "success"); + // Number in array matched by numeric pattern (toString conversion) + Object result = MVFindFunctionImpl.evalWithString(array, "404"); + assertEquals(1, result); + } +} diff --git a/docs/user/ppl/functions/collection.md b/docs/user/ppl/functions/collection.md index ac2c79e8cda..79e58587a4e 100644 --- a/docs/user/ppl/functions/collection.md +++ b/docs/user/ppl/functions/collection.md @@ -625,7 +625,88 @@ fetched rows / total rows = 1/1 | [] | +--------+ ``` - + +## MVFIND + +### Description + +Usage: mvfind(array, regex) searches a multivalue array and returns the 0-based index of the first element that matches the regular expression. Returns NULL if no match is found. 
+Argument type: array: ARRAY, regex: STRING +Return type: INTEGER (nullable) +Example + +```ppl +source=people +| eval array = array('apple', 'banana', 'apricot'), result = mvfind(array, 'ban.*') +| fields result +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++--------+ +| result | +|--------| +| 1 | ++--------+ +``` + +```ppl +source=people +| eval array = array('cat', 'dog', 'bird'), result = mvfind(array, 'fish') +| fields result +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++--------+ +| result | +|--------| +| null | ++--------+ +``` + +```ppl +source=people +| eval array = array('error123', 'info', 'error456'), result = mvfind(array, 'error[0-9]+') +| fields result +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++--------+ +| result | +|--------| +| 0 | ++--------+ +``` + +```ppl +source=people +| eval array = array('Apple', 'Banana', 'Cherry'), result = mvfind(array, '(?i)banana') +| fields result +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++--------+ +| result | +|--------| +| 1 | ++--------+ +``` + ## MVINDEX ### Description diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java index 8b402fcff6e..6d92c5473b8 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java @@ -490,6 +490,125 @@ public void testMvindexRangeSingleElement() throws IOException { verifyDataRows(actual, rows(List.of(3))); } + @Test + public void testMvfindWithMatch() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval arr = array('apple', 'banana', 'apricot'), result = mvfind(arr," + + " 'ban.*') | head 1 | fields result", + TEST_INDEX_BANK)); + 
+ verifySchema(actual, schema("result", "int")); + verifyDataRows(actual, rows(1)); + } + + @Test + public void testMvfindWithNoMatch() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval arr = array('cat', 'dog', 'bird'), result = mvfind(arr, 'fish') |" + + " head 1 | fields result", + TEST_INDEX_BANK)); + + verifySchema(actual, schema("result", "int")); + verifyDataRows(actual, rows((Object) null)); + } + + @Test + public void testMvfindWithFirstMatch() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval arr = array('error123', 'info', 'error456'), result =" + + " mvfind(arr, 'err.*') | head 1 | fields result", + TEST_INDEX_BANK)); + + verifySchema(actual, schema("result", "int")); + verifyDataRows(actual, rows(0)); + } + + @Test + public void testMvfindWithMultipleMatches() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval arr = array('test1', 'test2', 'test3'), result = mvfind(arr," + + " 'test.*') | head 1 | fields result", + TEST_INDEX_BANK)); + + verifySchema(actual, schema("result", "int")); + verifyDataRows(actual, rows(0)); // Returns first match at index 0 + } + + @Test + public void testMvfindWithComplexRegex() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval arr = array('abc123', 'def456', 'ghi789'), result = mvfind(arr," + + " 'def\\\\d+') | head 1 | fields result", + TEST_INDEX_BANK)); + + verifySchema(actual, schema("result", "int")); + verifyDataRows(actual, rows(1)); + } + + @Test + public void testMvfindWithCaseInsensitive() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval arr = array('Apple', 'Banana', 'Cherry'), result = mvfind(arr," + + " '(?i)banana') | head 1 | fields result", + TEST_INDEX_BANK)); + + verifySchema(actual, schema("result", "int")); + verifyDataRows(actual, rows(1)); + } + + @Test + public void 
testMvfindWithNumericArray() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval arr = array(100, 200, 300), result = mvfind(arr, '200') | head 1" + + " | fields result", + TEST_INDEX_BANK)); + + verifySchema(actual, schema("result", "int")); + verifyDataRows(actual, rows(1)); + } + + @Test + public void testMvfindWithEmptyArray() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval arr = array(), result = mvfind(arr, 'test') | head 1 | fields" + + " result", + TEST_INDEX_BANK)); + + verifySchema(actual, schema("result", "int")); + verifyDataRows(actual, rows((Object) null)); + } + + @Test + public void testMvfindWithDynamicRegex() throws IOException { + // Test non-literal regex pattern (computed at runtime via concat) + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval arr = array('apple', 'banana', 'apricot'), pattern =" + + " concat('ban', '.*'), result = mvfind(arr, pattern) | head 1 | fields" + + " result", + TEST_INDEX_BANK)); + + verifySchema(actual, schema("result", "int")); + verifyDataRows(actual, rows(1)); + } + @Test public void testMvzipBasic() throws IOException { // Basic example from spec: eval nserver=mvzip(hosts,ports) diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index ca2288e46c4..638f95b2e52 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -452,6 +452,7 @@ ARRAY_LENGTH: 'ARRAY_LENGTH'; MVAPPEND: 'MVAPPEND'; MVJOIN: 'MVJOIN'; MVINDEX: 'MVINDEX'; +MVFIND: 'MVFIND'; MVZIP: 'MVZIP'; MVDEDUP: 'MVDEDUP'; SPLIT: 'SPLIT'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index e65f510fea2..0cc8159859c 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -1124,6 +1124,7 @@ collectionFunctionName | MVAPPEND | MVJOIN | MVINDEX + | MVFIND | 
MVDEDUP | MVZIP | SPLIT diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java index 8a350501adb..41e84402273 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java @@ -215,6 +215,91 @@ public void testMvindexRangeNegative() { verifyPPLToSparkSQL(root, expectedSparkSql); } + @Test + public void testMvfindWithMatch() { + String ppl = + "source=EMP | eval arr = array('apple', 'banana', 'apricot'), result = mvfind(arr," + + " 'ban.*') | head 1 | fields result"; + RelNode root = getRelNode(ppl); + + String expectedResult = "result=1\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT MVFIND(ARRAY('apple', 'banana', 'apricot'), 'ban.*') `result`\n" + + "FROM `scott`.`EMP`\n" + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testMvfindWithNoMatch() { + String ppl = + "source=EMP | eval arr = array('cat', 'dog', 'bird'), result = mvfind(arr, 'fish') | head" + + " 1 | fields result"; + RelNode root = getRelNode(ppl); + + String expectedResult = "result=null\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT MVFIND(ARRAY('cat', 'dog', 'bird'), 'fish') `result`\n" + + "FROM `scott`.`EMP`\n" + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testMvfindWithFirstMatch() { + String ppl = + "source=EMP | eval arr = array('error123', 'info', 'error456'), result = mvfind(arr," + + " 'err.*') | head 1 | fields result"; + RelNode root = getRelNode(ppl); + + String expectedResult = "result=0\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT MVFIND(ARRAY('error123', 'info', 'error456'), 'err.*') `result`\n" + + "FROM `scott`.`EMP`\n" + + "LIMIT 1"; + 
verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testMvfindWithMultipleMatches() { + String ppl = + "source=EMP | eval arr = array('test1', 'test2', 'test3'), result = mvfind(arr, 'test.*')" + + " | head 1 | fields result"; + RelNode root = getRelNode(ppl); + + String expectedResult = "result=0\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT MVFIND(ARRAY('test1', 'test2', 'test3'), 'test.*') `result`\n" + + "FROM `scott`.`EMP`\n" + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testMvfindWithComplexRegex() { + String ppl = + "source=EMP | eval arr = array('abc123', 'def456', 'ghi789'), result = mvfind(arr," + + " 'def\\d+') | head 1 | fields result"; + RelNode root = getRelNode(ppl); + + String expectedResult = "result=1\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT MVFIND(ARRAY('abc123', 'def456', 'ghi789'), 'def\\d+') `result`\n" + + "FROM `scott`.`EMP`\n" + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + @Test public void testMvdedupWithDuplicates() { String ppl = diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index 58232ae8cf2..e0704e3ea2e 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -993,4 +993,13 @@ public void testSpath() { anonymize( "search source=t | spath input=json_attr output=out path=foo.bar | fields id, out")); } + + @Test + public void testMvfind() { + assertEquals( + "source=table | eval identifier=mvfind(array(***,***,***),***) | fields + identifier", + anonymize( + "source=t | eval result=mvfind(array('apple', 'banana', 'apricot'), 'ban.*') | fields" + + " result")); + } }