diff --git a/core/src/main/java/org/opensearch/sql/expression/DSL.java b/core/src/main/java/org/opensearch/sql/expression/DSL.java index e1d558ce55..8bc7fcc01c 100644 --- a/core/src/main/java/org/opensearch/sql/expression/DSL.java +++ b/core/src/main/java/org/opensearch/sql/expression/DSL.java @@ -118,6 +118,10 @@ public NamedArgumentExpression namedArgument(String argName, Expression value) { return new NamedArgumentExpression(argName, value); } + public NamedArgumentExpression namedArgument(String name, String value) { + return namedArgument(name, literal(value)); + } + public static ParseExpression parsed(Expression expression, Expression pattern, Expression identifier) { return new ParseExpression(expression, pattern, identifier); @@ -658,6 +662,10 @@ public FunctionExpression match_phrase(Expression... args) { return compile(BuiltinFunctionName.MATCH_PHRASE, args); } + public FunctionExpression match_phrase_prefix(Expression... args) { + return compile(BuiltinFunctionName.MATCH_PHRASE_PREFIX, args); + } + public FunctionExpression multi_match(Expression... args) { return compile(BuiltinFunctionName.MULTI_MATCH, args); } diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index baf715a2ad..69efbe10bf 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -191,7 +191,7 @@ public enum BuiltinFunctionName { MATCH_PHRASE(FunctionName.of("match_phrase")), MATCHPHRASE(FunctionName.of("matchphrase")), MATCH_BOOL_PREFIX(FunctionName.of("match_bool_prefix")), - + MATCH_PHRASE_PREFIX(FunctionName.of("match_phrase_prefix")), /** * Legacy Relevance Function. 
*/ diff --git a/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java b/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java index 60b9174e09..70c254af9a 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java @@ -33,6 +33,7 @@ public class OpenSearchFunctions { public static final int MIN_NUM_PARAMETERS = 2; public static final int MULTI_MATCH_MAX_NUM_PARAMETERS = 17; public static final int SIMPLE_QUERY_STRING_MAX_NUM_PARAMETERS = 14; + public static final int MATCH_PHRASE_PREFIX_MAX_NUM_PARAMETERS = 7; /** * Add functions specific to OpenSearch to repository. @@ -46,6 +47,7 @@ public void register(BuiltinFunctionRepository repository) { // compatibility. repository.register(match_phrase(BuiltinFunctionName.MATCH_PHRASE)); repository.register(match_phrase(BuiltinFunctionName.MATCHPHRASE)); + repository.register(match_phrase_prefix()); } private static FunctionResolver match_bool_prefix() { @@ -58,6 +60,11 @@ private static FunctionResolver match() { return getRelevanceFunctionResolver(funcName, MATCH_MAX_NUM_PARAMETERS, STRING); } + private static FunctionResolver match_phrase_prefix() { + FunctionName funcName = BuiltinFunctionName.MATCH_PHRASE_PREFIX.getName(); + return getRelevanceFunctionResolver(funcName, MATCH_PHRASE_PREFIX_MAX_NUM_PARAMETERS, STRING); + } + private static FunctionResolver match_phrase(BuiltinFunctionName matchPhrase) { FunctionName funcName = matchPhrase.getName(); return getRelevanceFunctionResolver(funcName, MATCH_PHRASE_MAX_NUM_PARAMETERS, STRING); diff --git a/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java b/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java index 5aaf4e8b3e..76dfd3d24d 100644 --- a/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java +++ 
b/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java @@ -15,6 +15,7 @@ import static org.opensearch.sql.ast.dsl.AstDSL.intLiteral; import static org.opensearch.sql.ast.dsl.AstDSL.qualifiedName; import static org.opensearch.sql.ast.dsl.AstDSL.stringLiteral; +import static org.opensearch.sql.ast.dsl.AstDSL.unresolvedArg; import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_TRUE; import static org.opensearch.sql.data.model.ExprValueUtils.integerValue; import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN; @@ -466,6 +467,30 @@ void simple_query_string_expression_two_fields() { AstDSL.unresolvedArg("query", stringLiteral("sample query")))); } + @Test + public void match_phrase_prefix_all_params() { + assertAnalyzeEqual( + dsl.match_phrase_prefix( + dsl.namedArgument("field", "test"), + dsl.namedArgument("query", "search query"), + dsl.namedArgument("slop", "3"), + dsl.namedArgument("boost", "1.5"), + dsl.namedArgument("analyzer", "standard"), + dsl.namedArgument("max_expansions", "4"), + dsl.namedArgument("zero_terms_query", "NONE") + ), + AstDSL.function("match_phrase_prefix", + unresolvedArg("field", stringLiteral("test")), + unresolvedArg("query", stringLiteral("search query")), + unresolvedArg("slop", stringLiteral("3")), + unresolvedArg("boost", stringLiteral("1.5")), + unresolvedArg("analyzer", stringLiteral("standard")), + unresolvedArg("max_expansions", stringLiteral("4")), + unresolvedArg("zero_terms_query", stringLiteral("NONE")) + ) + ); + } + protected Expression analyze(UnresolvedExpression unresolvedExpression) { return expressionAnalyzer.analyze(unresolvedExpression, analysisContext); } diff --git a/core/src/test/java/org/opensearch/sql/expression/function/OpenSearchFunctionsTest.java b/core/src/test/java/org/opensearch/sql/expression/function/OpenSearchFunctionsTest.java index 24ff23d004..d78e9858df 100644 --- a/core/src/test/java/org/opensearch/sql/expression/function/OpenSearchFunctionsTest.java +++ 
b/core/src/test/java/org/opensearch/sql/expression/function/OpenSearchFunctionsTest.java @@ -140,6 +140,19 @@ List match_phrase_dsl_expressions() { ); } + List match_phrase_prefix_dsl_expressions() { + return List.of( + dsl.match_phrase_prefix(field, query) + ); + } + + @Test + public void match_phrase_prefix() { + for (FunctionExpression fe : match_phrase_prefix_dsl_expressions()) { + assertEquals(BOOLEAN, fe.type()); + } + } + @Test void match_in_memory() { FunctionExpression expr = dsl.match(field, query); diff --git a/docs/user/dql/functions.rst b/docs/user/dql/functions.rst index cc64e2d591..1f7ad92c97 100644 --- a/docs/user/dql/functions.rst +++ b/docs/user/dql/functions.rst @@ -2274,6 +2274,47 @@ Another example to show how to set custom values for the optional parameters:: | Hattie | 671 Bristol Street | +-------------+--------------------+ +MATCH_PHRASE_PREFIX +------------------- + +Description +>>>>>>>>>>> + +``match_phrase_prefix(field_expression, query_expression[, option=<option_value>]*)`` + +The match_phrase_prefix function maps to the match_phrase_prefix query used in search engine, +to return the documents that match a provided text with a given field. 
Available parameters include: + +- analyzer +- slop +- zero_terms_query +- max_expansions +- boost + + +Example with only ``field`` and ``query`` expressions, and all other parameters are set default values:: + + os> SELECT author, title FROM books WHERE match_phrase_prefix(author, 'Alexander Mil'); + fetched rows / total rows = 2/2 + +----------------------+--------------------------+ + | author | title | + |----------------------+--------------------------| + | Alan Alexander Milne | The House at Pooh Corner | + | Alan Alexander Milne | Winnie-the-Pooh | + +----------------------+--------------------------+ + +Another example to show how to set custom values for the optional parameters:: + + os> SELECT author, title FROM books WHERE match_phrase_prefix(author, 'Alan Mil', slop = 2); + fetched rows / total rows = 2/2 + +----------------------+--------------------------+ + | author | title | + |----------------------+--------------------------| + | Alan Alexander Milne | The House at Pooh Corner | + | Alan Alexander Milne | Winnie-the-Pooh | + +----------------------+--------------------------+ + + MULTI_MATCH ----------- diff --git a/docs/user/ppl/functions/relevance.rst b/docs/user/ppl/functions/relevance.rst index 7262aea3e9..39661a26f6 100644 --- a/docs/user/ppl/functions/relevance.rst +++ b/docs/user/ppl/functions/relevance.rst @@ -98,6 +98,49 @@ Another example to show how to set custom values for the optional parameters:: +----------------------+--------------------------+ + +MATCH_PHRASE_PREFIX +------------------- + +Description +>>>>>>>>>>> + +``match_phrase_prefix(field_expression, query_expression[, option=<option_value>]*)`` + +The match_phrase_prefix function maps to the match_phrase_prefix query used in search engine, to return the documents that match a provided text with a given field. 
Available parameters include: + +- analyzer +- slop +- max_expansions +- boost +- zero_terms_query + +Example with only ``field`` and ``query`` expressions, and all other parameters are set default values:: + + os> source=books | where match_phrase_prefix(author, 'Alexander Mil') | fields author, title + fetched rows / total rows = 2/2 + +----------------------+--------------------------+ + | author | title | + |----------------------+--------------------------| + | Alan Alexander Milne | The House at Pooh Corner | + | Alan Alexander Milne | Winnie-the-Pooh | + +----------------------+--------------------------+ + + + +Another example to show how to set custom values for the optional parameters:: + + os> source=books | where match_phrase_prefix(author, 'Alan Mil', slop = 2) | fields author, title + fetched rows / total rows = 2/2 + +----------------------+--------------------------+ + | author | title | + |----------------------+--------------------------| + | Alan Alexander Milne | The House at Pooh Corner | + | Alan Alexander Milne | Winnie-the-Pooh | + +----------------------+--------------------------+ + + + MULTI_MATCH ----------- diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/MatchPhrasePrefixWhereCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/MatchPhrasePrefixWhereCommandIT.java new file mode 100644 index 0000000000..2543602758 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/MatchPhrasePrefixWhereCommandIT.java @@ -0,0 +1,116 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BEER; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.Test; + +public class MatchPhrasePrefixWhereCommandIT extends PPLIntegTestCase { + + 
@Override + public void init() throws IOException { + loadIndex(Index.BEER); + } + + @Test + public void required_parameters() throws IOException { + String query = "source = %s | WHERE match_phrase_prefix(Title, 'champagne be') | fields Title"; + JSONObject result = executeQuery(String.format(query, TEST_INDEX_BEER)); + verifyDataRows(result, + rows("Can old flat champagne be used for vinegar?"), + rows("Elder flower champagne best to use natural yeast or add a wine yeast?")); + } + + + @Test + public void all_optional_parameters() throws IOException { + // The values for optional parameters are valid but arbitrary. + String query = "source = %s " + + "| WHERE match_phrase_prefix(Title, 'flat champ', boost = 1.0, " + + "zero_terms_query='ALL', max_expansions = 2, analyzer=standard, slop=0) " + + "| fields Title"; + JSONObject result = executeQuery(String.format(query, TEST_INDEX_BEER)); + verifyDataRows(result, rows("Can old flat champagne be used for vinegar?")); + } + + + @Test + public void max_expansions_is_3() throws IOException { + // max_expansions applies to the last term in the query -- 'bottl' + // It tells OpenSearch to consider only the first 3 terms that start with 'bottl' + // In this dataset these are 'bottle-conditioning', 'bottling', 'bottles'. + + String query = "source = %s " + + "| WHERE match_phrase_prefix(Tags, 'draught bottl', max_expansions=3) | fields Tags"; + JSONObject result = executeQuery(String.format(query, TEST_INDEX_BEER)); + verifyDataRows(result, rows("brewing draught bottling"), + rows("draught bottles")); + } + + @Test + public void analyzer_english() throws IOException { + // English analyzer removes 'in' and 'to' as they are common words. + // This results in an empty query. 
+ String query = "source = %s " + + "| WHERE match_phrase_prefix(Title, 'in to', analyzer=english)" + + "| fields Title"; + JSONObject result = executeQuery(String.format(query, TEST_INDEX_BEER)); + assertTrue("Expect English analyzer to filter out common words 'in' and 'to'", + result.getInt("total") == 0); + } + + @Test + public void analyzer_standard() throws IOException { + // Standard analyzer does not treat 'in' and 'to' as special terms. + // This results in 'to' being used as a phrase prefix, giving us 'Tokyo'. + String query = "source = %s " + + "| WHERE match_phrase_prefix(Title, 'in to', analyzer=standard)" + + "| fields Title"; + JSONObject result = executeQuery(String.format(query, TEST_INDEX_BEER)); + verifyDataRows(result, rows("Local microbreweries and craft beer in Tokyo")); + } + + @Test + public void zero_term_query_all() throws IOException { + // English analyzer removes 'in' and 'to' as they are common words. + // zero_terms_query of 'ALL' causes all rows to be returned. + // 'sort -Title | head 1' narrows the result to a single row to verify. + String query = "source = %s" + + "| WHERE match_phrase_prefix(Title, 'in to', analyzer=english, zero_terms_query='ALL') " + + "| sort -Title | head 1 | fields Title"; + JSONObject result = executeQuery(String.format(query, TEST_INDEX_BEER)); + verifyDataRows(result, rows("was working great, now all foam")); + } + + + @Test + public void slop_is_2() throws IOException { + // With a slop of 2 the query terms may appear transposed, + // so 'gas ta' matches the tag 'taste gas'. + String query = "source = %s" + + "| where match_phrase_prefix(Tags, 'gas ta', slop=2) " + + "| fields Tags"; + JSONObject result = executeQuery(String.format(query, TEST_INDEX_BEER)); + verifyDataRows(result, rows("taste gas")); + } + + @Test + public void slop_is_3() throws IOException { + // With a slop of 3 the transposed terms may also be one word apart ('taste draught gas'). 
+ String query = "source = %s" + + "| where match_phrase_prefix(Tags, 'gas ta', slop=3)" + + "| fields Tags"; + JSONObject result = executeQuery(String.format(query, TEST_INDEX_BEER)); + verifyDataRows(result, + rows("taste draught gas"), + rows("taste gas")); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/MatchPhrasePrefixFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/MatchPhrasePrefixFunctionIT.java new file mode 100644 index 0000000000..f38c7776dd --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/sql/MatchPhrasePrefixFunctionIT.java @@ -0,0 +1,110 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + + +package org.opensearch.sql.sql; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BEER; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.Test; +import org.opensearch.sql.legacy.SQLIntegTestCase; + +public class MatchPhrasePrefixFunctionIT extends SQLIntegTestCase { + + @Override + protected void init() throws Exception { + loadIndex(Index.BEER); + } + + @Test + public void required_parameters() throws IOException { + String query = "SELECT Title FROM %s WHERE match_phrase_prefix(Title, 'champagne be')"; + JSONObject result = executeJdbcRequest(String.format(query, TEST_INDEX_BEER)); + verifyDataRows(result, + rows("Can old flat champagne be used for vinegar?"), + rows("Elder flower champagne best to use natural yeast or add a wine yeast?")); + } + + @Test + public void all_optional_parameters() throws IOException { + // The values for optional parameters are valid but arbitrary. 
+ String query = "SELECT Title FROM %s " + + "WHERE match_phrase_prefix(Title, 'flat champ', boost = 1.0, zero_terms_query='ALL', " + + "max_expansions = 2, analyzer=standard, slop=0)"; + JSONObject result = executeJdbcRequest(String.format(query, TEST_INDEX_BEER)); + verifyDataRows(result, rows("Can old flat champagne be used for vinegar?")); + } + + @Test + public void max_expansions_is_3() throws IOException { + // max_expansions applies to the last term in the query -- 'bottl' + // It tells OpenSearch to consider only the first 3 terms that start with 'bottl' + // In this dataset these are 'bottle-conditioning', 'bottling', 'bottles'. + + String query = "SELECT Tags FROM %s " + + "WHERE match_phrase_prefix(Tags, 'draught bottl', max_expansions=3)"; + JSONObject result = executeJdbcRequest(String.format(query, TEST_INDEX_BEER)); + verifyDataRows(result, rows("brewing draught bottling"), + rows("draught bottles")); + } + + @Test + public void analyzer_english() throws IOException { + // English analyzer removes 'in' and 'to' as they are common words. + // This results in an empty query. + String query = "SELECT Title FROM %s " + + "WHERE match_phrase_prefix(Title, 'in to', analyzer=english)"; + JSONObject result = executeJdbcRequest(String.format(query, TEST_INDEX_BEER)); + assertTrue("Expect English analyzer to filter out common words 'in' and 'to'", + result.getInt("total") == 0); + } + + @Test + public void analyzer_standard() throws IOException { + // Standard analyzer does not treat 'in' and 'to' as special terms. + // This results in 'to' being used as a phrase prefix given us 'Tokyo'. 
+ String query = "SELECT Title FROM %s " + + "WHERE match_phrase_prefix(Title, 'in to', analyzer=standard)"; + JSONObject result = executeJdbcRequest(String.format(query, TEST_INDEX_BEER)); + verifyDataRows(result, rows("Local microbreweries and craft beer in Tokyo")); + } + + @Test + public void zero_term_query_all() throws IOException { + // English analyzer removes 'in' and 'to' as they are common words. + // zero_terms_query of 'ALL' causes all rows to be returned. + // ORDER BY ... LIMIT helps make the test understandable. + String query = "SELECT Title FROM %s" + + " WHERE match_phrase_prefix(Title, 'in to', analyzer=english, zero_terms_query='ALL')" + + " ORDER BY Title DESC" + + " LIMIT 1"; + JSONObject result = executeJdbcRequest(String.format(query, TEST_INDEX_BEER)); + verifyDataRows(result, rows("was working great, now all foam")); + } + + + @Test + public void slop_is_2() throws IOException { + // With a slop of 2 the query terms may appear transposed, + // so 'gas ta' matches the tag 'taste gas'. + String query = "SELECT Tags from %s where match_phrase_prefix(Tags, 'gas ta', slop=2)"; + JSONObject result = executeJdbcRequest(String.format(query, TEST_INDEX_BEER)); + verifyDataRows(result, rows("taste gas")); + } + + @Test + public void slop_is_3() throws IOException { + // With a slop of 3 the transposed terms may also be one word apart ('taste draught gas'). 
+ String query = "SELECT Tags from %s where match_phrase_prefix(Tags, 'gas ta', slop=3)"; + JSONObject result = executeJdbcRequest(String.format(query, TEST_INDEX_BEER)); + verifyDataRows(result, + rows("taste draught gas"), + rows("taste gas")); + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java index 7a69f265c9..ee580e516f 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java @@ -30,6 +30,7 @@ import org.opensearch.sql.opensearch.storage.script.filter.lucene.TermQuery; import org.opensearch.sql.opensearch.storage.script.filter.lucene.WildcardQuery; import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchBoolPrefixQuery; +import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchPhrasePrefixQuery; import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchPhraseQuery; import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchQuery; import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MultiMatchQuery; @@ -64,6 +65,7 @@ public class FilterQueryBuilder extends ExpressionNodeVisitor { + /** + * Default constructor for MatchPhrasePrefixQuery configures how RelevanceQuery.build() handles + * named arguments. 
+ */ + public MatchPhrasePrefixQuery() { + super(ImmutableMap.>builder() + .put("analyzer", (b, v) -> b.analyzer(v.stringValue())) + .put("slop", (b, v) -> b.slop(Integer.parseInt(v.stringValue()))) + .put("max_expansions", (b, v) -> b.maxExpansions(Integer.parseInt(v.stringValue()))) + .put("zero_terms_query", (b, v) -> b.zeroTermsQuery( + org.opensearch.index.search.MatchQuery.ZeroTermsQuery.valueOf(v.stringValue()))) + .put("boost", (b, v) -> b.boost(Float.parseFloat(v.stringValue()))) + .build()); + } + + @Override + protected MatchPhrasePrefixQueryBuilder createQueryBuilder(String field, String query) { + return QueryBuilders.matchPhrasePrefixQuery(field, query); + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/relevance/MatchPhraseQuery.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/relevance/MatchPhraseQuery.java index 1ded3f4708..9004fa78b4 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/relevance/MatchPhraseQuery.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/relevance/MatchPhraseQuery.java @@ -30,6 +30,7 @@ public class MatchPhraseQuery extends RelevanceQuery { */ public MatchPhraseQuery() { super(ImmutableMap.>builder() + .put("boost", (b, v) -> b.boost(Float.parseFloat(v.stringValue()))) .put("analyzer", (b, v) -> b.analyzer(v.stringValue())) .put("slop", (b, v) -> b.slop(Integer.parseInt(v.stringValue()))) .put("zero_terms_query", (b, v) -> b.zeroTermsQuery( diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilderTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilderTest.java index 615b542302..a20af6d111 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilderTest.java +++ 
b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilderTest.java @@ -836,6 +836,48 @@ void multi_match_missing_query() { msg); } + @Test + void should_build_match_phrase_prefix_query_with_default_parameters() { + assertJsonEquals( + "{\n" + + " \"match_phrase_prefix\" : {\n" + + " \"message\" : {\n" + + " \"query\" : \"search query\",\n" + + " \"slop\" : 0,\n" + + " \"zero_terms_query\" : \"NONE\",\n" + + " \"max_expansions\" : 50,\n" + + " \"boost\" : 1.0\n" + + " }\n" + + " }\n" + + "}", + buildQuery( + dsl.match_phrase_prefix( + dsl.namedArgument("field", literal("message")), + dsl.namedArgument("query", literal("search query"))))); + } + + @Test + void should_build_match_phrase_prefix_query_with_analyzer() { + assertJsonEquals( + "{\n" + + " \"match_phrase_prefix\" : {\n" + + " \"message\" : {\n" + + " \"query\" : \"search query\",\n" + + " \"slop\" : 0,\n" + + " \"zero_terms_query\" : \"NONE\",\n" + + " \"max_expansions\" : 50,\n" + + " \"boost\" : 1.0,\n" + + " \"analyzer\": english\n" + + " }\n" + + " }\n" + + "}", + buildQuery( + dsl.match_phrase_prefix( + dsl.namedArgument("field", literal("message")), + dsl.namedArgument("query", literal("search query")), + dsl.namedArgument("analyzer", literal("english"))))); + } + @Test void cast_to_string_in_filter() { String json = "{\n" diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/MatchPhrasePrefixQueryTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/MatchPhrasePrefixQueryTest.java new file mode 100644 index 0000000000..9a5eeb14b5 --- /dev/null +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/MatchPhrasePrefixQueryTest.java @@ -0,0 +1,123 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.script.filter.lucene; + + +import static 
org.junit.jupiter.api.Assertions.assertThrows; + +import java.util.List; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.DisplayNameGeneration; +import org.junit.jupiter.api.DisplayNameGenerator; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.common.antlr.SyntaxCheckException; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.data.type.ExprType; +import org.opensearch.sql.exception.SemanticCheckException; +import org.opensearch.sql.expression.DSL; +import org.opensearch.sql.expression.Expression; +import org.opensearch.sql.expression.FunctionExpression; +import org.opensearch.sql.expression.config.ExpressionConfig; +import org.opensearch.sql.expression.env.Environment; +import org.opensearch.sql.expression.function.FunctionName; +import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchPhrasePrefixQuery; + +@DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class) +public class MatchPhrasePrefixQueryTest { + + private final DSL dsl = new ExpressionConfig().dsl(new ExpressionConfig().functionRepository()); + private final MatchPhrasePrefixQuery matchPhrasePrefixQuery = new MatchPhrasePrefixQuery(); + private final FunctionName matchPhrasePrefix = FunctionName.of("match_phrase_prefix"); + + @Test + public void test_SyntaxCheckException_when_no_arguments() { + List arguments = List.of(); + assertThrows(SyntaxCheckException.class, + () -> matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments))); + } + + @Test + public void test_SyntaxCheckException_when_one_argument() { + List arguments = List.of(dsl.namedArgument("field", "test")); + assertThrows(SyntaxCheckException.class, + () -> matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments))); + } + + @Test + public void test_SyntaxCheckException_when_invalid_parameter() { + List arguments = List.of( + dsl.namedArgument("field", "test"), + dsl.namedArgument("query", "test2"), + 
dsl.namedArgument("unsupported", "3")); + Assertions.assertThrows(SemanticCheckException.class, + () -> matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments))); + } + + @Test + public void test_analyzer_parameter() { + List arguments = List.of( + dsl.namedArgument("field", "t1"), + dsl.namedArgument("query", "t2"), + dsl.namedArgument("analyzer", "standard") + ); + Assertions.assertNotNull(matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments))); + } + + @Test + public void build_succeeds_with_two_arguments() { + List arguments = List.of( + dsl.namedArgument("field", "test"), + dsl.namedArgument("query", "test2")); + Assertions.assertNotNull(matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments))); + } + + @Test + public void test_slop_parameter() { + List arguments = List.of( + dsl.namedArgument("field", "t1"), + dsl.namedArgument("query", "t2"), + dsl.namedArgument("slop", "2") + ); + Assertions.assertNotNull(matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments))); + } + + @Test + public void test_zero_terms_query_parameter() { + List arguments = List.of( + dsl.namedArgument("field", "t1"), + dsl.namedArgument("query", "t2"), + dsl.namedArgument("zero_terms_query", "ALL") + ); + Assertions.assertNotNull(matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments))); + } + + + @Test + public void test_boost_parameter() { + List arguments = List.of( + dsl.namedArgument("field", "t1"), + dsl.namedArgument("query", "t2"), + dsl.namedArgument("boost", "0.1") + ); + Assertions.assertNotNull(matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments))); + } + + private class MatchPhraseExpression extends FunctionExpression { + public MatchPhraseExpression(List arguments) { + super(MatchPhrasePrefixQueryTest.this.matchPhrasePrefix, arguments); + } + + @Override + public ExprValue valueOf(Environment valueEnv) { + return null; + } + + @Override + public ExprType type() { + return null; + } + } +} diff --git 
a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 68fb402a2a..eb4396ee9e 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -271,10 +271,10 @@ IF: 'IF'; // RELEVANCE FUNCTIONS AND PARAMETERS MATCH: 'MATCH'; MATCH_PHRASE: 'MATCH_PHRASE'; +MATCH_PHRASE_PREFIX: 'MATCH_PHRASE_PREFIX'; MATCH_BOOL_PREFIX: 'MATCH_BOOL_PREFIX'; SIMPLE_QUERY_STRING: 'SIMPLE_QUERY_STRING'; MULTI_MATCH: 'MULTI_MATCH'; - ALLOW_LEADING_WILDCARD: 'ALLOW_LEADING_WILDCARD'; ANALYZE_WILDCARD: 'ANALYZE_WILDCARD'; ANALYZER: 'ANALYZER'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index c0ab5ebb84..e35e4c3dbe 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -390,6 +390,7 @@ singleFieldRelevanceFunctionName : MATCH | MATCH_PHRASE | MATCH_BOOL_PREFIX + | MATCH_PHRASE_PREFIX ; multiFieldRelevanceFunctionName diff --git a/sql/src/main/antlr/OpenSearchSQLLexer.g4 b/sql/src/main/antlr/OpenSearchSQLLexer.g4 index 5070eefab8..d406b17522 100644 --- a/sql/src/main/antlr/OpenSearchSQLLexer.g4 +++ b/sql/src/main/antlr/OpenSearchSQLLexer.g4 @@ -280,6 +280,7 @@ IN_TERMS: 'IN_TERMS'; MATCHPHRASE: 'MATCHPHRASE'; MATCH_PHRASE: 'MATCH_PHRASE'; SIMPLE_QUERY_STRING: 'SIMPLE_QUERY_STRING'; +MATCH_PHRASE_PREFIX: 'MATCH_PHRASE_PREFIX'; MATCHQUERY: 'MATCHQUERY'; MATCH_QUERY: 'MATCH_QUERY'; MINUTE_OF_DAY: 'MINUTE_OF_DAY'; diff --git a/sql/src/main/antlr/OpenSearchSQLParser.g4 b/sql/src/main/antlr/OpenSearchSQLParser.g4 index 2bea8afbc8..3c0cb83657 100644 --- a/sql/src/main/antlr/OpenSearchSQLParser.g4 +++ b/sql/src/main/antlr/OpenSearchSQLParser.g4 @@ -395,7 +395,7 @@ flowControlFunctionName singleFieldRelevanceFunctionName : MATCH | MATCH_PHRASE | MATCHPHRASE - | MATCH_BOOL_PREFIX + | MATCH_BOOL_PREFIX | MATCH_PHRASE_PREFIX ; multiFieldRelevanceFunctionName diff --git 
a/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java b/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java index 1969f845ef..ec2fa4360c 100644 --- a/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java +++ b/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java @@ -9,11 +9,14 @@ import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertThrows; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.Streams; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; +import java.util.List; +import java.util.Map; import java.util.Random; import java.util.stream.Stream; import org.apache.commons.lang3.RandomStringUtils; @@ -245,12 +248,23 @@ public void can_parse_match_relevance_function() { } @ParameterizedTest - @MethodSource({"matchPhraseComplexQueries", - "matchPhraseGeneratedQueries", "generateMatchPhraseQueries"}) + @MethodSource({ + "matchPhraseComplexQueries", + "matchPhraseGeneratedQueries", + "generateMatchPhraseQueries", + }) public void canParseComplexMatchPhraseArgsTest(String query) { assertNotNull(parser.parse(query)); } + @ParameterizedTest + @MethodSource({ + "generateMatchPhrasePrefixQueries" + }) + public void canParseComplexMatchPhrasePrefixQueries(String query) { + assertNotNull(parser.parse(query)); + } + private static Stream matchPhraseComplexQueries() { return Stream.of( "SELECT * FROM t WHERE match_phrase(c, 3)", @@ -295,8 +309,18 @@ private static Stream generateMatchPhraseQueries() { return generateQueries("match_phrase", matchPhraseArgs); } + private static Stream generateMatchPhrasePrefixQueries() { + return generateQueries("match_phrase_prefix", ImmutableMap.builder() + .put("analyzer", new String[] {"standard", "stop", "english"}) + .put("slop", new Integer[] {0, 1, 2}) + .put("max_expansions", new Integer[] {0, 3, 10}) + 
.put("zero_terms_query", new String[] {"NONE", "ALL", "NULL"}) + .put("boost", new Float[] {-0.5f, 1.0f, 1.2f}) + .build()); + } + private static Stream generateQueries(String function, - HashMap functionArgs) { + Map functionArgs) { var rand = new Random(0); class QueryGenerator implements Iterator { diff --git a/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java b/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java index 65d48ac8b7..de91810dda 100644 --- a/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java +++ b/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java @@ -431,6 +431,24 @@ public void filteredDistinctCount() { ); } + @Test + public void matchPhrasePrefixAllParameters() { + assertEquals( + AstDSL.function("match_phrase_prefix", + unresolvedArg("field", stringLiteral("test")), + unresolvedArg("query", stringLiteral("search query")), + unresolvedArg("slop", stringLiteral("3")), + unresolvedArg("boost", stringLiteral("1.5")), + unresolvedArg("analyzer", stringLiteral("standard")), + unresolvedArg("max_expansions", stringLiteral("4")), + unresolvedArg("zero_terms_query", stringLiteral("NONE")) + ), + buildExprAst("match_phrase_prefix(test, 'search query', slop = 3, boost = 1.5" + + ", analyzer = 'standard', max_expansions = 4, zero_terms_query='NONE'" + + ")") + ); + } + @Test public void relevanceMatch() { assertEquals(AstDSL.function("match",