Skip to content

Commit

Permalink
Add support for wildcard_query function to the new engine (#156) (#1108)
Browse files Browse the repository at this point in the history
Signed-off-by: Guian Gumpac <guiang@bitquilltech.com>
  • Loading branch information
Guian Gumpac authored Dec 7, 2022
1 parent 64a3794 commit 2af7321
Show file tree
Hide file tree
Showing 29 changed files with 1,015 additions and 38 deletions.
4 changes: 4 additions & 0 deletions core/src/main/java/org/opensearch/sql/expression/DSL.java
Original file line number Diff line number Diff line change
Expand Up @@ -715,6 +715,10 @@ public static FunctionExpression match_bool_prefix(Expression... args) {
return compile(FunctionProperties.None, BuiltinFunctionName.MATCH_BOOL_PREFIX, args);
}

/**
 * Creates a {@code wildcard_query} function expression from the given arguments.
 *
 * @param args arguments forwarded unchanged to {@code compile}
 * @return the expression {@code compile} produces for
 *     {@link BuiltinFunctionName#WILDCARD_QUERY}
 */
public static FunctionExpression wildcard_query(Expression... args) {
  final BuiltinFunctionName functionName = BuiltinFunctionName.WILDCARD_QUERY;
  return compile(FunctionProperties.None, functionName, args);
}

public static FunctionExpression now(FunctionProperties functionProperties,
Expression... args) {
return compile(functionProperties, BuiltinFunctionName.NOW, args);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,9 @@ public enum BuiltinFunctionName {
MATCHQUERY(FunctionName.of("matchquery")),
MULTI_MATCH(FunctionName.of("multi_match")),
MULTIMATCH(FunctionName.of("multimatch")),
MULTIMATCHQUERY(FunctionName.of("multimatchquery"));
MULTIMATCHQUERY(FunctionName.of("multimatchquery")),
WILDCARDQUERY(FunctionName.of("wildcardquery")),
WILDCARD_QUERY(FunctionName.of("wildcard_query"));

private final FunctionName name;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ public void register(BuiltinFunctionRepository repository) {
repository.register(match_phrase(BuiltinFunctionName.MATCHPHRASE));
repository.register(match_phrase(BuiltinFunctionName.MATCHPHRASEQUERY));
repository.register(match_phrase_prefix());
repository.register(wildcard_query(BuiltinFunctionName.WILDCARD_QUERY));
repository.register(wildcard_query(BuiltinFunctionName.WILDCARDQUERY));
}

private static FunctionResolver match_bool_prefix() {
Expand Down Expand Up @@ -83,6 +85,11 @@ private static FunctionResolver query_string() {
return new RelevanceFunctionResolver(funcName, STRUCT);
}

/**
 * Creates the resolver for a wildcard-query function name.
 *
 * @param wildcardQuery built-in function whose name the resolver is created for
 * @return a {@link RelevanceFunctionResolver} constructed from that name and {@code STRING}
 */
private static FunctionResolver wildcard_query(BuiltinFunctionName wildcardQuery) {
  return new RelevanceFunctionResolver(wildcardQuery.getName(), STRING);
}

public static class OpenSearchFunction extends FunctionExpression {
private final FunctionName functionName;
private final List<Expression> arguments;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,34 @@ void query_string_expression_two_fields() {
AstDSL.unresolvedArg("query", stringLiteral("query_value"))));
}

/**
 * Checks that an unresolved {@code wildcard_query} call with only {@code field} and
 * {@code query} arguments analyzes to the matching DSL expression.
 */
@Test
void wildcard_query_expression() {
  assertAnalyzeEqual(
      DSL.wildcard_query(
          DSL.namedArgument("field", DSL.literal("test")),
          DSL.namedArgument("query", DSL.literal("query_value*"))),
      AstDSL.function(
          "wildcard_query",
          AstDSL.unresolvedArg("field", stringLiteral("test")),
          AstDSL.unresolvedArg("query", stringLiteral("query_value*"))));
}

/**
 * Checks that an unresolved {@code wildcard_query} call carrying the optional
 * {@code boost}, {@code case_insensitive} and {@code rewrite} arguments analyzes to the
 * matching DSL expression.
 */
@Test
void wildcard_query_expression_all_params() {
  assertAnalyzeEqual(
      DSL.wildcard_query(
          DSL.namedArgument("field", DSL.literal("test")),
          DSL.namedArgument("query", DSL.literal("query_value*")),
          DSL.namedArgument("boost", DSL.literal("1.5")),
          DSL.namedArgument("case_insensitive", DSL.literal("true")),
          DSL.namedArgument("rewrite", DSL.literal("scoring_boolean"))),
      AstDSL.function(
          "wildcard_query",
          AstDSL.unresolvedArg("field", stringLiteral("test")),
          AstDSL.unresolvedArg("query", stringLiteral("query_value*")),
          AstDSL.unresolvedArg("boost", stringLiteral("1.5")),
          AstDSL.unresolvedArg("case_insensitive", stringLiteral("true")),
          AstDSL.unresolvedArg("rewrite", stringLiteral("scoring_boolean"))));
}

@Test
public void match_phrase_prefix_all_params() {
assertAnalyzeEqual(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,4 +197,12 @@ void query_string() {
fields.getValue(), query.getValue()),
expr.toString());
}

/**
 * Checks the string form of a {@code wildcard_query} expression built from the shared
 * {@code field} and {@code query} arguments.
 */
@Test
void wildcard_query() {
  FunctionExpression wildcardExpr = DSL.wildcard_query(field, query);
  String expected = String.format(
      "wildcard_query(field=%s, query=%s)",
      field.getValue(),
      query.getValue());
  assertEquals(expected, wildcardExpr.toString());
}
}
55 changes: 55 additions & 0 deletions docs/user/dql/functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3299,6 +3299,59 @@ Example searching for field Tags::
| [Winnie-the-<em>Pooh</em>] |
+----------------------------------------------+

WILDCARD_QUERY
--------------

Description
>>>>>>>>>>>

``wildcard_query(field_expression, query_expression[, option=<option_value>]*)``

The ``wildcard_query`` function maps to the ``wildcard_query`` query used in the search engine. It returns documents that match the provided text in the specified field.
OpenSearch supports wildcard characters ``*`` and ``?``. See the full description here: https://opensearch.org/docs/latest/opensearch/query-dsl/term/#wildcards.
You may include a backslash ``\`` to escape SQL wildcard characters ``\%`` and ``\_``.

Available parameters include:

- boost
- case_insensitive
- rewrite

For backward compatibility, ``wildcardquery`` is also supported and maps to the same query.

Example with only the ``field`` and ``query`` expressions, with all other parameters set to their default values::

os> select Body from wildcard where wildcard_query(Body, 'test wildcard*');
fetched rows / total rows = 7/7
+-------------------------------------------+
| Body |
|-------------------------------------------|
| test wildcard |
| test wildcard in the end of the text% |
| test wildcard in % the middle of the text |
| test wildcard %% beside each other |
| test wildcard in the end of the text_ |
| test wildcard in _ the middle of the text |
| test wildcard __ beside each other |
+-------------------------------------------+

Another example to show how to set custom values for the optional parameters::

os> select Body from wildcard where wildcard_query(Body, 'test wildcard*', boost=0.7, case_insensitive=true, rewrite='constant_score');
fetched rows / total rows = 8/8
+-------------------------------------------+
| Body |
|-------------------------------------------|
| test wildcard |
| test wildcard in the end of the text% |
| test wildcard in % the middle of the text |
| test wildcard %% beside each other |
| test wildcard in the end of the text_ |
| test wildcard in _ the middle of the text |
| test wildcard __ beside each other |
| tEsT wIlDcArD sensitive cases |
+-------------------------------------------+

System Functions
================

Expand All @@ -3323,3 +3376,5 @@ Example::
|----------------+---------------+-----------------+------------------|
| DATE | INTEGER | DATETIME | STRUCT |
+----------------+---------------+-----------------+------------------+


22 changes: 22 additions & 0 deletions doctest/test_data/wildcard.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{"index":{"_id":"0"}}
{"Body":"test wildcard"}
{"index":{"_id":"1"}}
{"Body":"test wildcard in the end of the text%"}
{"index":{"_id":"2"}}
{"Body":"%test wildcard in the beginning of the text"}
{"index":{"_id":"3"}}
{"Body":"test wildcard in % the middle of the text"}
{"index":{"_id":"4"}}
{"Body":"test wildcard %% beside each other"}
{"index":{"_id":"5"}}
{"Body":"test wildcard in the end of the text_"}
{"index":{"_id":"6"}}
{"Body":"_test wildcard in the beginning of the text"}
{"index":{"_id":"7"}}
{"Body":"test wildcard in _ the middle of the text"}
{"index":{"_id":"8"}}
{"Body":"test wildcard __ beside each other"}
{"index":{"_id":"9"}}
{"Body":"test backslash wildcard \\_"}
{"index":{"_id":"10"}}
{"Body":"tEsT wIlDcArD sensitive cases"}
4 changes: 3 additions & 1 deletion doctest/test_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
NYC_TAXI = "nyc_taxi"
BOOKS = "books"
APACHE = "apache"
WILDCARD = "wildcard"


class DocTestConnection(OpenSearchConnection):
Expand Down Expand Up @@ -92,6 +93,7 @@ def set_up_test_indices(test):
load_file("nyc_taxi.json", index_name=NYC_TAXI)
load_file("books.json", index_name=BOOKS)
load_file("apache.json", index_name=APACHE)
load_file("wildcard.json", index_name=WILDCARD)


def load_file(filename, index_name):
Expand Down Expand Up @@ -120,7 +122,7 @@ def set_up(test):

def tear_down(test):
# drop leftover tables after each test
test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS, APACHE], ignore_unavailable=True)
test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS, APACHE, WILDCARD], ignore_unavailable=True)


docsuite = partial(doctest.DocFileSuite,
Expand Down
9 changes: 9 additions & 0 deletions doctest/test_mapping/wildcard.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"mappings" : {
"properties" : {
"Body" : {
"type" : "keyword"
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -584,7 +584,11 @@ public enum Index {
CALCS(TestsConstants.TEST_INDEX_CALCS,
"calcs",
getMappingFile("calcs_index_mappings.json"),
"src/test/resources/calcs.json"),;
"src/test/resources/calcs.json"),
WILDCARD(TestsConstants.TEST_INDEX_WILDCARD,
"wildcard",
getMappingFile("wildcard_index_mappings.json"),
"src/test/resources/wildcard.json"),;

private final String name;
private final String type;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ public class TestsConstants {
public final static String TEST_INDEX_BEER = TEST_INDEX + "_beer";
public final static String TEST_INDEX_NULL_MISSING = TEST_INDEX + "_null_missing";
public final static String TEST_INDEX_CALCS = TEST_INDEX + "_calcs";
public final static String TEST_INDEX_WILDCARD = TEST_INDEX + "_wildcard";

public final static String DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'";
public final static String TS_DATE_FORMAT = "yyyy-MM-dd HH:mm:ss.SSS";
Expand Down
88 changes: 88 additions & 0 deletions integ-test/src/test/java/org/opensearch/sql/ppl/LikeQueryIT.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/


package org.opensearch.sql.ppl;

import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_WILDCARD;
import static org.opensearch.sql.util.MatcherUtils.rows;
import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;

import java.io.IOException;
import org.json.JSONObject;
import org.junit.Test;

/**
 * Integration tests for the PPL {@code Like} function, executed against the index
 * named by {@code TEST_INDEX_WILDCARD}.
 *
 * <p>Every test builds the same query shape (filter one field with {@code Like}, then
 * project that field), so the query text is produced by {@link #likeQuery(String, String)}.
 */
public class LikeQueryIT extends PPLIntegTestCase {

  /** Loads the index registered as {@code Index.WILDCARD}. */
  @Override
  public void init() throws IOException {
    loadIndex(Index.WILDCARD);
  }

  /** A trailing {@code %} matches every keyword value starting with "test wildcard". */
  @Test
  public void test_like_with_percent() throws IOException {
    JSONObject result = executeQuery(likeQuery("KeywordBody", "test wildcard%"));
    verifyDataRows(result,
        rows("test wildcard"),
        rows("test wildcard in the end of the text%"),
        rows("test wildcard in % the middle of the text"),
        rows("test wildcard %% beside each other"),
        rows("test wildcard in the end of the text_"),
        rows("test wildcard in _ the middle of the text"),
        rows("test wildcard __ beside each other"));
  }

  /** A backslash-escaped leading {@code %} is matched as a literal percent sign. */
  @Test
  public void test_like_with_escaped_percent() throws IOException {
    // The Java literal "\\\\%" puts two backslashes followed by '%' into the query text.
    JSONObject result = executeQuery(likeQuery("KeywordBody", "\\\\%test wildcard%"));
    verifyDataRows(result,
        rows("%test wildcard in the beginning of the text"));
  }

  /** A backslash-escaped leading {@code _} is matched as a literal underscore. */
  @Test
  public void test_like_in_where_with_escaped_underscore() throws IOException {
    // The Java literal "\\\\_" puts two backslashes followed by '_' into the query text.
    JSONObject result = executeQuery(likeQuery("KeywordBody", "\\\\_test wildcard%"));
    verifyDataRows(result,
        rows("_test wildcard in the beginning of the text"));
  }

  /** Single-word pattern on {@code TextBody}; expects 9 hits. */
  @Test
  public void test_like_on_text_field_with_one_word() throws IOException {
    JSONObject result = executeQuery(likeQuery("TextBody", "test*"));
    assertEquals(9, result.getInt("total"));
  }

  /** Single-word pattern on {@code TextKeywordBody}; expects 8 hits. */
  @Test
  public void test_like_on_text_keyword_field_with_one_word() throws IOException {
    JSONObject result = executeQuery(likeQuery("TextKeywordBody", "test*"));
    assertEquals(8, result.getInt("total"));
  }

  /** Multi-word pattern on {@code TextKeywordBody}; expects 7 hits. */
  @Test
  public void test_like_on_text_keyword_field_with_greater_than_one_word() throws IOException {
    JSONObject result = executeQuery(likeQuery("TextKeywordBody", "test wild*"));
    assertEquals(7, result.getInt("total"));
  }

  /** Multi-word pattern on {@code TextBody}; expects no hits. */
  @Test
  public void test_like_on_text_field_with_greater_than_one_word() throws IOException {
    // NOTE(review): presumably zero because TextBody is an analyzed text field with no
    // keyword sub-field — confirm against wildcard_index_mappings.json.
    JSONObject result = executeQuery(likeQuery("TextBody", "test wild*"));
    assertEquals(0, result.getInt("total"));
  }

  /** The explain output for a {@code Like} on {@code TextKeywordBody} names its keyword sub-field. */
  @Test
  public void test_convert_field_text_to_keyword() throws IOException {
    String result = explainQueryToString(likeQuery("TextKeywordBody", "*"));
    assertTrue(result.contains("TextKeywordBody.keyword"));
  }

  /**
   * Builds the PPL query shared by all tests in this class.
   *
   * @param field name of the field to filter on and to project
   * @param pattern pattern placed, unmodified, between single quotes in the {@code Like} call
   * @return {@code source=<index> | WHERE Like(<field>, '<pattern>') | fields <field>}
   */
  private static String likeQuery(String field, String pattern) {
    return "source=" + TEST_INDEX_WILDCARD
        + " | WHERE Like(" + field + ", '" + pattern + "') | fields " + field;
  }
}
Loading

0 comments on commit 2af7321

Please sign in to comment.