Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -713,7 +713,6 @@ void populate() {
registerOperator(REGEXP, SqlLibraryOperators.REGEXP);
registerOperator(CONCAT, SqlLibraryOperators.CONCAT_FUNCTION);
registerOperator(CONCAT_WS, SqlLibraryOperators.CONCAT_WS);
registerOperator(LIKE, SqlLibraryOperators.ILIKE);
registerOperator(CONCAT_WS, SqlLibraryOperators.CONCAT_WS);
registerOperator(REVERSE, SqlLibraryOperators.REVERSE);
registerOperator(RIGHT, SqlLibraryOperators.RIGHT);
Expand Down Expand Up @@ -992,6 +991,18 @@ void populate() {
builder.makeLiteral(" "),
arg))),
PPLTypeChecker.family(SqlTypeFamily.ANY)));
register(
LIKE,
createFunctionImpWithTypeChecker(
(builder, arg1, arg2) ->
builder.makeCall(
SqlLibraryOperators.ILIKE,
arg1,
arg2,
// TODO: Figure out escaping solution. '\\' is used for JSON input but is not
// necessary for SQL function input
builder.makeLiteral("\\")),
PPLTypeChecker.family(SqlTypeFamily.STRING, SqlTypeFamily.STRING)));
}
}

Expand Down
2 changes: 2 additions & 0 deletions docs/user/ppl/functions/string.rst
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ Example::
+-------------------------------+


Limitation: The pushdown of the LIKE function to a DSL wildcard query is supported only for keyword fields.

LOCATE
-------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,66 +6,22 @@
package org.opensearch.sql.calcite.remote;

import java.io.IOException;
import org.junit.Ignore;
import org.junit.Assume;
import org.junit.Test;
import org.opensearch.sql.ppl.LikeQueryIT;

// TODO: The LIKE function's behaviour in V2 is not correct. Remove this once it is fixed in V2.
public class CalciteLikeQueryIT extends LikeQueryIT {
@Override
public void init() throws Exception {
super.init();
enableCalcite();
// TODO: "https://github.com/opensearch-project/sql/issues/3428"
// disallowCalciteFallback();
disallowCalciteFallback();
}

@Override
@Test
@Ignore("https://github.com/opensearch-project/sql/issues/3428")
public void test_like_with_escaped_percent() throws IOException, IOException {
super.test_like_with_escaped_percent();
}

@Override
@Test
@Ignore("https://github.com/opensearch-project/sql/issues/3428")
public void test_like_in_where_with_escaped_underscore() throws IOException {
super.test_like_in_where_with_escaped_underscore();
}

@Override
@Test
@Ignore("https://github.com/opensearch-project/sql/issues/3428")
public void test_like_on_text_field_with_one_word() throws IOException {
super.test_like_on_text_field_with_one_word();
}

@Override
@Test
@Ignore("https://github.com/opensearch-project/sql/issues/3428")
public void test_like_on_text_keyword_field_with_one_word() throws IOException {
super.test_like_on_text_keyword_field_with_one_word();
}

@Override
@Test
@Ignore("https://github.com/opensearch-project/sql/issues/3428")
public void test_like_on_text_keyword_field_with_greater_than_one_word() throws IOException {
super.test_like_on_text_keyword_field_with_greater_than_one_word();
}

@Override
@Test
@Ignore("https://github.com/opensearch-project/sql/issues/3428")
public void test_like_on_text_field_with_greater_than_one_word() throws IOException {
super.test_like_on_text_field_with_greater_than_one_word();
}

@Override
@Test
@Ignore("https://github.com/opensearch-project/sql/issues/3428")
public void test_convert_field_text_to_keyword() throws IOException {
Assume.assumeTrue("Pushdown is not enabled, skipping this test.", isPushdownEnabled());
super.test_convert_field_text_to_keyword();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

package org.opensearch.sql.calcite.remote;

import java.io.IOException;
import org.opensearch.sql.ppl.WhereCommandIT;

public class CalciteWhereCommandIT extends WhereCommandIT {
Expand All @@ -16,19 +15,6 @@ public void init() throws Exception {
disallowCalciteFallback();
}

@Override
public void testIsNotNullFunction() throws IOException {
withFallbackEnabled(
() -> {
try {
super.testIsNotNullFunction();
} catch (IOException e) {
throw new RuntimeException(e);
}
},
"https://github.com/opensearch-project/sql/issues/3428");
}

@Override
protected String getIncompatibleTypeErrMsg() {
return "In expression types are incompatible: fields type LONG, values type [INTEGER, INTEGER,"
Expand Down
18 changes: 18 additions & 0 deletions integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,24 @@ public void testMultiFieldsRelevanceQueryFunctionExplain() throws IOException {
+ " default_operator='or', analyzer=english)"));
}

/**
 * Explains a {@code like(firstname, '%mbe%')} filter on the account test index and compares the
 * result with the stored plan {@code explain_keyword_like_function.json}.
 */
@Test
public void testKeywordLikeFunctionExplain() throws IOException {
  // Load the expected plan first, then run explain, so failures in either step surface in order.
  final String expectedPlan = loadExpectedPlan("explain_keyword_like_function.json");
  final String actualPlan =
      explainQueryToString(
          "source=opensearch-sql_test_index_account | where like(firstname, '%mbe%')");
  assertJsonEqualsIgnoreId(expectedPlan, actualPlan);
}

/**
 * Explains a {@code like(address, '%Holmes%')} filter on the account test index and compares the
 * result with the stored plan {@code explain_text_like_function.json}.
 */
@Test
public void testTextLikeFunctionExplain() throws IOException {
  // Load the expected plan first, then run explain, so failures in either step surface in order.
  final String expectedPlan = loadExpectedPlan("explain_text_like_function.json");
  final String actualPlan =
      explainQueryToString(
          "source=opensearch-sql_test_index_account | where like(address, '%Holmes%')");
  assertJsonEqualsIgnoreId(expectedPlan, actualPlan);
}

@Ignore("The serialized string is unstable because of function properties")
@Test
public void testFilterScriptPushDownExplain() throws Exception {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,17 +62,17 @@ public void test_like_in_where_with_escaped_underscore() throws IOException {
@Test
public void test_like_on_text_field_with_one_word() throws IOException {
String query =
"source=" + TEST_INDEX_WILDCARD + " | WHERE Like(TextBody, 'test*') | fields TextBody";
"source=" + TEST_INDEX_WILDCARD + " | WHERE Like(TextBody, 'test%') | fields TextBody";
JSONObject result = executeQuery(query);
assertEquals(9, result.getInt("total"));
assertEquals(8, result.getInt("total"));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Question:
With this patch, in both v2 and v3, the predicate WHERE Like(TextKeywordBody, 'test%') will trigger wildcard query pushdown, but the predicate WHERE Like(TextBody, 'test%') won't trigger any pushdown. Is that right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes

}

@Test
public void test_like_on_text_keyword_field_with_one_word() throws IOException {
String query =
"source="
+ TEST_INDEX_WILDCARD
+ " | WHERE Like(TextKeywordBody, 'test*') | fields TextKeywordBody";
+ " | WHERE Like(TextKeywordBody, 'test%') | fields TextKeywordBody";
JSONObject result = executeQuery(query);
assertEquals(8, result.getInt("total"));
}
Expand All @@ -82,17 +82,17 @@ public void test_like_on_text_keyword_field_with_greater_than_one_word() throws
String query =
"source="
+ TEST_INDEX_WILDCARD
+ " | WHERE Like(TextKeywordBody, 'test wild*') | fields TextKeywordBody";
+ " | WHERE Like(TextKeywordBody, 'test wild%') | fields TextKeywordBody";
JSONObject result = executeQuery(query);
assertEquals(7, result.getInt("total"));
}

@Test
public void test_like_on_text_field_with_greater_than_one_word() throws IOException {
String query =
"source=" + TEST_INDEX_WILDCARD + " | WHERE Like(TextBody, 'test wild*') | fields TextBody";
"source=" + TEST_INDEX_WILDCARD + " | WHERE Like(TextBody, 'test wild%') | fields TextBody";
JSONObject result = executeQuery(query);
assertEquals(0, result.getInt("total"));
assertEquals(7, result.getInt("total"));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,16 @@ public void testLikeFunction() throws IOException {
verifyDataRows(result, rows("Amber"));
}

/**
 * Runs {@code like(firstname, 'Duk_')} against the bank-with-null-values index and expects the
 * response to report a total of zero rows.
 */
@Test
public void testLikeFunctionNoHit() throws IOException {
  final String query =
      String.format(
          "source=%s | where like(firstname, 'Duk_') | fields lastname",
          TEST_INDEX_BANK_WITH_NULL_VALUES);
  final JSONObject response = executeQuery(query);
  assertEquals(0, response.getInt("total"));
}

@Test
public void testIsNullFunction() throws IOException {
JSONObject result =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,14 +117,14 @@ public void test_like_in_where_with_escaped_underscore() throws IOException {

@Test
public void test_like_on_text_field_with_one_word() throws IOException {
String query = "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE TextBody LIKE 'test*'";
String query = "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE TextBody LIKE 'test%'";
JSONObject result = executeJdbcRequest(query);
assertEquals(9, result.getInt("total"));
assertEquals(8, result.getInt("total"));
}

@Test
public void test_like_on_text_keyword_field_with_one_word() throws IOException {
String query = "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE TextKeywordBody LIKE 'test*'";
String query = "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE TextKeywordBody LIKE 'test%'";
JSONObject result = executeJdbcRequest(query);
assertEquals(8, result.getInt("total"));
}
Expand All @@ -134,7 +134,7 @@ public void test_like_on_text_keyword_field_with_greater_than_one_word() throws
String query =
"SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE TextKeywordBody LIKE 'test wild*'";
JSONObject result = executeJdbcRequest(query);
assertEquals(7, result.getInt("total"));
assertEquals(0, result.getInt("total"));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"calcite": {
"logical": "LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalFilter(condition=[ILIKE($1, '%mbe%':VARCHAR, '\\')])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n",
"physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], FILTER->ILIKE($1, '%mbe%':VARCHAR, '\\')], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"wildcard\":{\"firstname.keyword\":{\"wildcard\":\"*mbe*\",\"case_insensitive\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"calcite": {
"logical": "LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalFilter(condition=[ILIKE($2, '%Holmes%':VARCHAR, '\\')])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n",
"physical": "EnumerableCalc(expr#0..10=[{inputs}], expr#11=['%Holmes%':VARCHAR], expr#12=['\\'], expr#13=[ILIKE($t2, $t11, $t12)], proj#0..10=[{exprs}], $condition=[$t13])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"calcite": {
"logical": "LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalFilter(condition=[ILIKE($1, '%mbe%':VARCHAR, '\\')])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n",
"physical": "EnumerableCalc(expr#0..16=[{inputs}], expr#17=['%mbe%':VARCHAR], expr#18=['\\'], expr#19=[ILIKE($t1, $t17, $t18)], proj#0..10=[{exprs}], $condition=[$t19])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"calcite": {
"logical": "LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalFilter(condition=[ILIKE($2, '%Holmes%':VARCHAR, '\\')])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n",
"physical": "EnumerableCalc(expr#0..16=[{inputs}], expr#17=['%Holmes%':VARCHAR], expr#18=['\\'], expr#19=[ILIKE($t2, $t17, $t18)], proj#0..10=[{exprs}], $condition=[$t19])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"root": {
"name": "ProjectOperator",
"description": {
"fields": "[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]"
},
"children": [{
"name": "OpenSearchIndexScan",
"description": {
"request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"wildcard\":{\"firstname.keyword\":{\"wildcard\":\"*mbe*\",\"case_insensitive\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)"
},
"children": []
}]
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"root": {
"name": "ProjectOperator",
"description": {
"fields": "[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]"
},
"children": [{
"name": "FilterOperator",
"description": {
"conditions": "like(address, \"%Holmes%\")"
},
"children": [{
"name": "OpenSearchIndexScan",
"description": {
"request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\"}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)"
},
"children": []
}]
}]
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import static org.opensearch.index.query.QueryBuilders.regexpQuery;
import static org.opensearch.index.query.QueryBuilders.termQuery;
import static org.opensearch.index.query.QueryBuilders.termsQuery;
import static org.opensearch.index.query.QueryBuilders.wildcardQuery;
import static org.opensearch.script.Script.DEFAULT_SCRIPT_TYPE;
import static org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils.MULTI_FIELDS_RELEVANCE_FUNCTION_SET;
import static org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils.SINGLE_FIELD_RELEVANCE_FUNCTION_SET;
Expand Down Expand Up @@ -93,6 +94,7 @@
import org.opensearch.sql.opensearch.storage.script.CalciteScriptEngine.ReferenceFieldVisitor;
import org.opensearch.sql.opensearch.storage.script.CalciteScriptEngine.UnsupportedScriptException;
import org.opensearch.sql.opensearch.storage.script.CompoundedScriptEngine.ScriptEngineType;
import org.opensearch.sql.opensearch.storage.script.StringUtils;
import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchBoolPrefixQuery;
import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchPhrasePrefixQuery;
import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchPhraseQuery;
Expand Down Expand Up @@ -325,7 +327,8 @@ public Expression visitCall(RexCall call) {
case SPECIAL:
return switch (call.getKind()) {
case CAST -> toCastExpression(call);
case LIKE, CONTAINS -> binary(call);
case CONTAINS -> binary(call);
case LIKE -> like(call);
default -> {
String message = format(Locale.ROOT, "Unsupported call: [%s]", call);
throw new PredicateAnalyzerException(message);
Expand Down Expand Up @@ -533,8 +536,6 @@ private QueryExpression binary(RexCall call) {
switch (call.getKind()) {
case CONTAINS:
return QueryExpression.create(pair.getKey()).contains(pair.getValue());
case LIKE:
throw new UnsupportedOperationException("LIKE not yet supported");
case EQUALS:
return QueryExpression.create(pair.getKey()).equals(pair.getValue());
case NOT_EQUALS:
Expand Down Expand Up @@ -580,6 +581,16 @@ private QueryExpression binary(RexCall call) {
throw new PredicateAnalyzerException(message);
}

/**
 * Builds the query expression for a LIKE call.
 *
 * <p>The call must carry exactly three operands. Only the first two are visited; the third (the
 * default escape character) is intentionally left untouched here because escape handling is
 * delegated to the wildcard conversion in {@code StringUtils}
 * ({@code convertSqlWildcardToLuceneSafe}).
 *
 * @param call the LIKE call to translate
 * @return the query expression produced by applying {@code like} to the swapped operand pair
 */
private QueryExpression like(RexCall call) {
  checkState(call.getOperands().size() == 3);
  final Expression first = call.getOperands().get(0).accept(this);
  final Expression second = call.getOperands().get(1).accept(this);
  // swap(...) decides which operand acts as the key and which as the value.
  final SwapResult operands = swap(first, second);
  final QueryExpression target = QueryExpression.create(operands.getKey());
  return target.like(operands.getValue());
}

private static QueryExpression constructQueryExpressionForSearch(
RexCall call, SwapResult pair) {
if (isSearchWithComplementedPoints(call)) {
Expand Down Expand Up @@ -1137,10 +1148,24 @@ public QueryExpression notExists() {
return this;
}

/*
 * Prefer running a wildcard query on keyword fields. Text fields do not support cross-term
 * matching because OpenSearch internally breaks text into multiple terms and applies wildcard
 * matching to each term individually, which differs from the behavior of the regular like
 * function without pushdown.
 */
@Override
public QueryExpression like(LiteralExpression literal) {
builder = regexpQuery(getFieldReference(), literal.stringValue());
return this;
String fieldName = getFieldReference();
String keywordField = OpenSearchTextType.toKeywordSubField(fieldName, this.rel.getExprType());
boolean isKeywordField = keywordField != null;
if (isKeywordField) {
builder =
wildcardQuery(
keywordField, StringUtils.convertSqlWildcardToLuceneSafe(literal.stringValue()))
.caseInsensitive(true);
return this;
}
throw new UnsupportedOperationException("Like query is not supported for text field");
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Text fields can also be pushed down as a script. Can we track that as an enhancement issue?

Also, please add a note explaining the current limitation of text field support in LIKE.

###
PUT {{baseUrl}}/demo-index
Content-Type: application/x-ndjson

{
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 0
  },
  "mappings": {
    "properties": {
      "region": { "type": "text" },
      "sales": { "type": "double" }
    }
  }
}

###
POST {{baseUrl}}/demo-index/_bulk
Content-Type: application/x-ndjson

{ "index": {} }
{ "region": "us-east", "sales": 100.12 }
{ "index": {} }
{ "region": "us-east", "sales": 200.34 }
{ "index": {} }
{ "region": "us-west", "sales": 300.65 }
{ "index": {} }
{ "region": "us-west", "sales": 400.78 }

###
POST {{baseUrl}}/demo-index/_search
Content-Type: application/x-ndjson

{
  "derived": {
    "region_runtime": {
      "type": "keyword",
      "script": {
        "source": "emit(params._source['region'])"
      }
    }
  },
  "query": {
    "term": {
      "region_runtime": {
        "value": "us-east"
      }
    }
  }
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added limitation to LIKE function doc.

I tried pushing down the LIKE function as a script for a text field. However, getting ScriptDocValues for a text field throws an exception that recommends against doing so.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, text data does not have doc values, we should use source, e.g. params._source['region']. Let's track it in issue, not high priority.

Copy link
Contributor Author

@songkant-aws songkant-aws Jul 31, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense. Created a tracking issue: #3950. Hopefully, we can find a way to read script values from source.

}

@Override
Expand Down
Loading
Loading