Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add position() function to V2 engine #1121

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions core/src/main/java/org/opensearch/sql/expression/DSL.java
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,10 @@ public static FunctionExpression cbrt(Expression... expressions) {
return compile(FunctionProperties.None, BuiltinFunctionName.CBRT, expressions);
}

public static FunctionExpression position(Expression... expressions) {
return compile(FunctionProperties.None, BuiltinFunctionName.POSITION, expressions);
}

public static FunctionExpression truncate(Expression... expressions) {
return compile(FunctionProperties.None, BuiltinFunctionName.TRUNCATE, expressions);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,23 +153,24 @@ public enum BuiltinFunctionName {
/**
* Text Functions.
*/
SUBSTR(FunctionName.of("substr")),
SUBSTRING(FunctionName.of("substring")),
RTRIM(FunctionName.of("rtrim")),
LTRIM(FunctionName.of("ltrim")),
TRIM(FunctionName.of("trim")),
UPPER(FunctionName.of("upper")),
LOWER(FunctionName.of("lower")),
REGEXP(FunctionName.of("regexp")),
ASCII(FunctionName.of("ascii")),
CONCAT(FunctionName.of("concat")),
CONCAT_WS(FunctionName.of("concat_ws")),
LENGTH(FunctionName.of("length")),
STRCMP(FunctionName.of("strcmp")),
RIGHT(FunctionName.of("right")),
LEFT(FunctionName.of("left")),
ASCII(FunctionName.of("ascii")),
LENGTH(FunctionName.of("length")),
LOCATE(FunctionName.of("locate")),
LOWER(FunctionName.of("lower")),
LTRIM(FunctionName.of("ltrim")),
POSITION(FunctionName.of("position")),
REGEXP(FunctionName.of("regexp")),
REPLACE(FunctionName.of("replace")),
RIGHT(FunctionName.of("right")),
RTRIM(FunctionName.of("rtrim")),
STRCMP(FunctionName.of("strcmp")),
SUBSTR(FunctionName.of("substr")),
SUBSTRING(FunctionName.of("substring")),
TRIM(FunctionName.of("trim")),
UPPER(FunctionName.of("upper")),

/**
* NULL Test.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,22 +39,23 @@ public class TextFunction {
* @param repository {@link BuiltinFunctionRepository}.
*/
public void register(BuiltinFunctionRepository repository) {
repository.register(substr());
repository.register(substring());
repository.register(ltrim());
repository.register(rtrim());
repository.register(trim());
repository.register(lower());
repository.register(upper());
repository.register(ascii());
repository.register(concat());
repository.register(concat_ws());
repository.register(length());
repository.register(strcmp());
repository.register(right());
repository.register(left());
repository.register(ascii());
repository.register(length());
repository.register(locate());
repository.register(lower());
repository.register(ltrim());
repository.register(position());
repository.register(replace());
repository.register(right());
repository.register(rtrim());
repository.register(strcmp());
repository.register(substr());
repository.register(substring());
repository.register(trim());
repository.register(upper());
}

/**
Expand Down Expand Up @@ -241,6 +242,20 @@ private DefaultFunctionResolver locate() {
TextFunction::exprLocate), INTEGER, STRING, STRING, INTEGER));
}

/**
* Returns the position of the first occurrence of a substring in a string starting from 1.
* Returns 0 if substring is not in string.
* Returns NULL if any argument is NULL.
* Supports following signature:
* (STRING IN STRING) -> INTEGER
*/
private DefaultFunctionResolver position() {
return define(BuiltinFunctionName.POSITION.getName(),
impl(nullMissingHandling(
(SerializableBiFunction<ExprValue, ExprValue, ExprValue>)
TextFunction::exprLocate), INTEGER, STRING, STRING));
}

/**
* REPLACE(str, from_str, to_str) returns the string str with all occurrences of
* the string from_str replaced by the string to_str.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,26 @@ void locate() {
DSL.locate(missingRef, DSL.literal("hello"), DSL.literal(1))));
}

@Test
void position() {
FunctionExpression expression = DSL.position(
DSL.literal("world"),
DSL.literal("helloworldworld"));
assertEquals(INTEGER, expression.type());
assertEquals(6, eval(expression).integerValue());

expression = DSL.position(
DSL.literal("abc"),
DSL.literal("hello world"));
assertEquals(INTEGER, expression.type());
assertEquals(0, eval(expression).integerValue());

when(nullRef.type()).thenReturn(STRING);
assertEquals(nullValue(), eval(DSL.position(nullRef, DSL.literal("hello"))));
when(missingRef.type()).thenReturn(STRING);
assertEquals(missingValue(), eval(DSL.position(missingRef, DSL.literal("hello"))));
}

@Test
void replace() {
FunctionExpression expression = DSL.replace(
Expand Down
25 changes: 25 additions & 0 deletions docs/user/dql/functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2330,6 +2330,31 @@ Example::
+---------------------+---------------------+


POSITION
------

Description
>>>>>>>>>>>

Usage: The syntax POSITION(substr IN str) returns the position of the first occurrence of substring substr in string str. Returns 0 if substr is not in str. Returns NULL if any argument is NULL.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Returns 0 if substr is not in str? not -1?
what if POSITION('world' IN 'world')?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function is the synonym of the LOCATE function. And works the same way in mySQL and legacy engine.
It returns 0, if substr in not in string like LOCATE and POSITION do in mySql and legacy engine.
POSITION('world' IN 'world') would return 1 as the first argument matches the given string (2nd arg) starting from the 1st position.
Screenshot 2022-12-06 at 8 56 55 AM


Argument type: STRING, STRING

Return type integer:

(STRING IN STRING) -> INTEGER

Example::

os> SELECT POSITION('world' IN 'helloworld'), POSITION('invalid' IN 'helloworld');
fetched rows / total rows = 1/1
+-------------------------------------+---------------------------------------+
| POSITION('world' IN 'helloworld') | POSITION('invalid' IN 'helloworld') |
|-------------------------------------+---------------------------------------|
| 6 | 0 |
+-------------------------------------+---------------------------------------+


REPLACE
-------

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.sql;

import org.json.JSONObject;
import org.junit.Test;
import org.opensearch.sql.legacy.SQLIntegTestCase;
import org.opensearch.sql.legacy.TestsConstants;

import static org.opensearch.sql.util.MatcherUtils.rows;
import static org.opensearch.sql.util.MatcherUtils.schema;
import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;
import static org.opensearch.sql.util.MatcherUtils.verifySchema;

public class PositionFunctionIT extends SQLIntegTestCase {

@Override
protected void init() throws Exception {
loadIndex(Index.PEOPLE2);
loadIndex(Index.CALCS);
}

@Test
public void position_function_test() {
String query = "SELECT firstname, position('a' IN firstname) FROM %s";
JSONObject response = executeJdbcRequest(String.format(query, TestsConstants.TEST_INDEX_PEOPLE2));

verifySchema(response, schema("firstname", null, "keyword"),
schema("position('a' IN firstname)", null, "integer"));
assertEquals(12, response.getInt("total"));

verifyDataRows(response,
rows("Daenerys", 2), rows("Hattie", 2),
rows("Nanette", 2), rows("Dale", 2),
rows("Elinor", 0), rows("Virginia", 8),
rows("Dillard", 5), rows("Mcgee", 0),
rows("Aurelia", 7), rows("Fulton", 0),
rows("Burton", 0), rows("Josie", 0));
}

@Test
public void position_function_with_nulls_test() {
String query = "SELECT str2, position('ee' IN str2) FROM %s";
JSONObject response = executeJdbcRequest(String.format(query, TestsConstants.TEST_INDEX_CALCS));

verifySchema(response, schema("str2", null, "keyword"),
schema("position('ee' IN str2)", null, "integer"));
assertEquals(17, response.getInt("total"));

verifyDataRows(response,
rows("one", 0), rows("two", 0),
rows("three", 4), rows(null, null),
rows("five", 0), rows("six", 0),
rows(null, null), rows("eight", 0),
rows("nine", 0), rows("ten", 0),
rows("eleven", 0), rows("twelve", 0),
rows(null, null), rows("fourteen", 6),
rows("fifteen", 5), rows("sixteen", 5),
rows(null, null));
}

@Test
public void position_function_with_string_literals_test() {
String query = "SELECT position('world' IN 'hello world')";
JSONObject response = executeJdbcRequest(query);

verifySchema(response, schema("position('world' IN 'hello world')", null, "integer"));
assertEquals(1, response.getInt("total"));

verifyDataRows(response, rows(7));
}

@Test
public void position_function_with_only_fields_as_args_test() {
String query = "SELECT position(str3 IN str2) FROM %s WHERE str2 IN ('one', 'two', 'three')";
JSONObject response = executeJdbcRequest(String.format(query, TestsConstants.TEST_INDEX_CALCS));

verifySchema(response, schema("position(str3 IN str2)", null, "integer"));
assertEquals(3, response.getInt("total"));

verifyDataRows(response, rows(3), rows(0), rows(4));
}

@Test
public void position_function_with_function_as_arg_test() {
String query = "SELECT position(upper(str3) IN str1) FROM %s WHERE str1 LIKE 'BINDING SUPPLIES'";
JSONObject response = executeJdbcRequest(String.format(query, TestsConstants.TEST_INDEX_CALCS));

verifySchema(response, schema("position(upper(str3) IN str1)", null, "integer"));
assertEquals(1, response.getInt("total"));

verifyDataRows(response, rows(15));
}

@Test
public void position_function_in_where_clause_test() {
String query = "SELECT str2 FROM %s WHERE position(str3 IN str2)=1";
JSONObject response = executeJdbcRequest(String.format(query, TestsConstants.TEST_INDEX_CALCS));

verifySchema(response, schema("str2", null, "keyword"));
assertEquals(2, response.getInt("total"));

verifyDataRows(response, rows("eight"), rows("eleven"));
}

@Test
public void position_function_with_null_args_test() {
String query1 = "SELECT str2, position(null IN str2) FROM %s WHERE str2 IN ('one')";
String query2 = "SELECT str2, position(str2 IN null) FROM %s WHERE str2 IN ('one')";
JSONObject response1 = executeJdbcRequest(String.format(query1, TestsConstants.TEST_INDEX_CALCS));
JSONObject response2 = executeJdbcRequest(String.format(query2, TestsConstants.TEST_INDEX_CALCS));

verifySchema(response1,
schema("str2", null, "keyword"),
schema("position(null IN str2)", null, "integer"));
assertEquals(1, response1.getInt("total"));

verifySchema(response2,
schema("str2", null, "keyword"),
schema("position(str2 IN null)", null, "integer"));
assertEquals(1, response2.getInt("total"));

verifyDataRows(response1, rows("one", null));
verifyDataRows(response2, rows("one", null));
}
}
1 change: 1 addition & 0 deletions sql/src/main/antlr/OpenSearchSQLLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ NULLIF: 'NULLIF';
PERIOD_ADD: 'PERIOD_ADD';
PERIOD_DIFF: 'PERIOD_DIFF';
PI: 'PI';
POSITION: 'POSITION';
POW: 'POW';
POWER: 'POWER';
RADIANS: 'RADIANS';
Expand Down
5 changes: 5 additions & 0 deletions sql/src/main/antlr/OpenSearchSQLParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -313,13 +313,18 @@ functionCall
| aggregateFunction (orderByClause)? filterClause #filteredAggregationFunctionCall
| relevanceFunction #relevanceFunctionCall
| highlightFunction #highlightFunctionCall
| positionFunction #positionFunctionCall
;


highlightFunction
: HIGHLIGHT LR_BRACKET relevanceField (COMMA highlightArg)* RR_BRACKET
;

positionFunction
: POSITION LR_BRACKET functionArg IN functionArg RR_BRACKET
;
dai-chen marked this conversation as resolved.
Show resolved Hide resolved

scalarFunctionName
: mathematicalFunctionName
| dateTimeFunctionName
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import static org.opensearch.sql.expression.function.BuiltinFunctionName.IS_NULL;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.LIKE;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.NOT_LIKE;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.POSITION;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.REGEXP;
import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.BinaryComparisonPredicateContext;
import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.BooleanContext;
Expand Down Expand Up @@ -145,6 +146,15 @@ public UnresolvedExpression visitHighlightFunctionCall(
builder.build());
}

@Override
public UnresolvedExpression visitPositionFunction(
OpenSearchSQLParser.PositionFunctionContext ctx) {
return new Function(
POSITION.getName().getFunctionName(),
Arrays.asList(visitFunctionArg(ctx.functionArg(0)),
visitFunctionArg(ctx.functionArg(1))));
}

@Override
public UnresolvedExpression visitTableFilter(TableFilterContext ctx) {
return new Function(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,14 @@ public void canBuildQualifiedNameHighlightFunction() {
);
}

@Test
public void canBuildStringLiteralPositionFunction() {
assertEquals(
function("position", stringLiteral("substr"), stringLiteral("str")),
buildExprAst("position(\"substr\" IN \"str\")")
);
}

@Test
public void canBuildWindowFunctionWithoutOrderBy() {
assertEquals(
Expand Down