Skip to content

Commit

Permalink
Add position() function to V2 engine (#1121)
Browse files Browse the repository at this point in the history
POSITION function is a synonym to LOCATE. It's syntax is POSITION(<substr> IN <string>)

Signed-off-by: Margarit Hakobyan <margarith@bitquilltech.com>
  • Loading branch information
margarit-h authored Dec 6, 2022
1 parent b76a178 commit 354e843
Show file tree
Hide file tree
Showing 10 changed files with 241 additions and 23 deletions.
4 changes: 4 additions & 0 deletions core/src/main/java/org/opensearch/sql/expression/DSL.java
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,10 @@ public static FunctionExpression cbrt(Expression... expressions) {
return compile(FunctionProperties.None, BuiltinFunctionName.CBRT, expressions);
}

public static FunctionExpression position(Expression... expressions) {
return compile(FunctionProperties.None, BuiltinFunctionName.POSITION, expressions);
}

public static FunctionExpression truncate(Expression... expressions) {
return compile(FunctionProperties.None, BuiltinFunctionName.TRUNCATE, expressions);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,23 +156,24 @@ public enum BuiltinFunctionName {
/**
* Text Functions.
*/
SUBSTR(FunctionName.of("substr")),
SUBSTRING(FunctionName.of("substring")),
RTRIM(FunctionName.of("rtrim")),
LTRIM(FunctionName.of("ltrim")),
TRIM(FunctionName.of("trim")),
UPPER(FunctionName.of("upper")),
LOWER(FunctionName.of("lower")),
REGEXP(FunctionName.of("regexp")),
ASCII(FunctionName.of("ascii")),
CONCAT(FunctionName.of("concat")),
CONCAT_WS(FunctionName.of("concat_ws")),
LENGTH(FunctionName.of("length")),
STRCMP(FunctionName.of("strcmp")),
RIGHT(FunctionName.of("right")),
LEFT(FunctionName.of("left")),
ASCII(FunctionName.of("ascii")),
LENGTH(FunctionName.of("length")),
LOCATE(FunctionName.of("locate")),
LOWER(FunctionName.of("lower")),
LTRIM(FunctionName.of("ltrim")),
POSITION(FunctionName.of("position")),
REGEXP(FunctionName.of("regexp")),
REPLACE(FunctionName.of("replace")),
RIGHT(FunctionName.of("right")),
RTRIM(FunctionName.of("rtrim")),
STRCMP(FunctionName.of("strcmp")),
SUBSTR(FunctionName.of("substr")),
SUBSTRING(FunctionName.of("substring")),
TRIM(FunctionName.of("trim")),
UPPER(FunctionName.of("upper")),

/**
* NULL Test.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,22 +39,23 @@ public class TextFunction {
* @param repository {@link BuiltinFunctionRepository}.
*/
public void register(BuiltinFunctionRepository repository) {
repository.register(substr());
repository.register(substring());
repository.register(ltrim());
repository.register(rtrim());
repository.register(trim());
repository.register(lower());
repository.register(upper());
repository.register(ascii());
repository.register(concat());
repository.register(concat_ws());
repository.register(length());
repository.register(strcmp());
repository.register(right());
repository.register(left());
repository.register(ascii());
repository.register(length());
repository.register(locate());
repository.register(lower());
repository.register(ltrim());
repository.register(position());
repository.register(replace());
repository.register(right());
repository.register(rtrim());
repository.register(strcmp());
repository.register(substr());
repository.register(substring());
repository.register(trim());
repository.register(upper());
}

/**
Expand Down Expand Up @@ -241,6 +242,20 @@ private DefaultFunctionResolver locate() {
TextFunction::exprLocate), INTEGER, STRING, STRING, INTEGER));
}

/**
* Returns the position of the first occurrence of a substring in a string starting from 1.
* Returns 0 if substring is not in string.
* Returns NULL if any argument is NULL.
* Supports following signature:
* (STRING IN STRING) -> INTEGER
*/
private DefaultFunctionResolver position() {
return define(BuiltinFunctionName.POSITION.getName(),
impl(nullMissingHandling(
(SerializableBiFunction<ExprValue, ExprValue, ExprValue>)
TextFunction::exprLocate), INTEGER, STRING, STRING));
}

/**
* REPLACE(str, from_str, to_str) returns the string str with all occurrences of
* the string from_str replaced by the string to_str.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,26 @@ void locate() {
DSL.locate(missingRef, DSL.literal("hello"), DSL.literal(1))));
}

@Test
void position() {
FunctionExpression expression = DSL.position(
DSL.literal("world"),
DSL.literal("helloworldworld"));
assertEquals(INTEGER, expression.type());
assertEquals(6, eval(expression).integerValue());

expression = DSL.position(
DSL.literal("abc"),
DSL.literal("hello world"));
assertEquals(INTEGER, expression.type());
assertEquals(0, eval(expression).integerValue());

when(nullRef.type()).thenReturn(STRING);
assertEquals(nullValue(), eval(DSL.position(nullRef, DSL.literal("hello"))));
when(missingRef.type()).thenReturn(STRING);
assertEquals(missingValue(), eval(DSL.position(missingRef, DSL.literal("hello"))));
}

@Test
void replace() {
FunctionExpression expression = DSL.replace(
Expand Down
25 changes: 25 additions & 0 deletions docs/user/dql/functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2420,6 +2420,31 @@ Example::
+---------------------+---------------------+


POSITION
------

Description
>>>>>>>>>>>

Usage: The syntax POSITION(substr IN str) returns the position of the first occurrence of substring substr in string str. Returns 0 if substr is not in str. Returns NULL if any argument is NULL.

Argument type: STRING, STRING

Return type integer:

(STRING IN STRING) -> INTEGER

Example::

os> SELECT POSITION('world' IN 'helloworld'), POSITION('invalid' IN 'helloworld');
fetched rows / total rows = 1/1
+-------------------------------------+---------------------------------------+
| POSITION('world' IN 'helloworld') | POSITION('invalid' IN 'helloworld') |
|-------------------------------------+---------------------------------------|
| 6 | 0 |
+-------------------------------------+---------------------------------------+


REPLACE
-------

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.sql;

import org.json.JSONObject;
import org.junit.Test;
import org.opensearch.sql.legacy.SQLIntegTestCase;
import org.opensearch.sql.legacy.TestsConstants;

import static org.opensearch.sql.util.MatcherUtils.rows;
import static org.opensearch.sql.util.MatcherUtils.schema;
import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;
import static org.opensearch.sql.util.MatcherUtils.verifySchema;

public class PositionFunctionIT extends SQLIntegTestCase {

@Override
protected void init() throws Exception {
loadIndex(Index.PEOPLE2);
loadIndex(Index.CALCS);
}

@Test
public void position_function_test() {
String query = "SELECT firstname, position('a' IN firstname) FROM %s";
JSONObject response = executeJdbcRequest(String.format(query, TestsConstants.TEST_INDEX_PEOPLE2));

verifySchema(response, schema("firstname", null, "keyword"),
schema("position('a' IN firstname)", null, "integer"));
assertEquals(12, response.getInt("total"));

verifyDataRows(response,
rows("Daenerys", 2), rows("Hattie", 2),
rows("Nanette", 2), rows("Dale", 2),
rows("Elinor", 0), rows("Virginia", 8),
rows("Dillard", 5), rows("Mcgee", 0),
rows("Aurelia", 7), rows("Fulton", 0),
rows("Burton", 0), rows("Josie", 0));
}

@Test
public void position_function_with_nulls_test() {
String query = "SELECT str2, position('ee' IN str2) FROM %s";
JSONObject response = executeJdbcRequest(String.format(query, TestsConstants.TEST_INDEX_CALCS));

verifySchema(response, schema("str2", null, "keyword"),
schema("position('ee' IN str2)", null, "integer"));
assertEquals(17, response.getInt("total"));

verifyDataRows(response,
rows("one", 0), rows("two", 0),
rows("three", 4), rows(null, null),
rows("five", 0), rows("six", 0),
rows(null, null), rows("eight", 0),
rows("nine", 0), rows("ten", 0),
rows("eleven", 0), rows("twelve", 0),
rows(null, null), rows("fourteen", 6),
rows("fifteen", 5), rows("sixteen", 5),
rows(null, null));
}

@Test
public void position_function_with_string_literals_test() {
String query = "SELECT position('world' IN 'hello world')";
JSONObject response = executeJdbcRequest(query);

verifySchema(response, schema("position('world' IN 'hello world')", null, "integer"));
assertEquals(1, response.getInt("total"));

verifyDataRows(response, rows(7));
}

@Test
public void position_function_with_only_fields_as_args_test() {
String query = "SELECT position(str3 IN str2) FROM %s WHERE str2 IN ('one', 'two', 'three')";
JSONObject response = executeJdbcRequest(String.format(query, TestsConstants.TEST_INDEX_CALCS));

verifySchema(response, schema("position(str3 IN str2)", null, "integer"));
assertEquals(3, response.getInt("total"));

verifyDataRows(response, rows(3), rows(0), rows(4));
}

@Test
public void position_function_with_function_as_arg_test() {
String query = "SELECT position(upper(str3) IN str1) FROM %s WHERE str1 LIKE 'BINDING SUPPLIES'";
JSONObject response = executeJdbcRequest(String.format(query, TestsConstants.TEST_INDEX_CALCS));

verifySchema(response, schema("position(upper(str3) IN str1)", null, "integer"));
assertEquals(1, response.getInt("total"));

verifyDataRows(response, rows(15));
}

@Test
public void position_function_in_where_clause_test() {
String query = "SELECT str2 FROM %s WHERE position(str3 IN str2)=1";
JSONObject response = executeJdbcRequest(String.format(query, TestsConstants.TEST_INDEX_CALCS));

verifySchema(response, schema("str2", null, "keyword"));
assertEquals(2, response.getInt("total"));

verifyDataRows(response, rows("eight"), rows("eleven"));
}

@Test
public void position_function_with_null_args_test() {
String query1 = "SELECT str2, position(null IN str2) FROM %s WHERE str2 IN ('one')";
String query2 = "SELECT str2, position(str2 IN null) FROM %s WHERE str2 IN ('one')";
JSONObject response1 = executeJdbcRequest(String.format(query1, TestsConstants.TEST_INDEX_CALCS));
JSONObject response2 = executeJdbcRequest(String.format(query2, TestsConstants.TEST_INDEX_CALCS));

verifySchema(response1,
schema("str2", null, "keyword"),
schema("position(null IN str2)", null, "integer"));
assertEquals(1, response1.getInt("total"));

verifySchema(response2,
schema("str2", null, "keyword"),
schema("position(str2 IN null)", null, "integer"));
assertEquals(1, response2.getInt("total"));

verifyDataRows(response1, rows("one", null));
verifyDataRows(response2, rows("one", null));
}
}
1 change: 1 addition & 0 deletions sql/src/main/antlr/OpenSearchSQLLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ NULLIF: 'NULLIF';
PERIOD_ADD: 'PERIOD_ADD';
PERIOD_DIFF: 'PERIOD_DIFF';
PI: 'PI';
POSITION: 'POSITION';
POW: 'POW';
POWER: 'POWER';
RADIANS: 'RADIANS';
Expand Down
5 changes: 5 additions & 0 deletions sql/src/main/antlr/OpenSearchSQLParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -316,13 +316,18 @@ functionCall
| aggregateFunction (orderByClause)? filterClause #filteredAggregationFunctionCall
| relevanceFunction #relevanceFunctionCall
| highlightFunction #highlightFunctionCall
| positionFunction #positionFunctionCall
;


highlightFunction
: HIGHLIGHT LR_BRACKET relevanceField (COMMA highlightArg)* RR_BRACKET
;

positionFunction
: POSITION LR_BRACKET functionArg IN functionArg RR_BRACKET
;

scalarFunctionName
: mathematicalFunctionName
| dateTimeFunctionName
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import static org.opensearch.sql.expression.function.BuiltinFunctionName.IS_NULL;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.LIKE;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.NOT_LIKE;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.POSITION;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.REGEXP;
import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.BinaryComparisonPredicateContext;
import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.BooleanContext;
Expand Down Expand Up @@ -148,6 +149,15 @@ public UnresolvedExpression visitHighlightFunctionCall(
builder.build());
}

@Override
public UnresolvedExpression visitPositionFunction(
OpenSearchSQLParser.PositionFunctionContext ctx) {
return new Function(
POSITION.getName().getFunctionName(),
Arrays.asList(visitFunctionArg(ctx.functionArg(0)),
visitFunctionArg(ctx.functionArg(1))));
}

@Override
public UnresolvedExpression visitTableFilter(TableFilterContext ctx) {
return new Function(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,14 @@ public void canBuildQualifiedNameHighlightFunction() {
);
}

@Test
public void canBuildStringLiteralPositionFunction() {
assertEquals(
function("position", stringLiteral("substr"), stringLiteral("str")),
buildExprAst("position(\"substr\" IN \"str\")")
);
}

@Test
public void canBuildWindowFunctionWithoutOrderBy() {
assertEquals(
Expand Down

0 comments on commit 354e843

Please sign in to comment.