From d8b412d340ed6374b229f5918b7f4cc49ace2df8 Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Tue, 7 Mar 2023 13:41:27 -0800 Subject: [PATCH 01/40] Rebase from main Signed-off-by: Andrew Carbonetto --- .../org/opensearch/sql/analysis/Analyzer.java | 7 +++++ .../sql/ast/expression/QualifiedName.java | 17 ++++++++++- .../sql/legacy/CsvFormatResponseIT.java | 3 ++ .../OpenSearchDescribeIndexRequest.java | 5 ++++ .../response/OpenSearchResponse.java | 28 ++++++++++++++----- sql/src/main/antlr/OpenSearchSQLLexer.g4 | 14 +++++++++- .../sql/sql/parser/AstExpressionBuilder.java | 11 ++++---- 7 files changed, 71 insertions(+), 14 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 228b54ba0c..d94d1cf3ce 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -158,6 +158,13 @@ public LogicalPlan visitRelation(Relation node, AnalysisContext context) { } table.getFieldTypes().forEach((k, v) -> curEnv.define(new Symbol(Namespace.FIELD_NAME, k), v)); + // add OpenSearch metadata types + curEnv.define(new Symbol(Namespace.FIELD_NAME, "_index"), ExprCoreType.STRING); + curEnv.define(new Symbol(Namespace.FIELD_NAME, "_id"), ExprCoreType.STRING); + curEnv.define(new Symbol(Namespace.FIELD_NAME, "_score"), ExprCoreType.FLOAT); + curEnv.define(new Symbol(Namespace.FIELD_NAME, "_maxscore"), ExprCoreType.FLOAT); + curEnv.define(new Symbol(Namespace.FIELD_NAME, "_sort"), ExprCoreType.LONG); + // Put index name or its alias in index namespace on type environment so qualifier // can be removed when analyzing qualified name. The value (expr type) here doesn't matter. 
curEnv.define(new Symbol(Namespace.INDEX_NAME, diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java b/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java index 8b16119dc0..a6c10c55dd 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java @@ -25,14 +25,27 @@ public class QualifiedName extends UnresolvedExpression { private final List parts; + @Getter + private final Boolean isMetadataField; + public QualifiedName(String name) { + this(name, Boolean.FALSE); + } + + public QualifiedName(String name, Boolean isMetadataField) { this.parts = Collections.singletonList(name); + this.isMetadataField = isMetadataField; + } + + public QualifiedName(Iterable parts) { + this(parts, Boolean.FALSE); } /** * QualifiedName Constructor. */ - public QualifiedName(Iterable parts) { + public QualifiedName(Iterable parts, Boolean isMetadataField) { + this.isMetadataField = isMetadataField; List partsList = StreamSupport.stream(parts.spliterator(), false).collect(toList()); if (partsList.isEmpty()) { throw new IllegalArgumentException("parts is empty"); @@ -110,4 +123,6 @@ public List getChild() { public R accept(AbstractNodeVisitor nodeVisitor, C context) { return nodeVisitor.visitQualifiedName(this, context); } + + public Boolean isMetadataField() { return Boolean.TRUE; } } diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/CsvFormatResponseIT.java b/integ-test/src/test/java/org/opensearch/sql/legacy/CsvFormatResponseIT.java index d562794409..7bbc341ccf 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/CsvFormatResponseIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/CsvFormatResponseIT.java @@ -99,6 +99,7 @@ public void specificPercentilesIntAndDouble() throws IOException { } } + // TODO: this should now work @Ignore("only work for legacy engine") public void 
nestedObjectsAndArraysAreQuoted() throws IOException { final String query = String.format(Locale.ROOT, "SELECT * FROM %s WHERE _id = 5", @@ -114,6 +115,7 @@ public void nestedObjectsAndArraysAreQuoted() throws IOException { Assert.assertThat(result, containsString(expectedMessage)); } + // TODO: this should now work @Ignore("only work for legacy engine") public void arraysAreQuotedInFlatMode() throws IOException { setFlatOption(true); @@ -575,6 +577,7 @@ public void twoCharsSeperator() throws Exception { } + // TODO: this should now work @Ignore("only work for legacy engine") public void includeIdAndNotTypeOrScore() throws Exception { String query = String.format(Locale.ROOT, diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/system/OpenSearchDescribeIndexRequest.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/system/OpenSearchDescribeIndexRequest.java index 50402fc75b..cd0a99e309 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/system/OpenSearchDescribeIndexRequest.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/system/OpenSearchDescribeIndexRequest.java @@ -90,6 +90,11 @@ public Map getFieldTypes() { .filter(entry -> !ExprCoreType.UNKNOWN.equals(entry.getValue())) .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))); } + fieldTypes.put("_index", ExprCoreType.STRING); + fieldTypes.put("_id", ExprCoreType.STRING); + fieldTypes.put("_score", ExprCoreType.FLOAT); + fieldTypes.put("_maxscore", ExprCoreType.FLOAT); + fieldTypes.put("_sort", ExprCoreType.LONG); return fieldTypes; } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java index aadd73efdd..5563ee5136 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java +++ 
b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java @@ -17,6 +17,9 @@ import org.opensearch.search.SearchHits; import org.opensearch.search.aggregations.Aggregations; import org.opensearch.sql.data.model.ExprTupleValue; +import org.opensearch.sql.data.model.ExprStringValue; +import org.opensearch.sql.data.model.ExprFloatValue; +import org.opensearch.sql.data.model.ExprLongValue; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.model.ExprValueUtils; import org.opensearch.sql.opensearch.data.value.OpenSearchExprValueFactory; @@ -92,14 +95,25 @@ public Iterator iterator() { return (ExprValue) ExprTupleValue.fromExprValueMap(builder.build()); }).iterator(); } else { + float maxScore = hits.getMaxScore(); return Arrays.stream(hits.getHits()) .map(hit -> { - ExprValue docData = exprValueFactory.construct(hit.getSourceAsString()); - if (hit.getHighlightFields().isEmpty()) { - return docData; - } else { - ImmutableMap.Builder builder = new ImmutableMap.Builder<>(); - builder.putAll(docData.tupleValue()); + String source = hit.getSourceAsString(); + ExprValue docData = exprValueFactory.construct(source); + + ImmutableMap.Builder builder = new ImmutableMap.Builder<>(); + builder.putAll(docData.tupleValue()); + builder.put("_index", new ExprStringValue(hit.getIndex())); + builder.put("_id", new ExprStringValue(hit.getId())); + if (!Float.isNaN(hit.getScore())) { + builder.put("_score", new ExprFloatValue(hit.getScore())); + } + if (!Float.isNaN(maxScore)) { + builder.put("_maxscore", new ExprLongValue(maxScore)); + } + builder.put("_sort", new ExprLongValue(hit.getSeqNo())); + + if (!hit.getHighlightFields().isEmpty()) { var hlBuilder = ImmutableMap.builder(); for (var es : hit.getHighlightFields().entrySet()) { hlBuilder.put(es.getKey(), ExprValueUtils.collectionValue( @@ -107,8 +121,8 @@ public Iterator iterator() { t -> (t.toString())).collect(Collectors.toList()))); } builder.put("_highlight", 
ExprTupleValue.fromExprValueMap(hlBuilder.build())); - return ExprTupleValue.fromExprValueMap(builder.build()); } + return (ExprValue) ExprTupleValue.fromExprValueMap(builder.build()); }).iterator(); } } diff --git a/sql/src/main/antlr/OpenSearchSQLLexer.g4 b/sql/src/main/antlr/OpenSearchSQLLexer.g4 index 25f23a7bd6..002efd0f2f 100644 --- a/sql/src/main/antlr/OpenSearchSQLLexer.g4 +++ b/sql/src/main/antlr/OpenSearchSQLLexer.g4 @@ -135,6 +135,15 @@ SUBSTRING: 'SUBSTRING'; TRIM: 'TRIM'; +// Metadata fields can be ID + +META_INDEX: '_ID'; +META_ID: '_ID'; +META_SCORE: '_SCORE'; +META_MAXSCORE: '_MAXSCORE'; +META_SORT: '_SORT'; + + // Keywords, but can be ID // Common Keywords, but can be ID @@ -456,7 +465,6 @@ BACKTICK_QUOTE_ID: BQUOTA_STRING; // Fragments for Literal primitives fragment EXPONENT_NUM_PART: 'E' [-+]? DEC_DIGIT+; -fragment ID_LITERAL: [@*A-Z]+?[*A-Z_\-0-9]*; fragment DQUOTA_STRING: '"' ( '\\'. | '""' | ~('"'| '\\') )* '"'; fragment SQUOTA_STRING: '\'' ('\\'. | '\'\'' | ~('\'' | '\\'))* '\''; fragment BQUOTA_STRING: '`' ( '\\'. | '``' | ~('`'|'\\'))* '`'; @@ -464,6 +472,10 @@ fragment HEX_DIGIT: [0-9A-F]; fragment DEC_DIGIT: [0-9]; fragment BIT_STRING_L: 'B' '\'' [01]+ '\''; +// Identifiers cannot start with a single '_' since this an OpebSearch reserved +// metadata field. Two underscores (or more) is acceptable, such as '__field'. +fragment ID_LITERAL: ([_][_]|[@*A-Z])+?[*A-Z_\-0-9]*; + // Last tokens must generate Errors ERROR_RECOGNITION: . 
-> channel(ERRORCHANNEL); diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java index c024d74f8c..23d51c2d07 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java @@ -488,12 +488,13 @@ private Function buildFunction(String functionName, } private QualifiedName visitIdentifiers(List identifiers) { + Boolean isMetadataField = identifiers.stream().filter(id -> id.metadataField() != null).findFirst().isPresent(); return new QualifiedName( - identifiers.stream() - .map(RuleContext::getText) - .map(StringUtils::unquoteIdentifier) - .collect(Collectors.toList()) - ); + identifiers.stream() + .map(RuleContext::getText) + .map(StringUtils::unquoteIdentifier) + .collect(Collectors.toList()), + isMetadataField); } private void fillRelevanceArgs(List args, From 5eaa34441cba9ffda82425fd41c7745bddb8721d Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Mon, 14 Nov 2022 12:31:42 -0800 Subject: [PATCH 02/40] Update to define and include metadata when visiting the expr node Signed-off-by: Andrew Carbonetto --- .../org/opensearch/sql/analysis/Analyzer.java | 7 ----- .../sql/analysis/ExpressionAnalyzer.java | 31 +++++++++++++++++++ .../sql/ast/expression/QualifiedName.java | 3 +- .../OpenSearchDescribeIndexRequest.java | 5 --- sql/src/main/antlr/OpenSearchSQLLexer.g4 | 2 +- 5 files changed, 33 insertions(+), 15 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index d94d1cf3ce..228b54ba0c 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -158,13 +158,6 @@ public LogicalPlan visitRelation(Relation node, AnalysisContext context) { } table.getFieldTypes().forEach((k, v) -> 
curEnv.define(new Symbol(Namespace.FIELD_NAME, k), v)); - // add OpenSearch metadata types - curEnv.define(new Symbol(Namespace.FIELD_NAME, "_index"), ExprCoreType.STRING); - curEnv.define(new Symbol(Namespace.FIELD_NAME, "_id"), ExprCoreType.STRING); - curEnv.define(new Symbol(Namespace.FIELD_NAME, "_score"), ExprCoreType.FLOAT); - curEnv.define(new Symbol(Namespace.FIELD_NAME, "_maxscore"), ExprCoreType.FLOAT); - curEnv.define(new Symbol(Namespace.FIELD_NAME, "_sort"), ExprCoreType.LONG); - // Put index name or its alias in index namespace on type environment so qualifier // can be removed when analyzing qualified name. The value (expr type) here doesn't matter. curEnv.define(new Symbol(Namespace.INDEX_NAME, diff --git a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java index ff3c01d5b8..1cf7571cfc 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java @@ -51,6 +51,7 @@ import org.opensearch.sql.ast.expression.Xor; import org.opensearch.sql.common.antlr.SyntaxCheckException; import org.opensearch.sql.data.model.ExprValueUtils; +import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.expression.DSL; @@ -297,6 +298,9 @@ public Expression visitAllFields(AllFields node, AnalysisContext context) { @Override public Expression visitQualifiedName(QualifiedName node, AnalysisContext context) { QualifierAnalyzer qualifierAnalyzer = new QualifierAnalyzer(context); + if (node.isMetadataField().booleanValue()) { + return visitMetadata(qualifierAnalyzer.unqualified(node), context); + } return visitIdentifier(qualifierAnalyzer.unqualified(node), context); } @@ -313,6 +317,33 @@ public Expression visitUnresolvedArgument(UnresolvedArgument node, AnalysisConte 
return new NamedArgumentExpression(node.getArgName(), node.getValue().accept(this, context)); } + private Expression visitMetadata(String ident, AnalysisContext context) { + // ParseExpression will always override ReferenceExpression when ident conflicts + for (NamedExpression expr : context.getNamedParseExpressions()) { + if (expr.getNameOrAlias().equals(ident) && expr.getDelegated() instanceof ParseExpression) { + return expr.getDelegated(); + } + } + + ReferenceExpression ref; + switch(ident.toLowerCase()) { + case "_index": + case "_id": + ref = DSL.ref(ident, ExprCoreType.STRING); + break; + case "_score": + case "_maxscore": + ref = DSL.ref(ident, ExprCoreType.FLOAT); + break; + case "_sort": + ref = DSL.ref(ident, ExprCoreType.LONG); + break; + default: + throw new SemanticCheckException("invalid metadata field"); + } + return ref; + } + private Expression visitIdentifier(String ident, AnalysisContext context) { // ParseExpression will always override ReferenceExpression when ident conflicts for (NamedExpression expr : context.getNamedParseExpressions()) { diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java b/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java index a6c10c55dd..85f2bf63a6 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java @@ -25,7 +25,6 @@ public class QualifiedName extends UnresolvedExpression { private final List parts; - @Getter private final Boolean isMetadataField; public QualifiedName(String name) { @@ -124,5 +123,5 @@ public R accept(AbstractNodeVisitor nodeVisitor, C context) { return nodeVisitor.visitQualifiedName(this, context); } - public Boolean isMetadataField() { return Boolean.TRUE; } + public Boolean isMetadataField() { return this.isMetadataField; } } diff --git 
a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/system/OpenSearchDescribeIndexRequest.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/system/OpenSearchDescribeIndexRequest.java index cd0a99e309..50402fc75b 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/system/OpenSearchDescribeIndexRequest.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/system/OpenSearchDescribeIndexRequest.java @@ -90,11 +90,6 @@ public Map getFieldTypes() { .filter(entry -> !ExprCoreType.UNKNOWN.equals(entry.getValue())) .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))); } - fieldTypes.put("_index", ExprCoreType.STRING); - fieldTypes.put("_id", ExprCoreType.STRING); - fieldTypes.put("_score", ExprCoreType.FLOAT); - fieldTypes.put("_maxscore", ExprCoreType.FLOAT); - fieldTypes.put("_sort", ExprCoreType.LONG); return fieldTypes; } diff --git a/sql/src/main/antlr/OpenSearchSQLLexer.g4 b/sql/src/main/antlr/OpenSearchSQLLexer.g4 index 002efd0f2f..84c81659bc 100644 --- a/sql/src/main/antlr/OpenSearchSQLLexer.g4 +++ b/sql/src/main/antlr/OpenSearchSQLLexer.g4 @@ -137,7 +137,7 @@ TRIM: 'TRIM'; // Metadata fields can be ID -META_INDEX: '_ID'; +META_INDEX: '_INDEX'; META_ID: '_ID'; META_SCORE: '_SCORE'; META_MAXSCORE: '_MAXSCORE'; From ea254555ba7f3150f605b2e2f88668ac6c9fb5fe Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Mon, 9 Jan 2023 21:15:53 -0800 Subject: [PATCH 03/40] Add specific metadata identifiers Signed-off-by: Andrew Carbonetto --- .../sql/analysis/ExpressionAnalyzer.java | 9 +---- .../org/opensearch/sql/ast/dsl/AstDSL.java | 7 +++- .../sql/ast/expression/QualifiedName.java | 4 +- .../sql/analysis/ExpressionAnalyzerTest.java | 39 +++++++++++++++++++ sql/src/main/antlr/OpenSearchSQLLexer.g4 | 2 +- 5 files changed, 49 insertions(+), 12 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java 
b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java index 1cf7571cfc..d2149a6260 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java @@ -318,15 +318,8 @@ public Expression visitUnresolvedArgument(UnresolvedArgument node, AnalysisConte } private Expression visitMetadata(String ident, AnalysisContext context) { - // ParseExpression will always override ReferenceExpression when ident conflicts - for (NamedExpression expr : context.getNamedParseExpressions()) { - if (expr.getNameOrAlias().equals(ident) && expr.getDelegated() instanceof ParseExpression) { - return expr.getDelegated(); - } - } - ReferenceExpression ref; - switch(ident.toLowerCase()) { + switch (ident.toLowerCase()) { case "_index": case "_id": ref = DSL.ref(ident, ExprCoreType.STRING); diff --git a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java index 039b6380f7..80f209e27e 100644 --- a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java +++ b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java @@ -60,7 +60,6 @@ import org.opensearch.sql.ast.tree.TableFunction; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.ast.tree.Values; -import org.opensearch.sql.expression.function.BuiltinFunctionName; /** * Class of static methods to create specific node instances. @@ -140,7 +139,11 @@ public UnresolvedPlan values(List... values) { } public static QualifiedName qualifiedName(String... parts) { - return new QualifiedName(Arrays.asList(parts)); + return new QualifiedName(Arrays.asList(parts), Boolean.FALSE); + } + + public static QualifiedName qualifiedNameWithMetadata(String... 
parts) { + return new QualifiedName(Arrays.asList(parts), Boolean.TRUE); } public static UnresolvedExpression equalTo( diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java b/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java index 85f2bf63a6..60f10ee3a1 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java @@ -123,5 +123,7 @@ public R accept(AbstractNodeVisitor nodeVisitor, C context) { return nodeVisitor.visitQualifiedName(this, context); } - public Boolean isMetadataField() { return this.isMetadataField; } + public Boolean isMetadataField() { + return this.isMetadataField; + } } diff --git a/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java b/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java index c7a11658e3..0ab84e94b7 100644 --- a/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java +++ b/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java @@ -15,12 +15,15 @@ import static org.opensearch.sql.ast.dsl.AstDSL.function; import static org.opensearch.sql.ast.dsl.AstDSL.intLiteral; import static org.opensearch.sql.ast.dsl.AstDSL.qualifiedName; +import static org.opensearch.sql.ast.dsl.AstDSL.qualifiedNameWithMetadata; import static org.opensearch.sql.ast.dsl.AstDSL.stringLiteral; import static org.opensearch.sql.ast.dsl.AstDSL.unresolvedArg; import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_TRUE; import static org.opensearch.sql.data.model.ExprValueUtils.integerValue; import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN; +import static org.opensearch.sql.data.type.ExprCoreType.FLOAT; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; +import static org.opensearch.sql.data.type.ExprCoreType.LONG; import static org.opensearch.sql.data.type.ExprCoreType.STRING; import static 
org.opensearch.sql.data.type.ExprCoreType.STRUCT; import static org.opensearch.sql.expression.DSL.ref; @@ -228,6 +231,42 @@ public void qualified_name_with_qualifier() { analysisContext.pop(); } + @Test + public void qualified_name_with_metadata_field_success() { + analysisContext.push(); + analysisContext.peek().define(new Symbol(Namespace.INDEX_NAME, "index_alias"), STRUCT); + + assertAnalyzeEqual(DSL.ref("_id", STRING), qualifiedNameWithMetadata("index_alias", "_id")); + assertAnalyzeEqual(DSL.ref("_index", STRING), + qualifiedNameWithMetadata("index_alias", "_index")); + assertAnalyzeEqual(DSL.ref("_score", FLOAT), + qualifiedNameWithMetadata("index_alias", "_score")); + assertAnalyzeEqual(DSL.ref("_maxscore", FLOAT), + qualifiedNameWithMetadata("index_alias", "_maxscore")); + assertAnalyzeEqual(DSL.ref("_sort", LONG), qualifiedNameWithMetadata("index_alias", "_sort")); + + assertAnalyzeEqual(DSL.ref("_id", STRING), qualifiedNameWithMetadata("_id")); + assertAnalyzeEqual(DSL.ref("_index", STRING), qualifiedNameWithMetadata("_index")); + + analysisContext.pop(); + } + + @Test + public void qualified_name_with_metadata_field_failure() { + analysisContext.push(); + analysisContext.peek().define(new Symbol(Namespace.INDEX_NAME, "index_alias"), STRUCT); + + SemanticCheckException exception = + assertThrows(SemanticCheckException.class, + () -> analyze(qualifiedNameWithMetadata("index_alias", "_invalid"))); + assertEquals( + "invalid metadata field", + exception.getMessage() + ); + + analysisContext.pop(); + } + @Test public void interval() { assertAnalyzeEqual( diff --git a/sql/src/main/antlr/OpenSearchSQLLexer.g4 b/sql/src/main/antlr/OpenSearchSQLLexer.g4 index 84c81659bc..fc596dc828 100644 --- a/sql/src/main/antlr/OpenSearchSQLLexer.g4 +++ b/sql/src/main/antlr/OpenSearchSQLLexer.g4 @@ -472,7 +472,7 @@ fragment HEX_DIGIT: [0-9A-F]; fragment DEC_DIGIT: [0-9]; fragment BIT_STRING_L: 'B' '\'' [01]+ '\''; -// Identifiers cannot start with a single '_' since this an 
OpebSearch reserved +// Identifiers cannot start with a single '_' since this an OpenSearch reserved // metadata field. Two underscores (or more) is acceptable, such as '__field'. fragment ID_LITERAL: ([_][_]|[@*A-Z])+?[*A-Z_\-0-9]*; From 9f1dcca437fcc437e04f8fb4eba773a3ad8530aa Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Tue, 10 Jan 2023 10:22:43 -0800 Subject: [PATCH 04/40] Add IT tests and add parser changes Signed-off-by: Andrew Carbonetto --- doctest/bin/test-docs | 3 -- .../org/opensearch/sql/sql/IdentifierIT.java | 31 +++++++++++++++++++ sql/src/main/antlr/OpenSearchSQLParser.g4 | 9 ++++++ 3 files changed, 40 insertions(+), 3 deletions(-) delete mode 100755 doctest/bin/test-docs diff --git a/doctest/bin/test-docs b/doctest/bin/test-docs deleted file mode 100755 index 4dd4390b89..0000000000 --- a/doctest/bin/test-docs +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -$DIR/../.venv/bin/python -X faulthandler -m unittest -v --failfast test_docs diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/IdentifierIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/IdentifierIT.java index 591364ea19..3e7401d74f 100644 --- a/integ-test/src/test/java/org/opensearch/sql/sql/IdentifierIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/sql/IdentifierIT.java @@ -64,6 +64,37 @@ public void testMultipleQueriesWithSpecialIndexNames() throws IOException { queryAndAssertTheDoc("SELECT * FROM test.two"); } + @Test + public void testDoubleUnderscoreIdentifierTest() throws IOException { + new Index("test.twounderscores") + .addDoc("{\"__age\": 30}"); + final JSONObject result = new JSONObject(executeQuery("SELECT __age FROM test.twounderscores", "jdbc")); + + verifySchema(result, + schema("__age", null, "long")); + verifyDataRows(result, rows(30)); + } + + @Test + public void testMetafieldIdentifierTest() throws IOException { + // create an index, but the contents doesn't matter + 
createIndexWithOneDoc("test.metafields"); + + // Execute using field metadata values + final JSONObject result = new JSONObject(executeQuery( + "SELECT *, _id, _index, _score, _maxscore, _sort FROM test.metafields", + "jdbc")); + + // Verify that the metadata values are returned when requested + verifySchema(result, + schema("age", null, "long"), + schema("_id", null, "keyword"), + schema("_index", null, "keyword"), + schema("_score", null, "float"), + schema("_maxscore", null, "float"), + schema("_sort", null, "long")); + } + private void createIndexWithOneDoc(String... indexNames) throws IOException { for (String indexName : indexNames) { new Index(indexName).addDoc("{\"age\": 30}"); diff --git a/sql/src/main/antlr/OpenSearchSQLParser.g4 b/sql/src/main/antlr/OpenSearchSQLParser.g4 index e5efeabba0..80aa688c5d 100644 --- a/sql/src/main/antlr/OpenSearchSQLParser.g4 +++ b/sql/src/main/antlr/OpenSearchSQLParser.g4 @@ -627,10 +627,19 @@ qualifiedName ident : DOT? ID | BACKTICK_QUOTE_ID + | metadataField | keywordsCanBeId | scalarFunctionName ; +metadataField + : META_INDEX + | META_ID + | META_SCORE + | META_MAXSCORE + | META_SORT + ; + keywordsCanBeId : FULL | FIELD | D | T | TS // OD SQL and ODBC special From 5a7036306c84e85a186a1dba55a90133c5d0ba85 Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Tue, 7 Mar 2023 13:42:02 -0800 Subject: [PATCH 05/40] Rebase from main Signed-off-by: Andrew Carbonetto --- .../sql/analysis/ExpressionAnalyzer.java | 7 ++ .../sql/ast/AbstractNodeVisitor.java | 5 ++ .../sql/ast/expression/ScoreFunction.java | 41 +++++++++ .../org/opensearch/sql/expression/DSL.java | 6 +- .../sql/expression/ExpressionNodeVisitor.java | 4 + .../sql/expression/ScoreExpression.java | 89 +++++++++++++++++++ .../function/BuiltinFunctionName.java | 6 ++ .../function/OpenSearchFunctions.java | 36 +++++++- .../rule/read/TableScanPushDown.java | 6 ++ .../sql/legacy/CsvFormatResponseIT.java | 10 +-- .../request/OpenSearchRequestBuilder.java | 4 + 
.../storage/OpenSearchIndexScan.java | 9 +- .../scan/OpenSearchIndexScanBuilder.java | 2 + .../storage/script/sort/SortQueryBuilder.java | 3 + .../OpenSearchIndexScanOptimizationTest.java | 2 + sql/src/main/antlr/OpenSearchSQLParser.g4 | 4 + .../sql/sql/parser/AstExpressionBuilder.java | 14 +++ 17 files changed, 236 insertions(+), 12 deletions(-) create mode 100644 core/src/main/java/org/opensearch/sql/ast/expression/ScoreFunction.java create mode 100644 core/src/main/java/org/opensearch/sql/expression/ScoreExpression.java diff --git a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java index d2149a6260..fafd29e963 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java @@ -42,6 +42,7 @@ import org.opensearch.sql.ast.expression.Or; import org.opensearch.sql.ast.expression.QualifiedName; import org.opensearch.sql.ast.expression.RelevanceFieldList; +import org.opensearch.sql.ast.expression.ScoreFunction; import org.opensearch.sql.ast.expression.Span; import org.opensearch.sql.ast.expression.UnresolvedArgument; import org.opensearch.sql.ast.expression.UnresolvedAttribute; @@ -61,6 +62,7 @@ import org.opensearch.sql.expression.NamedArgumentExpression; import org.opensearch.sql.expression.NamedExpression; import org.opensearch.sql.expression.ReferenceExpression; +import org.opensearch.sql.expression.ScoreExpression; import org.opensearch.sql.expression.aggregation.AggregationState; import org.opensearch.sql.expression.aggregation.Aggregator; import org.opensearch.sql.expression.conditional.cases.CaseClause; @@ -208,6 +210,11 @@ public Expression visitHighlightFunction(HighlightFunction node, AnalysisContext return new HighlightExpression(expr); } + public Expression visitScoreFunction(ScoreFunction node, AnalysisContext context) { + Expression relevanceQueryExpr = 
node.getRelevanceQuery().accept(this, context); + return new ScoreExpression(relevanceQueryExpr); + } + @Override public Expression visitIn(In node, AnalysisContext context) { return visitIn(node.getField(), node.getValueList(), context); diff --git a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index 393de05164..d2ebb9eb99 100644 --- a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -29,6 +29,7 @@ import org.opensearch.sql.ast.expression.Or; import org.opensearch.sql.ast.expression.QualifiedName; import org.opensearch.sql.ast.expression.RelevanceFieldList; +import org.opensearch.sql.ast.expression.ScoreFunction; import org.opensearch.sql.ast.expression.Span; import org.opensearch.sql.ast.expression.UnresolvedArgument; import org.opensearch.sql.ast.expression.UnresolvedAttribute; @@ -278,6 +279,10 @@ public T visitHighlightFunction(HighlightFunction node, C context) { return visitChildren(node, context); } + public T visitScoreFunction(ScoreFunction node, C context) { + return visitChildren(node, context); + } + public T visitStatement(Statement node, C context) { return visit(node, context); } diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/ScoreFunction.java b/core/src/main/java/org/opensearch/sql/ast/expression/ScoreFunction.java new file mode 100644 index 0000000000..cdde418834 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/expression/ScoreFunction.java @@ -0,0 +1,41 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.expression; + +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; +import org.opensearch.sql.ast.AbstractNodeVisitor; + +import java.util.Collections; +import java.util.List; +import java.util.Map; 
+import java.util.stream.Stream; + +/** + * Expression node of Score function: a relevance query plus its function arguments. + */ +@AllArgsConstructor +@EqualsAndHashCode(callSuper = false) +@Getter +@ToString +public class ScoreFunction extends UnresolvedExpression { + private final UnresolvedExpression relevanceQuery; + private final List funcArgs; + + @Override + public T accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitScoreFunction(this, context); + } + + @Override + public List getChild() { + List resultingList = new java.util.ArrayList<>(List.of(relevanceQuery)); // List.of is immutable; copy before addAll + resultingList.addAll(funcArgs); + return resultingList; + } +} diff --git a/core/src/main/java/org/opensearch/sql/expression/DSL.java b/core/src/main/java/org/opensearch/sql/expression/DSL.java index 616f431283..a22535507d 100644 --- a/core/src/main/java/org/opensearch/sql/expression/DSL.java +++ b/core/src/main/java/org/opensearch/sql/expression/DSL.java @@ -787,7 +787,11 @@ public static FunctionExpression match_bool_prefix(Expression... args) { } public static FunctionExpression wildcard_query(Expression... args) { - return compile(FunctionProperties.None,BuiltinFunctionName.WILDCARD_QUERY, args); + return compile(FunctionProperties.None, BuiltinFunctionName.WILDCARD_QUERY, args); + } + + public static FunctionExpression score(Expression... 
args) { + return compile(FunctionProperties.None, BuiltinFunctionName.SCORE, args); } public static FunctionExpression now(FunctionProperties functionProperties, diff --git a/core/src/main/java/org/opensearch/sql/expression/ExpressionNodeVisitor.java b/core/src/main/java/org/opensearch/sql/expression/ExpressionNodeVisitor.java index e3d4e38674..6ab2375067 100644 --- a/core/src/main/java/org/opensearch/sql/expression/ExpressionNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/expression/ExpressionNodeVisitor.java @@ -60,6 +60,10 @@ public T visitHighlight(HighlightExpression node, C context) { return visitNode(node, context); } + public T visitScore(ScoreExpression node, C context) { + return visitNode(node, context); + } + public T visitReference(ReferenceExpression node, C context) { return visitNode(node, context); } diff --git a/core/src/main/java/org/opensearch/sql/expression/ScoreExpression.java b/core/src/main/java/org/opensearch/sql/expression/ScoreExpression.java new file mode 100644 index 0000000000..d061dad6d2 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/expression/ScoreExpression.java @@ -0,0 +1,89 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression; + +import lombok.Getter; +import org.opensearch.sql.common.utils.StringUtils; +import org.opensearch.sql.data.model.ExprNullValue; +import org.opensearch.sql.data.model.ExprTupleValue; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.data.model.ExprValueUtils; +import org.opensearch.sql.data.type.ExprCoreType; +import org.opensearch.sql.data.type.ExprType; +import org.opensearch.sql.expression.env.Environment; +import org.opensearch.sql.expression.function.BuiltinFunctionName; + +import java.util.LinkedHashMap; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +/** + * Score Expression. 
+ */ +@Getter +public class ScoreExpression extends FunctionExpression { + + private final Expression relevanceQueryExpr; + + /** + * ScoreExpression Constructor. + * @param relevanceQueryExpr : relevanceQueryExpr for expression. + */ + public ScoreExpression(Expression relevanceQueryExpr) { + super(BuiltinFunctionName.SCORE.getName(), List.of(relevanceQueryExpr)); + this.relevanceQueryExpr = relevanceQueryExpr; + } + + /** + * Return collection value matching relevance query expression. + * @param valueEnv : Dataset to parse value from. + * @return : collection value of relevance query expression. + */ + @Override + public ExprValue valueOf(Environment valueEnv) { +// String refName = "_highlight"; +// // Not a wilcard expression +// if (this.type == ExprCoreType.ARRAY) { +// refName += "." + StringUtils.unquoteText(getHighlightField().toString()); +// } +// ExprValue value = valueEnv.resolve(DSL.ref(refName, ExprCoreType.STRING)); +// +// // In the event of multiple returned highlights and wildcard being +// // used in conjunction with other highlight calls, we need to ensure +// // only wildcard regex matching is mapped to wildcard call. 
+// if (this.type == ExprCoreType.STRUCT && value.type() == ExprCoreType.STRUCT) { +// value = new ExprTupleValue( +// new LinkedHashMap(value.tupleValue() +// .entrySet() +// .stream() +// .filter(s -> matchesHighlightRegex(s.getKey(), +// StringUtils.unquoteText(highlightField.toString()))) +// .collect(Collectors.toMap( +// e -> e.getKey(), +// e -> e.getValue())))); +// if (value.tupleValue().isEmpty()) { +// value = ExprValueUtils.missingValue(); +// } +// } + + // TODO: this is where we visit relevance function nodes and update BOOST values as necessary + // Otherwise, this is a no-op + + return ExprNullValue.of(); + } + + @Override + public T accept(ExpressionNodeVisitor visitor, C context) { + return visitor.visitScore(this, context); + } + + @Override + public ExprType type() { + return ExprCoreType.UNDEFINED; + } +} diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index ec4a7bc140..8af12baa65 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -112,6 +112,7 @@ public enum BuiltinFunctionName { WEEKOFYEAR(FunctionName.of("weekofyear")), WEEK_OF_YEAR(FunctionName.of("week_of_year")), YEAR(FunctionName.of("year")), + // `now`-like functions NOW(FunctionName.of("now")), CURDATE(FunctionName.of("curdate")), @@ -122,6 +123,7 @@ public enum BuiltinFunctionName { CURRENT_TIMESTAMP(FunctionName.of("current_timestamp")), LOCALTIMESTAMP(FunctionName.of("localtimestamp")), SYSDATE(FunctionName.of("sysdate")), + /** * Text Functions. 
*/ @@ -239,6 +241,10 @@ public enum BuiltinFunctionName { MATCH_BOOL_PREFIX(FunctionName.of("match_bool_prefix")), HIGHLIGHT(FunctionName.of("highlight")), MATCH_PHRASE_PREFIX(FunctionName.of("match_phrase_prefix")), + SCORE(FunctionName.of("score")), + SCOREQUERY(FunctionName.of("scorequery")), + SCORE_QUERY(FunctionName.of("score_query")), + /** * Legacy Relevance Function. */ diff --git a/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java b/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java index 842cf25cd6..939706804b 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java @@ -5,9 +5,13 @@ package org.opensearch.sql.expression.function; +import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN; +import static org.opensearch.sql.data.type.ExprCoreType.DOUBLE; + import java.util.List; import java.util.stream.Collectors; import lombok.experimental.UtilityClass; +import org.opensearch.sql.data.model.ExprDoubleValue; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.data.type.ExprType; @@ -32,6 +36,7 @@ public void register(BuiltinFunctionRepository repository) { repository.register(simple_query_string()); repository.register(query()); repository.register(query_string()); + // Register MATCHPHRASE as MATCH_PHRASE as well for backwards // compatibility. 
repository.register(match_phrase(BuiltinFunctionName.MATCH_PHRASE)); @@ -40,6 +45,9 @@ public void register(BuiltinFunctionRepository repository) { repository.register(match_phrase_prefix()); repository.register(wildcard_query(BuiltinFunctionName.WILDCARD_QUERY)); repository.register(wildcard_query(BuiltinFunctionName.WILDCARDQUERY)); + repository.register(score(BuiltinFunctionName.SCORE)); + repository.register(score(BuiltinFunctionName.SCOREQUERY)); + repository.register(score(BuiltinFunctionName.SCORE_QUERY)); } private static FunctionResolver match_bool_prefix() { @@ -86,6 +94,32 @@ private static FunctionResolver wildcard_query(BuiltinFunctionName wildcardQuery return new RelevanceFunctionResolver(funcName); } + /** + * Definition of score() function. + * Enables score calculation for the match call + */ +// private static DefaultFunctionResolver score(BuiltinFunctionName score) { +// FunctionName funcName = score.getName(); +// return FunctionDSL.define(funcName, +// FunctionDSL.impl( +// FunctionDSL.nullMissingHandling( +// (relevanceFunc) -> new ExprDoubleValue( +// Math.pow(relevanceFunc.shortValue(), 1)) +// ), +// BOOLEAN, BOOLEAN), +// FunctionDSL.impl( +// FunctionDSL.nullMissingHandling( +// (relevanceFunc, boost) -> new ExprDoubleValue( +// Math.pow(relevanceFunc.shortValue(), boost.shortValue())) +// ), +// BOOLEAN, BOOLEAN, DOUBLE)); +// } + + private static FunctionResolver score(BuiltinFunctionName score) { + FunctionName funcName = score.getName(); + return new RelevanceFunctionResolver(funcName); + } + public static class OpenSearchFunction extends FunctionExpression { private final FunctionName functionName; private final List arguments; @@ -110,7 +144,7 @@ public ExprValue valueOf(Environment valueEnv) { @Override public ExprType type() { - return ExprCoreType.BOOLEAN; + return BOOLEAN; } @Override diff --git a/core/src/main/java/org/opensearch/sql/planner/optimizer/rule/read/TableScanPushDown.java 
b/core/src/main/java/org/opensearch/sql/planner/optimizer/rule/read/TableScanPushDown.java index 556a12bb34..bb4e51c39e 100644 --- a/core/src/main/java/org/opensearch/sql/planner/optimizer/rule/read/TableScanPushDown.java +++ b/core/src/main/java/org/opensearch/sql/planner/optimizer/rule/read/TableScanPushDown.java @@ -75,6 +75,12 @@ public class TableScanPushDown implements Rule { .apply((highlight, scanBuilder) -> scanBuilder.pushDownHighlight(highlight)); + public static final Rule PUSH_DOWN_SCORE = + match(highlight(scanBuilder())).apply( + (highlight, scanBuilder) -> scanBuilder.pushDownHighlight(highlight) + ); + + /** Pattern that matches a plan node. */ private final WithPattern pattern; diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/CsvFormatResponseIT.java b/integ-test/src/test/java/org/opensearch/sql/legacy/CsvFormatResponseIT.java index 7bbc341ccf..b48cfab843 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/CsvFormatResponseIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/CsvFormatResponseIT.java @@ -99,8 +99,6 @@ public void specificPercentilesIntAndDouble() throws IOException { } } - // TODO: this should now work - @Ignore("only work for legacy engine") public void nestedObjectsAndArraysAreQuoted() throws IOException { final String query = String.format(Locale.ROOT, "SELECT * FROM %s WHERE _id = 5", TEST_INDEX_NESTED_TYPE); @@ -115,8 +113,6 @@ public void nestedObjectsAndArraysAreQuoted() throws IOException { Assert.assertThat(result, containsString(expectedMessage)); } - // TODO: this should now work - @Ignore("only work for legacy engine") public void arraysAreQuotedInFlatMode() throws IOException { setFlatOption(true); @@ -523,7 +519,7 @@ private void assertEquals(String expected, String actual, Double delta) { @Test public void includeScore() throws Exception { String query = String.format(Locale.ROOT, - "select age , firstname from %s where age > 31 order by _score desc limit 2 ", + "select age, 
firstname, _score from %s where age > 31 order by _score desc limit 2 ", TEST_INDEX_ACCOUNT); CSVResult csvResult = executeCsvRequest(query, false, true, false); List headers = csvResult.getHeaders(); @@ -577,11 +573,9 @@ public void twoCharsSeperator() throws Exception { } - // TODO: this should now work - @Ignore("only work for legacy engine") public void includeIdAndNotTypeOrScore() throws Exception { String query = String.format(Locale.ROOT, - "select age , firstname from %s where lastname = 'Marquez' ", TEST_INDEX_ACCOUNT); + "select age, firstname, _id from %s where lastname = 'Marquez' ", TEST_INDEX_ACCOUNT); CSVResult csvResult = executeCsvRequest(query, false, false, true); List headers = csvResult.getHeaders(); Assert.assertEquals(3, headers.size()); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java index 439a970a4f..ee4a89119e 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java @@ -78,6 +78,8 @@ public class OpenSearchRequestBuilder { */ private Integer querySize; + private boolean trackScores; + public OpenSearchRequestBuilder(String indexName, Integer maxResultWindow, Settings settings, @@ -97,9 +99,11 @@ public OpenSearchRequestBuilder(OpenSearchRequest.IndexName indexName, this.sourceBuilder = new SearchSourceBuilder(); this.exprValueFactory = exprValueFactory; this.querySize = settings.getSettingValue(Settings.Key.QUERY_SIZE_LIMIT); + this.trackScores = true; sourceBuilder.from(0); sourceBuilder.size(querySize); sourceBuilder.timeout(DEFAULT_QUERY_TIMEOUT); + sourceBuilder.trackScores(this.trackScores); } /** diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndexScan.java 
b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndexScan.java index e9746e1fae..66d9216b25 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndexScan.java @@ -58,8 +58,13 @@ public class OpenSearchIndexScan extends TableScanOperator { public OpenSearchIndexScan(OpenSearchClient client, Settings settings, String indexName, Integer maxResultWindow, OpenSearchExprValueFactory exprValueFactory) { - this(client, settings, - new OpenSearchRequest.IndexName(indexName),maxResultWindow, exprValueFactory); + this( + client, + settings, + new OpenSearchRequest.IndexName(indexName), + maxResultWindow, + exprValueFactory + ); } /** diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java index d7483cfcf0..9b36a29bf2 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java @@ -34,6 +34,8 @@ public class OpenSearchIndexScanBuilder extends TableScanBuilder { /** Is limit operator pushed down. 
*/ private boolean isLimitPushedDown = false; + private boolean isScoreTrackedPushedDown = false; + @VisibleForTesting OpenSearchIndexScanBuilder(TableScanBuilder delegate) { this.delegate = delegate; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/SortQueryBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/SortQueryBuilder.java index 8fb4eabbd8..fecc721618 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/SortQueryBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/SortQueryBuilder.java @@ -49,6 +49,9 @@ public class SortQueryBuilder { */ public SortBuilder build(Expression expression, Sort.SortOption option) { if (expression instanceof ReferenceExpression) { + if (((ReferenceExpression) expression).getAttr().equalsIgnoreCase("_score")) { + return SortBuilders.scoreSort().order(sortOrderMap.get(option.getSortOrder())); + } return fieldBuild((ReferenceExpression) expression, option); } else { throw new IllegalStateException("unsupported expression " + expression.getClass()); diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java index 363727cbd3..d800208105 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java @@ -30,6 +30,7 @@ import static org.opensearch.sql.planner.optimizer.rule.read.TableScanPushDown.PUSH_DOWN_HIGHLIGHT; import static org.opensearch.sql.planner.optimizer.rule.read.TableScanPushDown.PUSH_DOWN_LIMIT; import static org.opensearch.sql.planner.optimizer.rule.read.TableScanPushDown.PUSH_DOWN_PROJECT; +import static 
org.opensearch.sql.planner.optimizer.rule.read.TableScanPushDown.PUSH_DOWN_SCORE; import static org.opensearch.sql.planner.optimizer.rule.read.TableScanPushDown.PUSH_DOWN_SORT; import com.google.common.collect.ImmutableList; @@ -603,6 +604,7 @@ private LogicalPlan optimize(LogicalPlan plan) { PUSH_DOWN_SORT, PUSH_DOWN_LIMIT, PUSH_DOWN_HIGHLIGHT, + PUSH_DOWN_SCORE, PUSH_DOWN_PROJECT)); return optimizer.optimize(plan); } diff --git a/sql/src/main/antlr/OpenSearchSQLParser.g4 b/sql/src/main/antlr/OpenSearchSQLParser.g4 index 80aa688c5d..e864f880e2 100644 --- a/sql/src/main/antlr/OpenSearchSQLParser.g4 +++ b/sql/src/main/antlr/OpenSearchSQLParser.g4 @@ -306,6 +306,7 @@ functionCall | windowFunctionClause #windowFunctionCall | aggregateFunction #aggregateFunctionCall | aggregateFunction (orderByClause)? filterClause #filteredAggregationFunctionCall + | scoreRelevanceFunction #scoreRelevanceFunctionCall | relevanceFunction #relevanceFunctionCall | highlightFunction #highlightFunctionCall | positionFunction #positionFunctionCall @@ -354,7 +355,10 @@ specificFunction relevanceFunction : noFieldRelevanceFunction | singleFieldRelevanceFunction | multiFieldRelevanceFunction | altSingleFieldRelevanceFunction | altMultiFieldRelevanceFunction + ; +scoreRelevanceFunction + : scoreRelevanceFunctionName=SCORE LR_BRACKET relevanceFunction (COMMA functionArg)* RR_BRACKET ; noFieldRelevanceFunction diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java index 23d51c2d07..35abe12879 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java @@ -41,6 +41,7 @@ import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.MathExpressionAtomContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.MultiFieldRelevanceFunctionContext; import static 
org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.NoFieldRelevanceFunctionContext; +import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.ScoreRelevanceFunctionContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.NotExpressionContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.NullLiteralContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.OverClauseContext; @@ -91,6 +92,7 @@ import org.opensearch.sql.ast.expression.Or; import org.opensearch.sql.ast.expression.QualifiedName; import org.opensearch.sql.ast.expression.RelevanceFieldList; +import org.opensearch.sql.ast.expression.ScoreFunction; import org.opensearch.sql.ast.expression.UnresolvedArgument; import org.opensearch.sql.ast.expression.UnresolvedExpression; import org.opensearch.sql.ast.expression.When; @@ -98,6 +100,7 @@ import org.opensearch.sql.ast.tree.Sort.SortOption; import org.opensearch.sql.common.utils.StringUtils; import org.opensearch.sql.expression.function.BuiltinFunctionName; +import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.AlternateMultiMatchQueryContext; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.AndExpressionContext; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.ColumnNameContext; @@ -105,6 +108,7 @@ import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.IntervalLiteralContext; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.NestedExpressionAtomContext; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.OrExpressionContext; +import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.RelevanceFunctionContext; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.TableNameContext; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParserBaseVisitor; @@ -476,6 +480,16 @@ public UnresolvedExpression 
visitAltMultiFieldRelevanceFunction( altMultiFieldRelevanceFunctionArguments(ctx)); } + public UnresolvedExpression visitScoreRelevanceFunction(ScoreRelevanceFunctionContext ctx) { + RelevanceFunctionContext relevanceFunction = ctx.relevanceFunction(); + List functionArgs = ctx.functionArg(); + + return new ScoreFunction( + visit(ctx.relevanceFunction()), + ctx.functionArg().stream().map(this::visitFunctionArg).collect(Collectors.toList()) + ); + } + private Function buildFunction(String functionName, List arg) { return new Function( From 8c0aaf636aa3b7653d9ae6962ae6a15d8fa63fd2 Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Mon, 13 Feb 2023 19:20:35 -0800 Subject: [PATCH 06/40] Update score function expression analyzer to return boosted relevance function Signed-off-by: Andrew Carbonetto --- .../sql/analysis/ExpressionAnalyzer.java | 57 ++++++++++++++++++- 1 file changed, 55 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java index fafd29e963..1d3ce5f8e7 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java @@ -31,6 +31,7 @@ import org.opensearch.sql.ast.expression.Case; import org.opensearch.sql.ast.expression.Cast; import org.opensearch.sql.ast.expression.Compare; +import org.opensearch.sql.ast.expression.DataType; import org.opensearch.sql.ast.expression.EqualTo; import org.opensearch.sql.ast.expression.Field; import org.opensearch.sql.ast.expression.Function; @@ -211,8 +212,60 @@ public Expression visitHighlightFunction(HighlightFunction node, AnalysisContext } public Expression visitScoreFunction(ScoreFunction node, AnalysisContext context) { - Expression relevanceQueryExpr = node.getRelevanceQuery().accept(this, context); - return new ScoreExpression(relevanceQueryExpr); + // if no function argument + if 
(node.getFuncArgs().isEmpty() || !(node.getFuncArgs().get(0) instanceof Literal)) { + return node.getRelevanceQuery().accept(this, context); + } + + // note: if an argument exists, and there should only be one, it will be a boost argument + Literal boostFunctionArg = (Literal) node.getFuncArgs().get(0); + if (!boostFunctionArg.getType().equals(DataType.DOUBLE)) { + throw new SemanticCheckException(String.format("Expected boost type '%s' but got '%s'", + boostFunctionArg.getType().name(), DataType.DOUBLE.name())); + } + Double thisBoostValue = ((Double) ((Literal) node.getFuncArgs().get(0)).getValue()); + + // update the existing unresolved expression to add a boost argument if it doesn't exist + // OR multiply the existing boost argument + Function relevanceQueryUnresolvedExpr = (Function)node.getRelevanceQuery(); + List relevanceFuncArgs = relevanceQueryUnresolvedExpr.getFuncArgs(); + + boolean doesFunctionContainBoostArgument = false; + List updatedFuncArgs = new ArrayList<>(); + for (UnresolvedExpression expr: relevanceFuncArgs) { + if (!(expr instanceof UnresolvedArgument)) { + continue; + } + String argumentName = ((UnresolvedArgument) expr).getArgName(); + if (argumentName.equalsIgnoreCase("boost")) { + doesFunctionContainBoostArgument = true; + Literal boostArgLiteral = (Literal)((UnresolvedArgument) expr).getValue(); + Double boostValue = boostArgLiteral.getType() == DataType.STRING + ? 
Double.parseDouble((String)boostArgLiteral.getValue()) * thisBoostValue + : thisBoostValue; + UnresolvedArgument newBoostArg = new UnresolvedArgument( + argumentName, + new Literal(boostValue.toString(), DataType.STRING) + ); + updatedFuncArgs.add(newBoostArg); + } else { + updatedFuncArgs.add(expr); + } + } + + // since nothing was found, add an argument + if (!doesFunctionContainBoostArgument) { + UnresolvedArgument newBoostArg = new UnresolvedArgument( + "boost", new Literal(thisBoostValue, DataType.STRING)); + updatedFuncArgs.add(newBoostArg); + } + + // create a new function expression with boost argument and resolve it + Function updatedRelevanceQueryUnresolvedExpr = new Function( + relevanceQueryUnresolvedExpr.getFuncName(), + updatedFuncArgs); + Expression relevanceQueryExpr = updatedRelevanceQueryUnresolvedExpr.accept(this, context); + return relevanceQueryExpr; } @Override From 4d8229d8efe2512ceada12a1db8a91979de4dcf3 Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Tue, 14 Feb 2023 18:07:47 -0800 Subject: [PATCH 07/40] Update builder to track scores Signed-off-by: Andrew Carbonetto --- .../sql/analysis/ExpressionAnalyzer.java | 14 ++++++--- .../ExpressionReferenceOptimizer.java | 8 ++++- .../function/OpenSearchFunctions.java | 29 +++++-------------- .../request/OpenSearchRequestBuilder.java | 9 +++--- .../scan/OpenSearchIndexScanQueryBuilder.java | 18 +++++++++++- 5 files changed, 47 insertions(+), 31 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java index 1d3ce5f8e7..271f498c66 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java @@ -71,6 +71,7 @@ import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.expression.function.BuiltinFunctionRepository; import 
org.opensearch.sql.expression.function.FunctionName; +import org.opensearch.sql.expression.function.OpenSearchFunctions; import org.opensearch.sql.expression.parse.ParseExpression; import org.opensearch.sql.expression.span.SpanExpression; import org.opensearch.sql.expression.window.aggregation.AggregateWindowFunction; @@ -212,9 +213,12 @@ public Expression visitHighlightFunction(HighlightFunction node, AnalysisContext } public Expression visitScoreFunction(ScoreFunction node, AnalysisContext context) { - // if no function argument + // if no function argument given, just accept the relevance query and return if (node.getFuncArgs().isEmpty() || !(node.getFuncArgs().get(0) instanceof Literal)) { - return node.getRelevanceQuery().accept(this, context); + OpenSearchFunctions.OpenSearchFunction relevanceQueryExpr = + (OpenSearchFunctions.OpenSearchFunction)node.getRelevanceQuery().accept(this, context); + relevanceQueryExpr.setScoreTracked(true); + return relevanceQueryExpr; } // note: if an argument exists, and there should only be one, it will be a boost argument @@ -256,7 +260,7 @@ public Expression visitScoreFunction(ScoreFunction node, AnalysisContext context // since nothing was found, add an argument if (!doesFunctionContainBoostArgument) { UnresolvedArgument newBoostArg = new UnresolvedArgument( - "boost", new Literal(thisBoostValue, DataType.STRING)); + "boost", new Literal(Double.toString(thisBoostValue), DataType.STRING)); updatedFuncArgs.add(newBoostArg); } @@ -264,7 +268,9 @@ public Expression visitScoreFunction(ScoreFunction node, AnalysisContext context Function updatedRelevanceQueryUnresolvedExpr = new Function( relevanceQueryUnresolvedExpr.getFuncName(), updatedFuncArgs); - Expression relevanceQueryExpr = updatedRelevanceQueryUnresolvedExpr.accept(this, context); + OpenSearchFunctions.OpenSearchFunction relevanceQueryExpr = + (OpenSearchFunctions.OpenSearchFunction) updatedRelevanceQueryUnresolvedExpr.accept(this, context); + 
relevanceQueryExpr.setScoreTracked(true); return relevanceQueryExpr; } diff --git a/core/src/main/java/org/opensearch/sql/analysis/ExpressionReferenceOptimizer.java b/core/src/main/java/org/opensearch/sql/analysis/ExpressionReferenceOptimizer.java index f75bcd5a1d..b61acc1f7a 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/ExpressionReferenceOptimizer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/ExpressionReferenceOptimizer.java @@ -19,6 +19,7 @@ import org.opensearch.sql.expression.conditional.cases.CaseClause; import org.opensearch.sql.expression.conditional.cases.WhenClause; import org.opensearch.sql.expression.function.BuiltinFunctionRepository; +import org.opensearch.sql.expression.function.OpenSearchFunctions; import org.opensearch.sql.planner.logical.LogicalAggregation; import org.opensearch.sql.planner.logical.LogicalPlan; import org.opensearch.sql.planner.logical.LogicalPlanNodeVisitor; @@ -70,8 +71,13 @@ public Expression visitFunction(FunctionExpression node, AnalysisContext context final List args = node.getArguments().stream().map(expr -> expr.accept(this, context)) .collect(Collectors.toList()); - return (Expression) repository.compile(context.getFunctionProperties(), + Expression optimizedFunctionExpression = (Expression) repository.compile(context.getFunctionProperties(), node.getFunctionName(), args); + if (optimizedFunctionExpression instanceof OpenSearchFunctions.OpenSearchFunction) { + ((OpenSearchFunctions.OpenSearchFunction) optimizedFunctionExpression).setScoreTracked( + ((OpenSearchFunctions.OpenSearchFunction)node).isScoreTracked()); + } + return optimizedFunctionExpression; } } diff --git a/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java b/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java index 939706804b..4797b7521b 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java +++ 
b/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java @@ -10,6 +10,9 @@ import java.util.List; import java.util.stream.Collectors; + +import lombok.Getter; +import lombok.Setter; import lombok.experimental.UtilityClass; import org.opensearch.sql.data.model.ExprDoubleValue; import org.opensearch.sql.data.model.ExprValue; @@ -94,27 +97,6 @@ private static FunctionResolver wildcard_query(BuiltinFunctionName wildcardQuery return new RelevanceFunctionResolver(funcName); } - /** - * Definition of score() function. - * Enables score calculation for the match call - */ -// private static DefaultFunctionResolver score(BuiltinFunctionName score) { -// FunctionName funcName = score.getName(); -// return FunctionDSL.define(funcName, -// FunctionDSL.impl( -// FunctionDSL.nullMissingHandling( -// (relevanceFunc) -> new ExprDoubleValue( -// Math.pow(relevanceFunc.shortValue(), 1)) -// ), -// BOOLEAN, BOOLEAN), -// FunctionDSL.impl( -// FunctionDSL.nullMissingHandling( -// (relevanceFunc, boost) -> new ExprDoubleValue( -// Math.pow(relevanceFunc.shortValue(), boost.shortValue())) -// ), -// BOOLEAN, BOOLEAN, DOUBLE)); -// } - private static FunctionResolver score(BuiltinFunctionName score) { FunctionName funcName = score.getName(); return new RelevanceFunctionResolver(funcName); @@ -124,6 +106,10 @@ public static class OpenSearchFunction extends FunctionExpression { private final FunctionName functionName; private final List arguments; + @Getter + @Setter + private boolean isScoreTracked; + /** * Required argument constructor. 
* @param functionName name of the function @@ -133,6 +119,7 @@ public OpenSearchFunction(FunctionName functionName, List arguments) super(functionName, arguments); this.functionName = functionName; this.arguments = arguments; + this.isScoreTracked = false; } @Override diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java index ee4a89119e..d598476a3e 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java @@ -78,8 +78,6 @@ public class OpenSearchRequestBuilder { */ private Integer querySize; - private boolean trackScores; - public OpenSearchRequestBuilder(String indexName, Integer maxResultWindow, Settings settings, @@ -99,11 +97,10 @@ public OpenSearchRequestBuilder(OpenSearchRequest.IndexName indexName, this.sourceBuilder = new SearchSourceBuilder(); this.exprValueFactory = exprValueFactory; this.querySize = settings.getSettingValue(Settings.Key.QUERY_SIZE_LIMIT); - this.trackScores = true; sourceBuilder.from(0); sourceBuilder.size(querySize); sourceBuilder.timeout(DEFAULT_QUERY_TIMEOUT); - sourceBuilder.trackScores(this.trackScores); + sourceBuilder.trackScores(false); } /** @@ -184,6 +181,10 @@ public void pushDownLimit(Integer limit, Integer offset) { sourceBuilder.from(offset).size(limit); } + public void pushDownTrackedScore(boolean trackScores) { + sourceBuilder.trackScores(trackScores); + } + /** * Add highlight to DSL requests. 
* @param field name of the field to highlight diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanQueryBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanQueryBuilder.java index 7190d58000..867227ac00 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanQueryBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanQueryBuilder.java @@ -18,8 +18,10 @@ import org.opensearch.sql.common.utils.StringUtils; import org.opensearch.sql.expression.Expression; import org.opensearch.sql.expression.ExpressionNodeVisitor; +import org.opensearch.sql.expression.FunctionExpression; import org.opensearch.sql.expression.NamedExpression; import org.opensearch.sql.expression.ReferenceExpression; +import org.opensearch.sql.expression.function.OpenSearchFunctions; import org.opensearch.sql.opensearch.storage.OpenSearchIndexScan; import org.opensearch.sql.opensearch.storage.script.filter.FilterQueryBuilder; import org.opensearch.sql.opensearch.storage.script.sort.SortQueryBuilder; @@ -61,8 +63,10 @@ public TableScanOperator build() { public boolean pushDownFilter(LogicalFilter filter) { FilterQueryBuilder queryBuilder = new FilterQueryBuilder( new DefaultExpressionSerializer()); - QueryBuilder query = queryBuilder.build(filter.getCondition()); + Expression queryCondition = filter.getCondition(); + QueryBuilder query = queryBuilder.build(queryCondition); indexScan.getRequestBuilder().pushDown(query); + indexScan.getRequestBuilder().pushDownTrackedScore(trackScoresFromOpenSearchFunction(queryCondition)); return true; } @@ -99,6 +103,18 @@ public boolean pushDownHighlight(LogicalHighlight highlight) { return true; } + private boolean trackScoresFromOpenSearchFunction(Expression condition) { + if (condition instanceof OpenSearchFunctions.OpenSearchFunction) { + return 
((OpenSearchFunctions.OpenSearchFunction) condition).isScoreTracked(); + } + if (condition instanceof FunctionExpression) { + for(Expression expr: ((FunctionExpression) condition).getArguments()) { + return trackScoresFromOpenSearchFunction(expr); + } + } + return false; + } + /** * Find reference expression from expression. * @param expressions a list of expression. From 4630c981f9b7883e353642ef6a8a59e3e9e1067d Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Wed, 15 Feb 2023 09:07:47 -0800 Subject: [PATCH 08/40] Remove ScoreExpression.java and cleanup checkstyle Signed-off-by: Andrew Carbonetto --- .../sql/analysis/ExpressionAnalyzer.java | 15 +++- .../ExpressionReferenceOptimizer.java | 8 +- .../sql/ast/expression/ScoreFunction.java | 6 +- .../sql/expression/ExpressionNodeVisitor.java | 4 - .../sql/expression/ScoreExpression.java | 89 ------------------- .../function/OpenSearchFunctions.java | 4 - 6 files changed, 19 insertions(+), 107 deletions(-) delete mode 100644 core/src/main/java/org/opensearch/sql/expression/ScoreExpression.java diff --git a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java index 271f498c66..49179a7643 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java @@ -63,7 +63,6 @@ import org.opensearch.sql.expression.NamedArgumentExpression; import org.opensearch.sql.expression.NamedExpression; import org.opensearch.sql.expression.ReferenceExpression; -import org.opensearch.sql.expression.ScoreExpression; import org.opensearch.sql.expression.aggregation.AggregationState; import org.opensearch.sql.expression.aggregation.Aggregator; import org.opensearch.sql.expression.conditional.cases.CaseClause; @@ -212,11 +211,20 @@ public Expression visitHighlightFunction(HighlightFunction node, AnalysisContext return new HighlightExpression(expr); } + /** 
+ * visitScoreFunction removes the score function from the AST and replaces it with the child + * relevance function node. If the optional boost variable is provided, the boost argument + * of the relevance function is combined. + * @param node score function node + * @param context analysis context for the query + * @return resolved relevance function + */ public Expression visitScoreFunction(ScoreFunction node, AnalysisContext context) { // if no function argument given, just accept the relevance query and return if (node.getFuncArgs().isEmpty() || !(node.getFuncArgs().get(0) instanceof Literal)) { OpenSearchFunctions.OpenSearchFunction relevanceQueryExpr = - (OpenSearchFunctions.OpenSearchFunction)node.getRelevanceQuery().accept(this, context); + (OpenSearchFunctions.OpenSearchFunction) node + .getRelevanceQuery().accept(this, context); relevanceQueryExpr.setScoreTracked(true); return relevanceQueryExpr; } @@ -269,7 +277,8 @@ public Expression visitScoreFunction(ScoreFunction node, AnalysisContext context relevanceQueryUnresolvedExpr.getFuncName(), updatedFuncArgs); OpenSearchFunctions.OpenSearchFunction relevanceQueryExpr = - (OpenSearchFunctions.OpenSearchFunction) updatedRelevanceQueryUnresolvedExpr.accept(this, context); + (OpenSearchFunctions.OpenSearchFunction) updatedRelevanceQueryUnresolvedExpr + .accept(this, context); relevanceQueryExpr.setScoreTracked(true); return relevanceQueryExpr; } diff --git a/core/src/main/java/org/opensearch/sql/analysis/ExpressionReferenceOptimizer.java b/core/src/main/java/org/opensearch/sql/analysis/ExpressionReferenceOptimizer.java index b61acc1f7a..0d2d4f90bf 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/ExpressionReferenceOptimizer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/ExpressionReferenceOptimizer.java @@ -71,8 +71,12 @@ public Expression visitFunction(FunctionExpression node, AnalysisContext context final List args = node.getArguments().stream().map(expr -> expr.accept(this, context)) 
.collect(Collectors.toList()); - Expression optimizedFunctionExpression = (Expression) repository.compile(context.getFunctionProperties(), - node.getFunctionName(), args); + Expression optimizedFunctionExpression = (Expression) repository.compile( + context.getFunctionProperties(), + node.getFunctionName(), + args + ); + // OpenSearch functions can request score_tracked if (optimizedFunctionExpression instanceof OpenSearchFunctions.OpenSearchFunction) { ((OpenSearchFunctions.OpenSearchFunction) optimizedFunctionExpression).setScoreTracked( ((OpenSearchFunctions.OpenSearchFunction)node).isScoreTracked()); diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/ScoreFunction.java b/core/src/main/java/org/opensearch/sql/ast/expression/ScoreFunction.java index cdde418834..2061ff8aef 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/ScoreFunction.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/ScoreFunction.java @@ -9,12 +9,8 @@ import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.ToString; -import org.opensearch.sql.ast.AbstractNodeVisitor; - -import java.util.Collections; import java.util.List; -import java.util.Map; -import java.util.stream.Stream; +import org.opensearch.sql.ast.AbstractNodeVisitor; /** * Expression node of Highlight function. 
diff --git a/core/src/main/java/org/opensearch/sql/expression/ExpressionNodeVisitor.java b/core/src/main/java/org/opensearch/sql/expression/ExpressionNodeVisitor.java index 6ab2375067..e3d4e38674 100644 --- a/core/src/main/java/org/opensearch/sql/expression/ExpressionNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/expression/ExpressionNodeVisitor.java @@ -60,10 +60,6 @@ public T visitHighlight(HighlightExpression node, C context) { return visitNode(node, context); } - public T visitScore(ScoreExpression node, C context) { - return visitNode(node, context); - } - public T visitReference(ReferenceExpression node, C context) { return visitNode(node, context); } diff --git a/core/src/main/java/org/opensearch/sql/expression/ScoreExpression.java b/core/src/main/java/org/opensearch/sql/expression/ScoreExpression.java deleted file mode 100644 index d061dad6d2..0000000000 --- a/core/src/main/java/org/opensearch/sql/expression/ScoreExpression.java +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.expression; - -import lombok.Getter; -import org.opensearch.sql.common.utils.StringUtils; -import org.opensearch.sql.data.model.ExprNullValue; -import org.opensearch.sql.data.model.ExprTupleValue; -import org.opensearch.sql.data.model.ExprValue; -import org.opensearch.sql.data.model.ExprValueUtils; -import org.opensearch.sql.data.type.ExprCoreType; -import org.opensearch.sql.data.type.ExprType; -import org.opensearch.sql.expression.env.Environment; -import org.opensearch.sql.expression.function.BuiltinFunctionName; - -import java.util.LinkedHashMap; -import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import java.util.stream.Collectors; - -/** - * Score Expression. - */ -@Getter -public class ScoreExpression extends FunctionExpression { - - private final Expression relevanceQueryExpr; - - /** - * ScoreExpression Constructor. 
- * @param relevanceQueryExpr : relevanceQueryExpr for expression. - */ - public ScoreExpression(Expression relevanceQueryExpr) { - super(BuiltinFunctionName.SCORE.getName(), List.of(relevanceQueryExpr)); - this.relevanceQueryExpr = relevanceQueryExpr; - } - - /** - * Return collection value matching relevance query expression. - * @param valueEnv : Dataset to parse value from. - * @return : collection value of relevance query expression. - */ - @Override - public ExprValue valueOf(Environment valueEnv) { -// String refName = "_highlight"; -// // Not a wilcard expression -// if (this.type == ExprCoreType.ARRAY) { -// refName += "." + StringUtils.unquoteText(getHighlightField().toString()); -// } -// ExprValue value = valueEnv.resolve(DSL.ref(refName, ExprCoreType.STRING)); -// -// // In the event of multiple returned highlights and wildcard being -// // used in conjunction with other highlight calls, we need to ensure -// // only wildcard regex matching is mapped to wildcard call. -// if (this.type == ExprCoreType.STRUCT && value.type() == ExprCoreType.STRUCT) { -// value = new ExprTupleValue( -// new LinkedHashMap(value.tupleValue() -// .entrySet() -// .stream() -// .filter(s -> matchesHighlightRegex(s.getKey(), -// StringUtils.unquoteText(highlightField.toString()))) -// .collect(Collectors.toMap( -// e -> e.getKey(), -// e -> e.getValue())))); -// if (value.tupleValue().isEmpty()) { -// value = ExprValueUtils.missingValue(); -// } -// } - - // TODO: this is where we visit relevance function nodes and update BOOST values as necessary - // Otherwise, this is a no-op - - return ExprNullValue.of(); - } - - @Override - public T accept(ExpressionNodeVisitor visitor, C context) { - return visitor.visitScore(this, context); - } - - @Override - public ExprType type() { - return ExprCoreType.UNDEFINED; - } -} diff --git a/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java 
b/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java index 4797b7521b..9a50aca344 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java @@ -6,17 +6,13 @@ package org.opensearch.sql.expression.function; import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN; -import static org.opensearch.sql.data.type.ExprCoreType.DOUBLE; import java.util.List; import java.util.stream.Collectors; - import lombok.Getter; import lombok.Setter; import lombok.experimental.UtilityClass; -import org.opensearch.sql.data.model.ExprDoubleValue; import org.opensearch.sql.data.model.ExprValue; -import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.expression.Expression; import org.opensearch.sql.expression.FunctionExpression; From 9a10273d454095d4122f2ba4a56bff1e1d41cebd Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Wed, 15 Feb 2023 09:10:13 -0800 Subject: [PATCH 09/40] cleanup checkstyle Signed-off-by: Andrew Carbonetto --- .../java/org/opensearch/sql/ast/expression/ScoreFunction.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/ScoreFunction.java b/core/src/main/java/org/opensearch/sql/ast/expression/ScoreFunction.java index 2061ff8aef..1b73d97821 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/ScoreFunction.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/ScoreFunction.java @@ -5,11 +5,11 @@ package org.opensearch.sql.ast.expression; +import java.util.List; import lombok.AllArgsConstructor; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.ToString; -import java.util.List; import org.opensearch.sql.ast.AbstractNodeVisitor; /** From 0a9296908b78bdb54d26e4a6324807b8f9aa7124 Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto 
Date: Wed, 15 Feb 2023 14:47:58 -0800 Subject: [PATCH 10/40] Cleanup and add alternative score function syntax Signed-off-by: Andrew Carbonetto --- .../sql/analysis/ExpressionAnalyzer.java | 28 ++++++++----------- .../ExpressionReferenceOptimizer.java | 2 +- .../sql/ast/expression/QualifiedName.java | 17 +++++++++++ .../sql/ast/expression/ScoreFunction.java | 3 +- .../org/opensearch/sql/expression/DSL.java | 8 ++++++ .../rule/read/TableScanPushDown.java | 7 ----- doctest/bin/test-docs | 3 ++ .../scan/OpenSearchIndexScanBuilder.java | 2 -- .../OpenSearchIndexScanOptimizationTest.java | 2 -- sql/src/main/antlr/OpenSearchSQLLexer.g4 | 2 ++ sql/src/main/antlr/OpenSearchSQLParser.g4 | 6 +++- 11 files changed, 49 insertions(+), 31 deletions(-) create mode 100644 doctest/bin/test-docs diff --git a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java index 49179a7643..b14543dcab 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java @@ -8,6 +8,7 @@ import static org.opensearch.sql.ast.dsl.AstDSL.and; import static org.opensearch.sql.ast.dsl.AstDSL.compare; +import static org.opensearch.sql.ast.expression.QualifiedName.METADATAFIELD_TYPE_MAP; import static org.opensearch.sql.expression.function.BuiltinFunctionName.GTE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.LTE; @@ -392,24 +393,17 @@ public Expression visitUnresolvedArgument(UnresolvedArgument node, AnalysisConte return new NamedArgumentExpression(node.getArgName(), node.getValue().accept(this, context)); } + /** + * If QualifiedName is actually a reserved metadata field, return the expr type associated + * with the metadata field + * @param ident metadata field name + * @param context + * @return DSL reference + */ private Expression visitMetadata(String ident, AnalysisContext context) { - 
ReferenceExpression ref; - switch (ident.toLowerCase()) { - case "_index": - case "_id": - ref = DSL.ref(ident, ExprCoreType.STRING); - break; - case "_score": - case "_maxscore": - ref = DSL.ref(ident, ExprCoreType.FLOAT); - break; - case "_sort": - ref = DSL.ref(ident, ExprCoreType.LONG); - break; - default: - throw new SemanticCheckException("invalid metadata field"); - } - return ref; + ExprCoreType exprCoreType = Optional.ofNullable(METADATAFIELD_TYPE_MAP.get(ident)) + .orElseThrow(() -> new SemanticCheckException("invalid metadata field")); + return DSL.ref(ident, exprCoreType); } private Expression visitIdentifier(String ident, AnalysisContext context) { diff --git a/core/src/main/java/org/opensearch/sql/analysis/ExpressionReferenceOptimizer.java b/core/src/main/java/org/opensearch/sql/analysis/ExpressionReferenceOptimizer.java index 0d2d4f90bf..eaf5c4abca 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/ExpressionReferenceOptimizer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/ExpressionReferenceOptimizer.java @@ -76,7 +76,7 @@ public Expression visitFunction(FunctionExpression node, AnalysisContext context node.getFunctionName(), args ); - // OpenSearch functions can request score_tracked + // Propagate scoreTracked for OpenSearch functions if (optimizedFunctionExpression instanceof OpenSearchFunctions.OpenSearchFunction) { ((OpenSearchFunctions.OpenSearchFunction) optimizedFunctionExpression).setScoreTracked( ((OpenSearchFunctions.OpenSearchFunction)node).isScoreTracked()); diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java b/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java index 60f10ee3a1..0e48bf5e59 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java @@ -13,12 +13,14 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import 
java.util.HashMap; import java.util.List; import java.util.Optional; import java.util.stream.StreamSupport; import lombok.EqualsAndHashCode; import lombok.Getter; import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.data.type.ExprCoreType; @Getter @EqualsAndHashCode(callSuper = false) @@ -31,6 +33,21 @@ public QualifiedName(String name) { this(name, Boolean.FALSE); } + public static final String METADATA_FIELD_ID = "_id"; + public static final String METADATA_FIELD_INDEX = "_index"; + public static final String METADATA_FIELD_SCORE = "_score"; + public static final String METADATA_FIELD_MAXSCORE = "_maxscore"; + public static final String METADATA_FIELD_SORT = "_sort"; + public static final java.util.Map METADATAFIELD_TYPE_MAP = new HashMap<>(){ + { + put(METADATA_FIELD_ID, ExprCoreType.STRING); + put(METADATA_FIELD_INDEX, ExprCoreType.STRING); + put(METADATA_FIELD_SCORE, ExprCoreType.FLOAT); + put(METADATA_FIELD_MAXSCORE, ExprCoreType.FLOAT); + put(METADATA_FIELD_SORT, ExprCoreType.LONG); + } + }; + public QualifiedName(String name, Boolean isMetadataField) { this.parts = Collections.singletonList(name); this.isMetadataField = isMetadataField; diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/ScoreFunction.java b/core/src/main/java/org/opensearch/sql/ast/expression/ScoreFunction.java index 1b73d97821..0587151930 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/ScoreFunction.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/ScoreFunction.java @@ -13,7 +13,8 @@ import org.opensearch.sql.ast.AbstractNodeVisitor; /** - * Expression node of Highlight function. + * Expression node of Score function. 
+ * Score takes a relevance-search expression as an argument and returns it */ @AllArgsConstructor @EqualsAndHashCode(callSuper = false) diff --git a/core/src/main/java/org/opensearch/sql/expression/DSL.java b/core/src/main/java/org/opensearch/sql/expression/DSL.java index a22535507d..0d10a4f38c 100644 --- a/core/src/main/java/org/opensearch/sql/expression/DSL.java +++ b/core/src/main/java/org/opensearch/sql/expression/DSL.java @@ -794,6 +794,14 @@ public static FunctionExpression score(Expression... args) { return compile(FunctionProperties.None, BuiltinFunctionName.SCORE, args); } + public static FunctionExpression scorequery(Expression... args) { + return compile(FunctionProperties.None, BuiltinFunctionName.SCOREQUERY, args); + } + + public static FunctionExpression score_query(Expression... args) { + return compile(FunctionProperties.None, BuiltinFunctionName.SCORE_QUERY, args); + } + public static FunctionExpression now(FunctionProperties functionProperties, Expression... args) { return compile(functionProperties, BuiltinFunctionName.NOW, args); diff --git a/core/src/main/java/org/opensearch/sql/planner/optimizer/rule/read/TableScanPushDown.java b/core/src/main/java/org/opensearch/sql/planner/optimizer/rule/read/TableScanPushDown.java index bb4e51c39e..a6986375dc 100644 --- a/core/src/main/java/org/opensearch/sql/planner/optimizer/rule/read/TableScanPushDown.java +++ b/core/src/main/java/org/opensearch/sql/planner/optimizer/rule/read/TableScanPushDown.java @@ -74,13 +74,6 @@ public class TableScanPushDown implements Rule { scanBuilder())) .apply((highlight, scanBuilder) -> scanBuilder.pushDownHighlight(highlight)); - - public static final Rule PUSH_DOWN_SCORE = - match(highlight(scanBuilder())).apply( - (highlight, scanBuilder) -> scanBuilder.pushDownHighlight(highlight) - ); - - /** Pattern that matches a plan node. 
*/ private final WithPattern pattern; diff --git a/doctest/bin/test-docs b/doctest/bin/test-docs new file mode 100644 index 0000000000..6dc45d142b --- /dev/null +++ b/doctest/bin/test-docs @@ -0,0 +1,3 @@ +#!/bin/bash +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +$DIR/../.venv/bin/python -X faulthandler -m unittest -v --failfast test_docs \ No newline at end of file diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java index 9b36a29bf2..d7483cfcf0 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java @@ -34,8 +34,6 @@ public class OpenSearchIndexScanBuilder extends TableScanBuilder { /** Is limit operator pushed down. */ private boolean isLimitPushedDown = false; - private boolean isScoreTrackedPushedDown = false; - @VisibleForTesting OpenSearchIndexScanBuilder(TableScanBuilder delegate) { this.delegate = delegate; diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java index d800208105..363727cbd3 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java @@ -30,7 +30,6 @@ import static org.opensearch.sql.planner.optimizer.rule.read.TableScanPushDown.PUSH_DOWN_HIGHLIGHT; import static org.opensearch.sql.planner.optimizer.rule.read.TableScanPushDown.PUSH_DOWN_LIMIT; import static org.opensearch.sql.planner.optimizer.rule.read.TableScanPushDown.PUSH_DOWN_PROJECT; -import static 
org.opensearch.sql.planner.optimizer.rule.read.TableScanPushDown.PUSH_DOWN_SCORE; import static org.opensearch.sql.planner.optimizer.rule.read.TableScanPushDown.PUSH_DOWN_SORT; import com.google.common.collect.ImmutableList; @@ -604,7 +603,6 @@ private LogicalPlan optimize(LogicalPlan plan) { PUSH_DOWN_SORT, PUSH_DOWN_LIMIT, PUSH_DOWN_HIGHLIGHT, - PUSH_DOWN_SCORE, PUSH_DOWN_PROJECT)); return optimizer.optimize(plan); } diff --git a/sql/src/main/antlr/OpenSearchSQLLexer.g4 b/sql/src/main/antlr/OpenSearchSQLLexer.g4 index fc596dc828..2b03a8c010 100644 --- a/sql/src/main/antlr/OpenSearchSQLLexer.g4 +++ b/sql/src/main/antlr/OpenSearchSQLLexer.g4 @@ -332,6 +332,8 @@ REVERSE_NESTED: 'REVERSE_NESTED'; QUERY: 'QUERY'; RANGE: 'RANGE'; SCORE: 'SCORE'; +SCOREQUERY: 'SCOREQUERY'; +SCORE_QUERY: 'SCORE_QUERY'; SECOND_OF_MINUTE: 'SECOND_OF_MINUTE'; STATS: 'STATS'; TERM: 'TERM'; diff --git a/sql/src/main/antlr/OpenSearchSQLParser.g4 b/sql/src/main/antlr/OpenSearchSQLParser.g4 index e864f880e2..722c4bd98e 100644 --- a/sql/src/main/antlr/OpenSearchSQLParser.g4 +++ b/sql/src/main/antlr/OpenSearchSQLParser.g4 @@ -358,7 +358,7 @@ relevanceFunction ; scoreRelevanceFunction - : scoreRelevanceFunctionName=SCORE LR_BRACKET relevanceFunction (COMMA functionArg)* RR_BRACKET + : scoreRelevanceFunctionName LR_BRACKET relevanceFunction (COMMA functionArg)* RR_BRACKET ; noFieldRelevanceFunction @@ -506,6 +506,10 @@ systemFunctionName : TYPEOF ; +scoreRelevanceFunctionName + : SCORE | SCOREQUERY | SCORE_QUERY + ; + singleFieldRelevanceFunctionName : MATCH | MATCHQUERY | MATCH_QUERY | MATCH_PHRASE | MATCHPHRASE | MATCHPHRASEQUERY From 3b5e9004794733507da876924a0677ab9f4ce045 Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Wed, 15 Feb 2023 14:49:15 -0800 Subject: [PATCH 11/40] Cleanup and add alternative score function syntax Signed-off-by: Andrew Carbonetto --- doctest/bin/test-docs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doctest/bin/test-docs 
b/doctest/bin/test-docs index 6dc45d142b..4dd4390b89 100644 --- a/doctest/bin/test-docs +++ b/doctest/bin/test-docs @@ -1,3 +1,3 @@ #!/bin/bash DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -$DIR/../.venv/bin/python -X faulthandler -m unittest -v --failfast test_docs \ No newline at end of file +$DIR/../.venv/bin/python -X faulthandler -m unittest -v --failfast test_docs From 348b8b992df6c558243c81b45cd7f34544005e5c Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Thu, 16 Feb 2023 13:25:34 -0800 Subject: [PATCH 12/40] Fix some bugs and add Expression tests Signed-off-by: Andrew Carbonetto --- .../sql/analysis/ExpressionAnalyzer.java | 12 +- .../sql/ast/expression/QualifiedName.java | 2 +- .../rule/read/TableScanPushDown.java | 1 + .../sql/analysis/ExpressionAnalyzerTest.java | 135 ++++++++++++++++++ .../response/OpenSearchResponse.java | 2 +- .../scan/OpenSearchIndexScanQueryBuilder.java | 9 +- 6 files changed, 152 insertions(+), 9 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java index b14543dcab..b9c6ec9bdb 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java @@ -232,11 +232,15 @@ public Expression visitScoreFunction(ScoreFunction node, AnalysisContext context // note: if an argument exists, and there should only be one, it will be a boost argument Literal boostFunctionArg = (Literal) node.getFuncArgs().get(0); - if (!boostFunctionArg.getType().equals(DataType.DOUBLE)) { + Double thisBoostValue = 1.0; + if (boostFunctionArg.getType().equals(DataType.DOUBLE)) { + thisBoostValue = ((Double) boostFunctionArg.getValue()); + } else if (boostFunctionArg.getType().equals(DataType.DOUBLE)) { + thisBoostValue = ((Integer) boostFunctionArg.getValue()).doubleValue(); + } else { throw new SemanticCheckException(String.format("Expected 
boost type '%s' but got '%s'", boostFunctionArg.getType().name(), DataType.DOUBLE.name())); } - Double thisBoostValue = ((Double) ((Literal) node.getFuncArgs().get(0)).getValue()); // update the existing unresolved expression to add a boost argument if it doesn't exist // OR multiply the existing boost argument @@ -395,9 +399,9 @@ public Expression visitUnresolvedArgument(UnresolvedArgument node, AnalysisConte /** * If QualifiedName is actually a reserved metadata field, return the expr type associated - * with the metadata field + * with the metadata field. * @param ident metadata field name - * @param context + * @param context analysis context * @return DSL reference */ private Expression visitMetadata(String ident, AnalysisContext context) { diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java b/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java index 0e48bf5e59..abcbcfa509 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java @@ -38,7 +38,7 @@ public QualifiedName(String name) { public static final String METADATA_FIELD_SCORE = "_score"; public static final String METADATA_FIELD_MAXSCORE = "_maxscore"; public static final String METADATA_FIELD_SORT = "_sort"; - public static final java.util.Map METADATAFIELD_TYPE_MAP = new HashMap<>(){ + public static final java.util.Map METADATAFIELD_TYPE_MAP = new HashMap<>() { { put(METADATA_FIELD_ID, ExprCoreType.STRING); put(METADATA_FIELD_INDEX, ExprCoreType.STRING); diff --git a/core/src/main/java/org/opensearch/sql/planner/optimizer/rule/read/TableScanPushDown.java b/core/src/main/java/org/opensearch/sql/planner/optimizer/rule/read/TableScanPushDown.java index a6986375dc..556a12bb34 100644 --- a/core/src/main/java/org/opensearch/sql/planner/optimizer/rule/read/TableScanPushDown.java +++ 
b/core/src/main/java/org/opensearch/sql/planner/optimizer/rule/read/TableScanPushDown.java @@ -74,6 +74,7 @@ public class TableScanPushDown implements Rule { scanBuilder())) .apply((highlight, scanBuilder) -> scanBuilder.pushDownHighlight(highlight)); + /** Pattern that matches a plan node. */ private final WithPattern pattern; diff --git a/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java b/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java index 0ab84e94b7..4924d254bd 100644 --- a/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java +++ b/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java @@ -639,6 +639,141 @@ public void match_phrase_prefix_all_params() { ); } + @Test void score_function_expression() { + assertAnalyzeEqual( + DSL.score( + DSL.namedArgument("RelevanceQuery", + DSL.match_phrase_prefix( + DSL.namedArgument("field", "field_value1"), + DSL.namedArgument("query", "search query"), + DSL.namedArgument("slop", "3") + ) + )), + AstDSL.function("score", + unresolvedArg("RelevanceQuery", + AstDSL.function("match_phrase_prefix", + unresolvedArg("field", stringLiteral("field_value1")), + unresolvedArg("query", stringLiteral("search query")), + unresolvedArg("slop", stringLiteral("3")) + ) + ) + ) + ); + } + + @Test void score_function_with_boost() { + assertAnalyzeEqual( + DSL.score( + DSL.namedArgument("RelevanceQuery", + DSL.match_phrase_prefix( + DSL.namedArgument("field", "field_value1"), + DSL.namedArgument("query", "search query"), + DSL.namedArgument("boost", "3.0") + )), + DSL.namedArgument("boost", "2.0") + ), + AstDSL.function("score", + unresolvedArg("RelevanceQuery", + AstDSL.function("match_phrase_prefix", + unresolvedArg("field", stringLiteral("field_value1")), + unresolvedArg("query", stringLiteral("search query")), + unresolvedArg("boost", stringLiteral("3.0")) + ) + ), + unresolvedArg("boost", stringLiteral("2.0")) + ) + ); + } + + @Test void 
score_query_function_expression() { + assertAnalyzeEqual( + DSL.score_query( + DSL.namedArgument("RelevanceQuery", + DSL.wildcard_query( + DSL.namedArgument("field", "field_value1"), + DSL.namedArgument("query", "search query") + ) + )), + AstDSL.function("score_query", + unresolvedArg("RelevanceQuery", + AstDSL.function("wildcard_query", + unresolvedArg("field", stringLiteral("field_value1")), + unresolvedArg("query", stringLiteral("search query")) + ) + ) + ) + ); + } + + @Test void score_query_function_with_boost() { + assertAnalyzeEqual( + DSL.score_query( + DSL.namedArgument("RelevanceQuery", + DSL.wildcard_query( + DSL.namedArgument("field", "field_value1"), + DSL.namedArgument("query", "search query") + ) + ), + DSL.namedArgument("boost", "2.0") + ), + AstDSL.function("score_query", + unresolvedArg("RelevanceQuery", + AstDSL.function("wildcard_query", + unresolvedArg("field", stringLiteral("field_value1")), + unresolvedArg("query", stringLiteral("search query")) + ) + ), + unresolvedArg("boost", stringLiteral("2.0")) + ) + ); + } + + @Test void scorequery_function_expression() { + assertAnalyzeEqual( + DSL.scorequery( + DSL.namedArgument("RelevanceQuery", + DSL.simple_query_string( + DSL.namedArgument("field", "field_value1"), + DSL.namedArgument("query", "search query"), + DSL.namedArgument("slop", "3") + ) + )), + AstDSL.function("scorequery", + unresolvedArg("RelevanceQuery", + AstDSL.function("simple_query_string", + unresolvedArg("field", stringLiteral("field_value1")), + unresolvedArg("query", stringLiteral("search query")), + unresolvedArg("slop", stringLiteral("3")) + ) + ) + ) + ); + } + + @Test void scorequery_function_with_boost() { + assertAnalyzeEqual( + DSL.scorequery( + DSL.namedArgument("RelevanceQuery", + DSL.simple_query_string( + DSL.namedArgument("field", "field_value1"), + DSL.namedArgument("query", "search query"), + DSL.namedArgument("slop", "3") + )), + DSL.namedArgument("boost", "2.0") + ), + AstDSL.function("scorequery", + 
unresolvedArg("RelevanceQuery", + AstDSL.function("simple_query_string", + unresolvedArg("field", stringLiteral("field_value1")), + unresolvedArg("query", stringLiteral("search query")), + unresolvedArg("slop", stringLiteral("3")) + ) + ), + unresolvedArg("boost", stringLiteral("2.0")) + ) + ); + } + @Test public void function_isnt_calculated_on_analyze() { assertTrue(analyze(function("now")) instanceof FunctionExpression); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java index 5563ee5136..d1a77ef7f6 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java @@ -109,7 +109,7 @@ public Iterator iterator() { builder.put("_score", new ExprFloatValue(hit.getScore())); } if (!Float.isNaN(maxScore)) { - builder.put("_maxscore", new ExprLongValue(maxScore)); + builder.put("_maxscore", new ExprFloatValue(maxScore)); } builder.put("_sort", new ExprLongValue(hit.getSeqNo())); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanQueryBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanQueryBuilder.java index 867227ac00..6b7a3d144e 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanQueryBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanQueryBuilder.java @@ -104,12 +104,15 @@ public boolean pushDownHighlight(LogicalHighlight highlight) { } private boolean trackScoresFromOpenSearchFunction(Expression condition) { - if (condition instanceof OpenSearchFunctions.OpenSearchFunction) { - return ((OpenSearchFunctions.OpenSearchFunction) condition).isScoreTracked(); + if (condition instanceof 
OpenSearchFunctions.OpenSearchFunction && + ((OpenSearchFunctions.OpenSearchFunction) condition).isScoreTracked()) { + return true; } if (condition instanceof FunctionExpression) { for(Expression expr: ((FunctionExpression) condition).getArguments()) { - return trackScoresFromOpenSearchFunction(expr); + if (trackScoresFromOpenSearchFunction(expr)) { + return true; + } } } return false; From f67d4f2ab61d567253e5cefdba91000152a4e43c Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Thu, 16 Feb 2023 14:55:14 -0800 Subject: [PATCH 13/40] Add expresssion and analyzer tests Signed-off-by: Andrew Carbonetto --- .../sql/analysis/ExpressionAnalyzer.java | 2 +- .../opensearch/sql/analysis/AnalyzerTest.java | 113 ++++++++++++++++++ .../sql/analysis/ExpressionAnalyzerTest.java | 4 +- 3 files changed, 116 insertions(+), 3 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java index b9c6ec9bdb..d0e1367b71 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java @@ -235,7 +235,7 @@ public Expression visitScoreFunction(ScoreFunction node, AnalysisContext context Double thisBoostValue = 1.0; if (boostFunctionArg.getType().equals(DataType.DOUBLE)) { thisBoostValue = ((Double) boostFunctionArg.getValue()); - } else if (boostFunctionArg.getType().equals(DataType.DOUBLE)) { + } else if (boostFunctionArg.getType().equals(DataType.INTEGER)) { thisBoostValue = ((Integer) boostFunctionArg.getValue()).doubleValue(); } else { throw new SemanticCheckException(String.format("Expected boost type '%s' but got '%s'", diff --git a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java index 1db29a6a42..0c826c0741 100644 --- a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java +++ 
b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java @@ -71,17 +71,22 @@ import org.opensearch.sql.ast.expression.HighlightFunction; import org.opensearch.sql.ast.expression.Literal; import org.opensearch.sql.ast.expression.ParseMethod; +import org.opensearch.sql.ast.expression.ScoreFunction; import org.opensearch.sql.ast.expression.SpanUnit; import org.opensearch.sql.ast.tree.AD; import org.opensearch.sql.ast.tree.Kmeans; import org.opensearch.sql.ast.tree.ML; import org.opensearch.sql.ast.tree.RareTopN.CommandType; +import org.opensearch.sql.ast.tree.UnresolvedPlan; +import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.exception.ExpressionEvaluationException; import org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.expression.DSL; import org.opensearch.sql.expression.HighlightExpression; +import org.opensearch.sql.expression.function.OpenSearchFunctions; import org.opensearch.sql.expression.window.WindowDefinition; import org.opensearch.sql.planner.logical.LogicalAD; +import org.opensearch.sql.planner.logical.LogicalFilter; import org.opensearch.sql.planner.logical.LogicalMLCommons; import org.opensearch.sql.planner.logical.LogicalPlan; import org.opensearch.sql.planner.logical.LogicalPlanDSL; @@ -214,6 +219,114 @@ public void filter_relation_with_multiple_tables() { AstDSL.equalTo(AstDSL.field("integer_value"), AstDSL.intLiteral(1)))); } + @Test + public void analyze_filter_visit_score_function() { + UnresolvedPlan unresolvedPlan = AstDSL.filter( + AstDSL.relation("schema"), + new ScoreFunction( + AstDSL.function("match_phrase_prefix", + AstDSL.unresolvedArg("field", stringLiteral("field_value1")), + AstDSL.unresolvedArg("query", stringLiteral("search query")), + AstDSL.unresolvedArg("boost", stringLiteral("3")) + ), List.of()) + ); + assertAnalyzeEqual( + LogicalPlanDSL.filter( + LogicalPlanDSL.relation("schema", table), + DSL.match_phrase_prefix( + DSL.namedArgument("field", "field_value1"), + 
DSL.namedArgument("query", "search query"), + DSL.namedArgument("boost", "3") + ) + ), + unresolvedPlan + ); + + LogicalPlan logicalPlan = analyze(unresolvedPlan); + OpenSearchFunctions.OpenSearchFunction relevanceQuery = (OpenSearchFunctions.OpenSearchFunction)((LogicalFilter) logicalPlan).getCondition(); + assertEquals(true, relevanceQuery.isScoreTracked()); + } + + @Test + public void analyze_filter_visit_score_function_with_double_boost() { + UnresolvedPlan unresolvedPlan = AstDSL.filter( + AstDSL.relation("schema"), + new ScoreFunction( + AstDSL.function("match_phrase_prefix", + AstDSL.unresolvedArg("field", stringLiteral("field_value1")), + AstDSL.unresolvedArg("query", stringLiteral("search query")), + AstDSL.unresolvedArg("slop", stringLiteral("3")) + ), List.of(new Literal(3.0, DataType.DOUBLE)) + ) + ); + + assertAnalyzeEqual( + LogicalPlanDSL.filter( + LogicalPlanDSL.relation("schema", table), + DSL.match_phrase_prefix( + DSL.namedArgument("field", "field_value1"), + DSL.namedArgument("query", "search query"), + DSL.namedArgument("slop", "3"), + DSL.namedArgument("boost", "3.0") + ) + ), + unresolvedPlan + ); + + LogicalPlan logicalPlan = analyze(unresolvedPlan); + OpenSearchFunctions.OpenSearchFunction relevanceQuery = (OpenSearchFunctions.OpenSearchFunction)((LogicalFilter) logicalPlan).getCondition(); + assertEquals(true, relevanceQuery.isScoreTracked()); + } + + @Test + public void analyze_filter_visit_score_function_with_integer_boost() { + assertAnalyzeEqual( + LogicalPlanDSL.filter( + LogicalPlanDSL.relation("schema", table), + DSL.match_phrase_prefix( + DSL.namedArgument("field", "field_value1"), + DSL.namedArgument("query", "search query"), + DSL.namedArgument("boost", "9.0") + ) + ), + AstDSL.filter( + AstDSL.relation("schema"), + new ScoreFunction( + AstDSL.function("match_phrase_prefix", + AstDSL.unresolvedArg("field", stringLiteral("field_value1")), + AstDSL.unresolvedArg("query", stringLiteral("search query")), + 
AstDSL.unresolvedArg("boost", stringLiteral("3")) + ), List.of(new Literal(3, DataType.INTEGER)) + ) + ) + ); + } + + @Test + public void analyze_filter_visit_score_function_with() { + assertAnalyzeEqual( + LogicalPlanDSL.filter( + LogicalPlanDSL.relation("schema", table), + DSL.match_phrase_prefix( + DSL.namedArgument("field", "field_value1"), + DSL.namedArgument("query", "search query"), + DSL.namedArgument("slop", "3"), + DSL.namedArgument("boost", "3.0") + ) + ), + AstDSL.filter( + AstDSL.relation("schema"), + new ScoreFunction( + AstDSL.function("match_phrase_prefix", + AstDSL.unresolvedArg("field", stringLiteral("field_value1")), + AstDSL.unresolvedArg("query", stringLiteral("search query")), + AstDSL.unresolvedArg("slop", stringLiteral("3")) + ), List.of(new Literal(3, DataType.INTEGER)) + ) + ) + ); + } + @Test public void head_relation() { assertAnalyzeEqual( diff --git a/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java b/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java index 4924d254bd..b92aad31e7 100644 --- a/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java +++ b/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java @@ -670,7 +670,7 @@ public void match_phrase_prefix_all_params() { DSL.namedArgument("query", "search query"), DSL.namedArgument("boost", "3.0") )), - DSL.namedArgument("boost", "2.0") + DSL.namedArgument("boost", "2") ), AstDSL.function("score", unresolvedArg("RelevanceQuery", @@ -680,7 +680,7 @@ public void match_phrase_prefix_all_params() { unresolvedArg("boost", stringLiteral("3.0")) ) ), - unresolvedArg("boost", stringLiteral("2.0")) + unresolvedArg("boost", stringLiteral("2")) ) ); } From be7191aee6deda75f8864f929100eec96332be8f Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Fri, 17 Feb 2023 00:15:23 -0800 Subject: [PATCH 14/40] Add score doctests Signed-off-by: Andrew Carbonetto --- docs/user/dql/functions.rst | 28 
++++++++++++++++++++++++++++ doctest/bin/test-docs | 0 2 files changed, 28 insertions(+) mode change 100644 => 100755 doctest/bin/test-docs diff --git a/docs/user/dql/functions.rst b/docs/user/dql/functions.rst index 5d5a3e1f96..70d2158e36 100644 --- a/docs/user/dql/functions.rst +++ b/docs/user/dql/functions.rst @@ -3839,6 +3839,34 @@ Another example to show how to set custom values for the optional parameters:: | 1 | The House at Pooh Corner | Alan Alexander Milne | +------+--------------------------+----------------------+ +SCORE +------------ + +Description +>>>>>>>>>>> + +``score(search_expression, boost)`` +``score_query(search_expression, boost)`` +``scorequery(search_expression, boost)`` + +The score function returns the _score of any documents matching the enclosed relevance-search expression. The SCORE function expects two +arguments. The first argument is the search expression. The second argument is an optional floating-point number to boost the score (the default value is 1.0). +Please refer to examples below: + +| ``score(query('Tags:taste OR Body:taste', ...), 2.0)`` + +The `score_query` and `scorequery` functions are alternative syntax to the `score` function. 
+ +Example boosting score:: + + os> select *, _score from books where score(query('title:Pooh House', default_operator='AND'), 2.0); + fetched rows / total rows = 1/1 + +------+--------------------------+----------------------+-----------+ + | id | title | author | _score | + |------+--------------------------+----------------------+-----------| + | 1 | The House at Pooh Corner | Alan Alexander Milne | 1.5884793 | + +------+--------------------------+----------------------+-----------+ + HIGHLIGHT ------------ diff --git a/doctest/bin/test-docs b/doctest/bin/test-docs old mode 100644 new mode 100755 From 1a0b3ef9235885266efcb3ffeb7d026af1bd801f Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Fri, 17 Feb 2023 00:39:23 -0800 Subject: [PATCH 15/40] Add score function doctests Signed-off-by: Andrew Carbonetto --- .../sql/analysis/ExpressionAnalyzer.java | 11 ++---- .../opensearch/sql/analysis/AnalyzerTest.java | 36 +++++++++++++++---- docs/user/dql/functions.rst | 22 ++++++++++++ 3 files changed, 54 insertions(+), 15 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java index d0e1367b71..75b093248a 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java @@ -232,14 +232,14 @@ public Expression visitScoreFunction(ScoreFunction node, AnalysisContext context // note: if an argument exists, and there should only be one, it will be a boost argument Literal boostFunctionArg = (Literal) node.getFuncArgs().get(0); - Double thisBoostValue = 1.0; + Double thisBoostValue; if (boostFunctionArg.getType().equals(DataType.DOUBLE)) { thisBoostValue = ((Double) boostFunctionArg.getValue()); } else if (boostFunctionArg.getType().equals(DataType.INTEGER)) { thisBoostValue = ((Integer) boostFunctionArg.getValue()).doubleValue(); } else { throw new 
SemanticCheckException(String.format("Expected boost type '%s' but got '%s'", - boostFunctionArg.getType().name(), DataType.DOUBLE.name())); + DataType.DOUBLE.name(), boostFunctionArg.getType().name())); } // update the existing unresolved expression to add a boost argument if it doesn't exist @@ -250,16 +250,11 @@ public Expression visitScoreFunction(ScoreFunction node, AnalysisContext context boolean doesFunctionContainBoostArgument = false; List updatedFuncArgs = new ArrayList<>(); for (UnresolvedExpression expr: relevanceFuncArgs) { - if (!(expr instanceof UnresolvedArgument)) { - continue; - } String argumentName = ((UnresolvedArgument) expr).getArgName(); if (argumentName.equalsIgnoreCase("boost")) { doesFunctionContainBoostArgument = true; Literal boostArgLiteral = (Literal)((UnresolvedArgument) expr).getValue(); - Double boostValue = boostArgLiteral.getType() == DataType.STRING - ? Double.parseDouble((String)boostArgLiteral.getValue()) * thisBoostValue - : thisBoostValue; + Double boostValue = Double.parseDouble((String)boostArgLiteral.getValue()) * thisBoostValue; UnresolvedArgument newBoostArg = new UnresolvedArgument( argumentName, new Literal(boostValue.toString(), DataType.STRING) diff --git a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java index 0c826c0741..6c87a28a10 100644 --- a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java +++ b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java @@ -243,7 +243,8 @@ public void analyze_filter_visit_score_function() { ); LogicalPlan logicalPlan = analyze(unresolvedPlan); - OpenSearchFunctions.OpenSearchFunction relevanceQuery = (OpenSearchFunctions.OpenSearchFunction)((LogicalFilter) logicalPlan).getCondition(); + OpenSearchFunctions.OpenSearchFunction relevanceQuery = + (OpenSearchFunctions.OpenSearchFunction)((LogicalFilter) logicalPlan).getCondition(); assertEquals(true, 
relevanceQuery.isScoreTracked()); } @@ -274,7 +275,8 @@ public void analyze_filter_visit_score_function_with_double_boost() { ); LogicalPlan logicalPlan = analyze(unresolvedPlan); - OpenSearchFunctions.OpenSearchFunction relevanceQuery = (OpenSearchFunctions.OpenSearchFunction)((LogicalFilter) logicalPlan).getCondition(); + OpenSearchFunctions.OpenSearchFunction relevanceQuery = + (OpenSearchFunctions.OpenSearchFunction)((LogicalFilter) logicalPlan).getCondition(); assertEquals(true, relevanceQuery.isScoreTracked()); } @@ -303,15 +305,35 @@ public void analyze_filter_visit_score_function_with_integer_boost() { } @Test - public void analyze_filter_visit_score_function_with() { + public void analyze_filter_visit_score_function_with_unsupported_boost_SemanticCheckException() { + UnresolvedPlan unresolvedPlan = AstDSL.filter( + AstDSL.relation("schema"), + new ScoreFunction( + AstDSL.function("match_phrase_prefix", + AstDSL.unresolvedArg("field", stringLiteral("field_value1")), + AstDSL.unresolvedArg("query", stringLiteral("search query")), + AstDSL.unresolvedArg("boost", stringLiteral("3")) + ), List.of(new Literal("3.0", DataType.STRING)) + ) + ); + SemanticCheckException exception = + assertThrows( + SemanticCheckException.class, + () -> analyze(unresolvedPlan)); + assertEquals( + "Expected boost type 'DOUBLE' but got 'STRING'", + exception.getMessage()); + } + + @Test + public void analyze_filter_visit_score_function_with_invalid_field_ignored() { assertAnalyzeEqual( LogicalPlanDSL.filter( LogicalPlanDSL.relation("schema", table), DSL.match_phrase_prefix( DSL.namedArgument("field", "field_value1"), DSL.namedArgument("query", "search query"), - DSL.namedArgument("slop", "3"), - DSL.namedArgument("boost", "3.0") + DSL.namedArgument("boost", "3") ) ), AstDSL.filter( @@ -320,8 +342,8 @@ public void analyze_filter_visit_score_function_with() { AstDSL.function("match_phrase_prefix", AstDSL.unresolvedArg("field", stringLiteral("field_value1")), 
AstDSL.unresolvedArg("query", stringLiteral("search query")), - AstDSL.unresolvedArg("slop", stringLiteral("3")) - ), List.of(new Literal(3, DataType.INTEGER)) + AstDSL.unresolvedArg("boost", stringLiteral("3")) + ), List.of(AstDSL.unresolvedArg("invalid", stringLiteral("value"))) ) ) ); diff --git a/docs/user/dql/functions.rst b/docs/user/dql/functions.rst index 70d2158e36..8f7f728ee9 100644 --- a/docs/user/dql/functions.rst +++ b/docs/user/dql/functions.rst @@ -3845,6 +3845,7 @@ SCORE Description >>>>>>>>>>> +<<<<<<< Updated upstream ``score(search_expression, boost)`` ``score_query(search_expression, boost)`` ``scorequery(search_expression, boost)`` @@ -3866,6 +3867,27 @@ Example boosting score:: |------+--------------------------+----------------------+-----------| | 1 | The House at Pooh Corner | Alan Alexander Milne | 1.5884793 | +------+--------------------------+----------------------+-----------+ +======= +``SCORE(search_expression, boost)`` +``SCOREQUERY(search_expression, boost)`` +``SCORE_QUERY(search_expression, boost)`` + +The score function ensures that tracked_scores are returned with every matching document of a relevance search query. +The score function expects two arguments. The first argument is the relevance-search expression. The second argument is an optional floating-point number to boost the score: +`scorequery` and `score_query` functions are alternative names for the `score` function. 
+ +Please refer to examples below: + + os> select *, _score from books where score(query('title:Pooh House')); + fetched rows / total rows = 2/2 + +------+--------------------------+----------------------+ + | id | title | author | + |------+--------------------------+----------------------| + | 1 | The House at Pooh Corner | Alan Alexander Milne | + | 2 | Winnie-the-Pooh | Alan Alexander Milne | + +------+--------------------------+----------------------+ + +>>>>>>> Stashed changes HIGHLIGHT ------------ From 05d9dd362f66f26a5789288ca1c04d40eb045f32 Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Fri, 3 Mar 2023 09:06:10 -0800 Subject: [PATCH 16/40] Add metafield tests Signed-off-by: Andrew Carbonetto --- docs/user/dql/functions.rst | 22 ---- .../opensearch/sql/legacy/MethodQueryIT.java | 6 +- .../sql/legacy/PrettyFormatResponseIT.java | 4 +- .../org/opensearch/sql/sql/ScoreQueryIT.java | 116 ++++++++++++++++++ .../client/OpenSearchNodeClientTest.java | 2 +- .../client/OpenSearchRestClientTest.java | 2 +- .../response/OpenSearchResponseTest.java | 45 ++++++- .../OpenSearchIndexScanOptimizationTest.java | 74 +++++++++++ 8 files changed, 241 insertions(+), 30 deletions(-) create mode 100644 integ-test/src/test/java/org/opensearch/sql/sql/ScoreQueryIT.java diff --git a/docs/user/dql/functions.rst b/docs/user/dql/functions.rst index 8f7f728ee9..70d2158e36 100644 --- a/docs/user/dql/functions.rst +++ b/docs/user/dql/functions.rst @@ -3845,7 +3845,6 @@ SCORE Description >>>>>>>>>>> -<<<<<<< Updated upstream ``score(search_expression, boost)`` ``score_query(search_expression, boost)`` ``scorequery(search_expression, boost)`` @@ -3867,27 +3866,6 @@ Example boosting score:: |------+--------------------------+----------------------+-----------| | 1 | The House at Pooh Corner | Alan Alexander Milne | 1.5884793 | +------+--------------------------+----------------------+-----------+ -======= -``SCORE(search_expression, boost)`` -``SCOREQUERY(search_expression, boost)`` 
-``SCORE_QUERY(search_expression, boost)`` - -The score function ensures that tracked_scores are returned with every matching document of a relevance search query. -The score function expects two arguments. The first argument is the relevance-search expression. The second argument is an optional floating-point number to boost the score: -`scorequery` and `score_query` functions are alternative names for the `score` function. - -Please refer to examples below: - - os> select *, _score from books where score(query('title:Pooh House')); - fetched rows / total rows = 2/2 - +------+--------------------------+----------------------+ - | id | title | author | - |------+--------------------------+----------------------| - | 1 | The House at Pooh Corner | Alan Alexander Milne | - | 2 | Winnie-the-Pooh | Alan Alexander Milne | - +------+--------------------------+----------------------+ - ->>>>>>> Stashed changes HIGHLIGHT ------------ diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/MethodQueryIT.java b/integ-test/src/test/java/org/opensearch/sql/legacy/MethodQueryIT.java index fdbbb0f6ba..d7aab0d5c1 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/MethodQueryIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/MethodQueryIT.java @@ -12,6 +12,7 @@ import java.io.IOException; import java.util.Locale; import org.junit.Assert; +import org.junit.Ignore; import org.junit.Test; /** @@ -66,13 +67,14 @@ public void matchQueryTest() throws IOException { * * @throws IOException */ - // todo + // score query no longer maps to constant_score in the V2 engine @Test + @Ignore public void scoreQueryTest() throws IOException { final String result = explainQuery(String.format(Locale.ROOT, "select address from %s " + "where score(matchQuery(address, 'Lane'),100) " + - "or score(matchQuery(address,'Street'),0.5) order by _score desc limit 3", + "or score(matchQuery(address,'Street'),0.5) order by _score desc limit 3", 
TestsConstants.TEST_INDEX_ACCOUNT)); Assert.assertThat(result, both(containsString("{\"constant_score\":" + diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/PrettyFormatResponseIT.java b/integ-test/src/test/java/org/opensearch/sql/legacy/PrettyFormatResponseIT.java index 226645ce85..a7b2398a2b 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/PrettyFormatResponseIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/PrettyFormatResponseIT.java @@ -128,8 +128,8 @@ public void selectWrongField() throws IOException { @Test public void selectScore() throws IOException { JSONObject response = executeQuery( - String.format(Locale.ROOT, "SELECT _score FROM %s WHERE balance > 30000", - TestsConstants.TEST_INDEX_ACCOUNT)); + String.format(Locale.ROOT, "SELECT _score FROM %s WHERE SCORE(match_phrase(phrase, 'brown fox'))", + TestsConstants.TEST_INDEX_PHRASE)); List fields = Collections.singletonList("_score"); assertContainsColumns(getSchema(response), fields); diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/ScoreQueryIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/ScoreQueryIT.java new file mode 100644 index 0000000000..4b0964d62e --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/sql/ScoreQueryIT.java @@ -0,0 +1,116 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.sql; + +import org.json.JSONObject; +import org.junit.Assert; +import org.junit.Ignore; +import org.junit.Test; +import org.opensearch.sql.legacy.SQLIntegTestCase; +import org.opensearch.sql.legacy.TestsConstants; + +import java.io.IOException; +import java.util.Locale; + +import static org.hamcrest.Matchers.allOf; +import static org.hamcrest.Matchers.both; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.not; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BEER; + +public class ScoreQueryIT extends 
SQLIntegTestCase { + @Override + protected void init() throws Exception { + loadIndex(Index.ACCOUNT); + } + + /** + * "query" : { + * "from": 0, + * "size": 3, + * "timeout": "1m", + * "query": { + * "bool": { + * "should": [ + * { + * "match": { + * "address": { + * "query": "Lane", + * "operator": "OR", + * "prefix_length": 0, + * "max_expansions": 50, + * "fuzzy_transpositions": true, + * "lenient": false, + * "zero_terms_query": "NONE", + * "auto_generate_synonyms_phrase_query": true, + * "boost": 100.0 + * } + * } + * }, + * { + * "match": { + * "address": { + * "query": "Street", + * "operator": "OR", + * "prefix_length": 0, + * "max_expansions": 50, + * "fuzzy_transpositions": true, + * "lenient": false, + * "zero_terms_query": "NONE", + * "auto_generate_synonyms_phrase_query": true, + * "boost": 0.5 + * } + * } + * } + * ], + * "adjust_pure_negative": true, + * "boost": 1.0 + * } + * }, + * "_source": { + * "includes": [ + * "address" + * ], + * "excludes": [] + * }, + * "sort": [ + * { + * "_score": { + * "order": "desc" + * } + * } + * ], + * "track_scores": true + * } + * @throws IOException + */ + @Test + public void scoreQueryTest() throws IOException { + final String result = explainQuery(String.format(Locale.ROOT, + "select address from %s " + + "where score(matchQuery(address, 'Lane'),100) " + + "or score(matchQuery(address,'Street'),0.5) order by _score desc limit 3", + TestsConstants.TEST_INDEX_ACCOUNT)); + Assert.assertThat(result, containsString("\\\"match\\\":{\\\"address\\\":{\\\"query\\\":\\\"Lane\\\"")); + Assert.assertThat(result, containsString("\\\"boost\\\":100.0")); + Assert.assertThat(result, containsString("\\\"match\\\":{\\\"address\\\":{\\\"query\\\":\\\"Street\\\"")); + Assert.assertThat(result, containsString("\\\"boost\\\":0.5")); + Assert.assertThat(result, containsString("\\\"sort\\\":[{\\\"_score\\\"")); + Assert.assertThat(result, containsString("\\\"track_scores\\\":true")); + } + + @Test + public void 
scoreQueryDefaultBoostTest() throws IOException { + final String result = explainQuery(String.format(Locale.ROOT, + "select address from %s " + + "where score(matchQuery(address, 'Lane')) order by _score desc limit 2", + TestsConstants.TEST_INDEX_ACCOUNT)); + Assert.assertThat(result, containsString("\\\"match\\\":{\\\"address\\\":{\\\"query\\\":\\\"Lane\\\"")); + Assert.assertThat(result, containsString("\\\"boost\\\":1.0")); + Assert.assertThat(result, containsString("\\\"sort\\\":[{\\\"_score\\\"")); + Assert.assertThat(result, containsString("\\\"track_scores\\\":true")); + } +} diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClientTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClientTest.java index ccfd2a57d0..56d5de1cbc 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClientTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClientTest.java @@ -280,7 +280,7 @@ void search() { Iterator hits = response1.iterator(); assertTrue(hits.hasNext()); - assertEquals(exprTupleValue, hits.next()); + assertEquals(exprTupleValue.tupleValue().get("id"), hits.next().tupleValue().get("id")); assertFalse(hits.hasNext()); // Verify response for second scroll request diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchRestClientTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchRestClientTest.java index 083446adcc..45a5e748f8 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchRestClientTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchRestClientTest.java @@ -261,7 +261,7 @@ void search() throws IOException { Iterator hits = response1.iterator(); assertTrue(hits.hasNext()); - assertEquals(exprTupleValue, hits.next()); + assertEquals(exprTupleValue.tupleValue().get("id"), 
hits.next().tupleValue().get("id")); assertFalse(hits.hasNext()); // Verify response for second scroll request diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchResponseTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchResponseTest.java index 0a60503415..ab6f901b2c 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchResponseTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchResponseTest.java @@ -32,6 +32,8 @@ import org.opensearch.search.aggregations.Aggregations; import org.opensearch.search.fetch.subphase.highlight.HighlightField; import org.opensearch.sql.data.model.ExprIntegerValue; +import org.opensearch.sql.data.model.ExprLongValue; +import org.opensearch.sql.data.model.ExprStringValue; import org.opensearch.sql.data.model.ExprTupleValue; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.model.ExprValueUtils; @@ -106,9 +108,48 @@ void iterator() { int i = 0; for (ExprValue hit : new OpenSearchResponse(searchResponse, factory)) { if (i == 0) { - assertEquals(exprTupleValue1, hit); + assertEquals(exprTupleValue1.tupleValue().get("id"), hit.tupleValue().get("id")); } else if (i == 1) { - assertEquals(exprTupleValue2, hit); + assertEquals(exprTupleValue2.tupleValue().get("id"), hit.tupleValue().get("id")); + } else { + fail("More search hits returned than expected"); + } + i++; + } + } + + @Test + void iterator_metafields() { + + ExprTupleValue exprTupleHit = ExprTupleValue.fromExprValueMap(ImmutableMap.of( + "id1", new ExprIntegerValue(1) + )); + ExprTupleValue exprTupleResponse = ExprTupleValue.fromExprValueMap(ImmutableMap.of( + "id1", new ExprIntegerValue(1), + "_index", new ExprStringValue("testIndex"), + "_id", new ExprStringValue("testId"), + "_sort", new ExprLongValue(123456L) + )); + + when(searchResponse.getHits()) + .thenReturn( + new SearchHits( + new SearchHit[] {searchHit1}, + 
new TotalHits(1L, TotalHits.Relation.EQUAL_TO), + Float.NaN)); + + when(searchHit1.getSourceAsString()).thenReturn("{\"id1\", 1}"); + when(searchHit1.getId()).thenReturn("testId"); + when(searchHit1.getIndex()).thenReturn("testIndex"); + when(searchHit1.getScore()).thenReturn(Float.NaN); + when(searchHit1.getSeqNo()).thenReturn(123456L); + + when(factory.construct(any())).thenReturn(exprTupleHit); + + int i = 0; + for (ExprValue hit : new OpenSearchResponse(searchResponse, factory)) { + if (i == 0) { + assertEquals(exprTupleResponse, hit); } else { fail("More search hits returned than expected"); } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java index 363727cbd3..1e8b1662ea 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java @@ -36,8 +36,11 @@ import java.util.Arrays; import java.util.Collections; import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; + +import com.google.common.collect.ImmutableMap; import lombok.Builder; import org.apache.commons.lang3.tuple.Pair; import org.junit.jupiter.api.BeforeEach; @@ -56,16 +59,22 @@ import org.opensearch.search.sort.SortOrder; import org.opensearch.sql.ast.expression.Literal; import org.opensearch.sql.ast.tree.Sort.SortOption; +import org.opensearch.sql.data.model.ExprTupleValue; +import org.opensearch.sql.data.model.ExprValueUtils; +import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.expression.DSL; +import org.opensearch.sql.expression.FunctionExpression; import org.opensearch.sql.expression.HighlightExpression; import 
org.opensearch.sql.expression.ReferenceExpression; +import org.opensearch.sql.expression.function.OpenSearchFunctions; import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder; import org.opensearch.sql.opensearch.response.agg.CompositeAggregationParser; import org.opensearch.sql.opensearch.response.agg.OpenSearchAggregationResponseParser; import org.opensearch.sql.opensearch.response.agg.SingleValueParser; import org.opensearch.sql.opensearch.storage.OpenSearchIndexScan; import org.opensearch.sql.opensearch.storage.script.aggregation.AggregationQueryBuilder; +import org.opensearch.sql.planner.logical.LogicalFilter; import org.opensearch.sql.planner.logical.LogicalPlan; import org.opensearch.sql.planner.optimizer.LogicalPlanOptimizer; import org.opensearch.sql.planner.optimizer.rule.read.CreateTableScanBuilder; @@ -132,6 +141,41 @@ void test_filter_push_down() { ); } + /** + * SELECT intV as i FROM schema WHERE query_string(["intV^1.5", "QUERY", boost=12.5). + */ + @Test + void test_filter_on_opensearchfunction_push_down() { + LogicalPlan expectedPlan = + project( + indexScanBuilder( + withFilterPushedDown( + QueryBuilders.queryStringQuery("QUERY") + .field("intV", 1.5F) + .boost(12.5F) + ) + ), + DSL.named("i", DSL.ref("intV", INTEGER)) + ); + FunctionExpression queryString = DSL.query_string( + DSL.namedArgument("fields", DSL.literal( + new ExprTupleValue(new LinkedHashMap<>(ImmutableMap.of( + "intV", ExprValueUtils.floatValue(1.5F)))))), + DSL.namedArgument("query", "QUERY"), + DSL.namedArgument("boost", "12.5")); + + ((OpenSearchFunctions.OpenSearchFunction) queryString).setScoreTracked(true); + + LogicalPlan logicalPlan = project( + filter( + relation("schema", table), + queryString + ), + DSL.named("i", DSL.ref("intV", INTEGER)) + ); + assertEqualsAfterOptimization(expectedPlan, logicalPlan, true); + } + /** * SELECT avg(intV) FROM schema GROUP BY string_value. 
*/ @@ -208,6 +252,21 @@ void test_sort_push_down() { ); } + @Test + void test_score_sort_push_down() { + assertEqualsAfterOptimization( + indexScanBuilder( + withSortPushedDown( + SortBuilders.scoreSort().order(SortOrder.ASC) + ) + ), + sort( + relation("schema", table), + Pair.of(SortOption.DEFAULT_ASC, DSL.ref("_score", INTEGER)) + ) + ); + } + @Test void test_limit_push_down() { assertEqualsAfterOptimization( @@ -524,6 +583,21 @@ private void assertEqualsAfterOptimization(LogicalPlan expected, LogicalPlan act } } + private void assertEqualsAfterOptimization(LogicalPlan expected, LogicalPlan actual, boolean isScoreTracked) { + LogicalPlan optimizedPlan = optimize(actual); + assertEquals(expected, optimizedPlan); + + // Trigger build to make sure all push down actually happened in scan builder + indexScanBuilder.build(); + + // Verify to make sure all push down methods are called as expected + if (verifyPushDownCalls.length == 0) { + reset(indexScan); + } else { + Arrays.stream(verifyPushDownCalls).forEach(Runnable::run); + } + } + private Runnable withFilterPushedDown(QueryBuilder filteringCondition) { return () -> verify(requestBuilder, times(1)).pushDown(filteringCondition); } From 996a166e3a20f4e2378d46794dd9f76db21fd2bf Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Fri, 3 Mar 2023 09:22:37 -0800 Subject: [PATCH 17/40] Move legacy test and mark old as ignore Signed-off-by: Andrew Carbonetto --- .../org/opensearch/sql/legacy/MethodQueryIT.java | 6 +++++- .../java/org/opensearch/sql/sql/MatchIT.java | 16 ++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/MethodQueryIT.java b/integ-test/src/test/java/org/opensearch/sql/legacy/MethodQueryIT.java index d7aab0d5c1..aa53df9dde 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/MethodQueryIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/MethodQueryIT.java @@ -59,6 +59,8 @@ public void matchQueryTest() 
throws IOException { } /** + * score query no longer maps to constant_score in the V2 engine + * @see org.opensearch.sql.sql.ScoreQueryIT * matchQuery 是利用分词结果进行单个字段的搜索. "query" : { "bool" : { "must" : { "bool" : { * "should" : [ { "constant_score" : { "query" : { "match" : { "address" : { * "query" : "Lane", "type" : "boolean" } } }, "boost" : 100.0 } }, { @@ -67,7 +69,6 @@ public void matchQueryTest() throws IOException { * * @throws IOException */ - // score query no longer maps to constant_score in the V2 engine @Test @Ignore public void scoreQueryTest() throws IOException { @@ -119,6 +120,8 @@ public void wildcardQueryTest() throws IOException { } /** + * score query no longer handled by legacy engine + * @see org.opensearch.sql.sql.ScoreQueryIT * matchPhraseQueryTest 短语查询完全匹配. * "address" : { * "query" : "671 Bristol Street", @@ -128,6 +131,7 @@ public void wildcardQueryTest() throws IOException { * @throws IOException */ @Test + @Ignore public void matchPhraseQueryTest() throws IOException { final String result = explainQuery(String.format(Locale.ROOT, "select address from %s " + diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/MatchIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/MatchIT.java index 28573fdd10..6fdb70e99d 100644 --- a/integ-test/src/test/java/org/opensearch/sql/sql/MatchIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/sql/MatchIT.java @@ -5,6 +5,7 @@ package org.opensearch.sql.sql; +import static org.hamcrest.Matchers.containsString; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ACCOUNT; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; @@ -12,9 +13,14 @@ import static org.opensearch.sql.util.MatcherUtils.verifySchema; import java.io.IOException; +import java.util.Locale; + import org.json.JSONObject; +import org.junit.Assert; +import org.junit.Ignore; import org.junit.Test; import org.opensearch.sql.legacy.SQLIntegTestCase; 
+import org.opensearch.sql.legacy.TestsConstants; import org.opensearch.sql.legacy.utils.StringUtils; public class MatchIT extends SQLIntegTestCase { @@ -147,4 +153,14 @@ public void match_alternate_syntaxes_return_the_same_results() throws IOExceptio assertEquals(result1.getInt("total"), result2.getInt("total")); assertEquals(result1.getInt("total"), result3.getInt("total")); } + + @Test + public void matchPhraseQueryTest() throws IOException { + final String result = explainQuery(String.format(Locale.ROOT, + "select address from %s " + + "where address= matchPhrase('671 Bristol Street') order by _score desc limit 3", + TestsConstants.TEST_INDEX_ACCOUNT)); + Assert.assertThat(result, + containsString("{\\\"match_phrase\\\":{\\\"address\\\":{\\\"query\\\":\\\"671 Bristol Street\\\"")); + } } From 2c469c7759c5949c78b38821efdfa85219725d22 Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Fri, 3 Mar 2023 12:15:45 -0800 Subject: [PATCH 18/40] fix checkstyle violations Signed-off-by: Andrew Carbonetto --- .../sql/opensearch/response/OpenSearchResponse.java | 8 ++++---- .../storage/scan/OpenSearchIndexScanQueryBuilder.java | 9 +++++---- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java index d1a77ef7f6..535c69ea37 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java @@ -16,10 +16,10 @@ import org.opensearch.action.search.SearchResponse; import org.opensearch.search.SearchHits; import org.opensearch.search.aggregations.Aggregations; -import org.opensearch.sql.data.model.ExprTupleValue; -import org.opensearch.sql.data.model.ExprStringValue; import org.opensearch.sql.data.model.ExprFloatValue; import 
org.opensearch.sql.data.model.ExprLongValue; +import org.opensearch.sql.data.model.ExprStringValue; +import org.opensearch.sql.data.model.ExprTupleValue; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.model.ExprValueUtils; import org.opensearch.sql.opensearch.data.value.OpenSearchExprValueFactory; @@ -106,10 +106,10 @@ public Iterator iterator() { builder.put("_index", new ExprStringValue(hit.getIndex())); builder.put("_id", new ExprStringValue(hit.getId())); if (!Float.isNaN(hit.getScore())) { - builder.put("_score", new ExprFloatValue(hit.getScore())); + builder.put("_score", new ExprFloatValue(hit.getScore())); } if (!Float.isNaN(maxScore)) { - builder.put("_maxscore", new ExprFloatValue(maxScore)); + builder.put("_maxscore", new ExprFloatValue(maxScore)); } builder.put("_sort", new ExprLongValue(hit.getSeqNo())); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanQueryBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanQueryBuilder.java index 6b7a3d144e..18fc4201a5 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanQueryBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanQueryBuilder.java @@ -66,7 +66,8 @@ public boolean pushDownFilter(LogicalFilter filter) { Expression queryCondition = filter.getCondition(); QueryBuilder query = queryBuilder.build(queryCondition); indexScan.getRequestBuilder().pushDown(query); - indexScan.getRequestBuilder().pushDownTrackedScore(trackScoresFromOpenSearchFunction(queryCondition)); + indexScan.getRequestBuilder().pushDownTrackedScore( + trackScoresFromOpenSearchFunction(queryCondition)); return true; } @@ -104,12 +105,12 @@ public boolean pushDownHighlight(LogicalHighlight highlight) { } private boolean trackScoresFromOpenSearchFunction(Expression condition) { - if (condition instanceof 
OpenSearchFunctions.OpenSearchFunction && - ((OpenSearchFunctions.OpenSearchFunction) condition).isScoreTracked()) { + if (condition instanceof OpenSearchFunctions.OpenSearchFunction + && ((OpenSearchFunctions.OpenSearchFunction) condition).isScoreTracked()) { return true; } if (condition instanceof FunctionExpression) { - for(Expression expr: ((FunctionExpression) condition).getArguments()) { + for (Expression expr: ((FunctionExpression) condition).getArguments()) { if (trackScoresFromOpenSearchFunction(expr)) { return true; } From 4c7a5d7ddc60c7e962420c34933b1b69c6ad81cb Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Fri, 3 Mar 2023 13:44:35 -0800 Subject: [PATCH 19/40] fix checkstyle violations Signed-off-by: Andrew Carbonetto --- .../sql/sql/parser/AstExpressionBuilder.java | 88 ++++++++++--------- 1 file changed, 45 insertions(+), 43 deletions(-) diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java index 35abe12879..455f2ca444 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java @@ -9,7 +9,6 @@ import static org.opensearch.sql.ast.dsl.AstDSL.between; import static org.opensearch.sql.ast.dsl.AstDSL.not; import static org.opensearch.sql.ast.dsl.AstDSL.qualifiedName; -import static org.opensearch.sql.ast.dsl.AstDSL.stringLiteral; import static org.opensearch.sql.expression.function.BuiltinFunctionName.IS_NOT_NULL; import static org.opensearch.sql.expression.function.BuiltinFunctionName.IS_NULL; import static org.opensearch.sql.expression.function.BuiltinFunctionName.LIKE; @@ -41,7 +40,6 @@ import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.MathExpressionAtomContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.MultiFieldRelevanceFunctionContext; import static 
org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.NoFieldRelevanceFunctionContext; -import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.ScoreRelevanceFunctionContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.NotExpressionContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.NullLiteralContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.OverClauseContext; @@ -53,6 +51,7 @@ import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.RelevanceFieldAndWeightContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.ScalarFunctionCallContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.ScalarWindowFunctionContext; +import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.ScoreRelevanceFunctionContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.ShowDescribePatternContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.SignedDecimalContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.SignedRealContext; @@ -100,7 +99,6 @@ import org.opensearch.sql.ast.tree.Sort.SortOption; import org.opensearch.sql.common.utils.StringUtils; import org.opensearch.sql.expression.function.BuiltinFunctionName; -import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.AlternateMultiMatchQueryContext; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.AndExpressionContext; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.ColumnNameContext; @@ -108,7 +106,6 @@ import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.IntervalLiteralContext; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.NestedExpressionAtomContext; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.OrExpressionContext; -import 
org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.RelevanceFunctionContext; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.TableNameContext; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParserBaseVisitor; @@ -178,11 +175,11 @@ public UnresolvedExpression visitHighlightFunctionCall( @Override public UnresolvedExpression visitPositionFunction( - PositionFunctionContext ctx) { + PositionFunctionContext ctx) { return new Function( - POSITION.getName().getFunctionName(), - Arrays.asList(visitFunctionArg(ctx.functionArg(0)), - visitFunctionArg(ctx.functionArg(1)))); + POSITION.getName().getFunctionName(), + Arrays.asList(visitFunctionArg(ctx.functionArg(0)), + visitFunctionArg(ctx.functionArg(1)))); } @Override @@ -219,20 +216,20 @@ public UnresolvedExpression visitWindowFunctionClause(WindowFunctionClauseContex List partitionByList = Collections.emptyList(); if (overClause.partitionByClause() != null) { partitionByList = overClause.partitionByClause() - .expression() - .stream() - .map(this::visit) - .collect(Collectors.toList()); + .expression() + .stream() + .map(this::visit) + .collect(Collectors.toList()); } List> sortList = Collections.emptyList(); if (overClause.orderByClause() != null) { sortList = overClause.orderByClause() - .orderByElement() - .stream() - .map(item -> ImmutablePair.of( - createSortOption(item), visit(item.expression()))) - .collect(Collectors.toList()); + .orderByElement() + .stream() + .map(item -> ImmutablePair.of( + createSortOption(item), visit(item.expression()))) + .collect(Collectors.toList()); } return new WindowFunction(visit(ctx.function), partitionByList, sortList); } @@ -301,7 +298,7 @@ public UnresolvedExpression visitLikePredicate(LikePredicateContext ctx) { @Override public UnresolvedExpression visitRegexpPredicate(RegexpPredicateContext ctx) { return new Function(REGEXP.getName().getFunctionName(), - Arrays.asList(visit(ctx.left), visit(ctx.right))); + Arrays.asList(visit(ctx.left), 
visit(ctx.right))); } @Override @@ -402,9 +399,9 @@ public UnresolvedExpression visitBinaryComparisonPredicate( public UnresolvedExpression visitCaseFunctionCall(CaseFunctionCallContext ctx) { UnresolvedExpression caseValue = (ctx.expression() == null) ? null : visit(ctx.expression()); List whenStatements = ctx.caseFuncAlternative() - .stream() - .map(when -> (When) visit(when)) - .collect(Collectors.toList()); + .stream() + .map(when -> (When) visit(when)) + .collect(Collectors.toList()); UnresolvedExpression elseStatement = (ctx.elseArg == null) ? null : visit(ctx.elseArg); return new Case(caseValue, whenStatements, elseStatement); @@ -429,10 +426,10 @@ public UnresolvedExpression visitConvertedDataType( @Override public UnresolvedExpression visitNoFieldRelevanceFunction( - NoFieldRelevanceFunctionContext ctx) { + NoFieldRelevanceFunctionContext ctx) { return new Function( - ctx.noFieldRelevanceFunctionName().getText().toLowerCase(), - noFieldRelevanceArguments(ctx)); + ctx.noFieldRelevanceFunctionName().getText().toLowerCase(), + noFieldRelevanceArguments(ctx)); } @Override @@ -460,7 +457,7 @@ public UnresolvedExpression visitMultiFieldRelevanceFunction( if ((funcName.equalsIgnoreCase(BuiltinFunctionName.MULTI_MATCH.toString()) || funcName.equalsIgnoreCase(BuiltinFunctionName.MULTIMATCH.toString()) || funcName.equalsIgnoreCase(BuiltinFunctionName.MULTIMATCHQUERY.toString())) - && ! ctx.getRuleContexts(AlternateMultiMatchQueryContext.class) + && !ctx.getRuleContexts(AlternateMultiMatchQueryContext.class) .isEmpty()) { return new Function( ctx.multiFieldRelevanceFunctionName().getText().toLowerCase(), @@ -480,13 +477,16 @@ public UnresolvedExpression visitAltMultiFieldRelevanceFunction( altMultiFieldRelevanceFunctionArguments(ctx)); } + /** + * Visit score-relevance function and collect children. 
+ * + * @param ctx the parse tree + * @return children + */ public UnresolvedExpression visitScoreRelevanceFunction(ScoreRelevanceFunctionContext ctx) { - RelevanceFunctionContext relevanceFunction = ctx.relevanceFunction(); - List functionArgs = ctx.functionArg(); - return new ScoreFunction( - visit(ctx.relevanceFunction()), - ctx.functionArg().stream().map(this::visitFunctionArg).collect(Collectors.toList()) + visit(ctx.relevanceFunction()), + ctx.functionArg().stream().map(this::visitFunctionArg).collect(Collectors.toList()) ); } @@ -502,13 +502,14 @@ private Function buildFunction(String functionName, } private QualifiedName visitIdentifiers(List identifiers) { - Boolean isMetadataField = identifiers.stream().filter(id -> id.metadataField() != null).findFirst().isPresent(); + Boolean isMetadataField = + identifiers.stream().anyMatch(id -> id.metadataField() != null); return new QualifiedName( - identifiers.stream() - .map(RuleContext::getText) - .map(StringUtils::unquoteIdentifier) - .collect(Collectors.toList()), - isMetadataField); + identifiers.stream() + .map(RuleContext::getText) + .map(StringUtils::unquoteIdentifier) + .collect(Collectors.toList()), + isMetadataField); } private void fillRelevanceArgs(List args, @@ -523,18 +524,18 @@ private void fillRelevanceArgs(List args, } private List noFieldRelevanceArguments( - NoFieldRelevanceFunctionContext ctx) { + NoFieldRelevanceFunctionContext ctx) { // all the arguments are defaulted to string values // to skip environment resolving and function signature resolving ImmutableList.Builder builder = ImmutableList.builder(); builder.add(new UnresolvedArgument("query", - new Literal(StringUtils.unquoteText(ctx.query.getText()), DataType.STRING))); + new Literal(StringUtils.unquoteText(ctx.query.getText()), DataType.STRING))); fillRelevanceArgs(ctx.relevanceArg(), builder); return builder.build(); } private List singleFieldRelevanceArguments( - SingleFieldRelevanceFunctionContext ctx) { + 
SingleFieldRelevanceFunctionContext ctx) { // all the arguments are defaulted to string values // to skip environment resolving and function signature resolving ImmutableList.Builder builder = ImmutableList.builder(); @@ -590,6 +591,7 @@ private List getFormatFunctionArguments( /** * Adds support for multi_match alternate syntax like * MULTI_MATCH('query'='Dale', 'fields'='*name'). + * * @param ctx : Context for multi field relevance function. * @return : Returns list of all arguments for relevance function. */ @@ -602,7 +604,7 @@ private List alternateMultiMatchArguments( String[] fieldAndWeights = StringUtils.unquoteText( ctx.getRuleContexts(AlternateMultiMatchFieldContext.class) - .stream().findFirst().get().argVal.getText()).split(","); + .stream().findFirst().get().argVal.getText()).split(","); for (var fieldAndWeight : fieldAndWeights) { String[] splitFieldAndWeights = fieldAndWeight.split("\\^"); @@ -614,9 +616,9 @@ private List alternateMultiMatchArguments( ctx.getRuleContexts(AlternateMultiMatchQueryContext.class) .stream().findFirst().ifPresent( - arg -> - builder.add(new UnresolvedArgument("query", - new Literal(StringUtils.unquoteText(arg.argVal.getText()), DataType.STRING))) + arg -> + builder.add(new UnresolvedArgument("query", + new Literal(StringUtils.unquoteText(arg.argVal.getText()), DataType.STRING))) ); fillRelevanceArgs(ctx.relevanceArg(), builder); From dec36fa8bbb9f8c35011d3708f61b51e75afc4b3 Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Mon, 6 Mar 2023 16:19:58 -0800 Subject: [PATCH 20/40] Update tests and identifier to accept metafields Signed-off-by: Andrew Carbonetto --- .../org/opensearch/sql/ast/dsl/AstDSL.java | 6 + .../opensearch/sql/analysis/AnalyzerTest.java | 28 +++++ .../request/OpenSearchRequestBuilderTest.java | 4 +- .../OpenSearchIndexScanOptimizationTest.java | 105 +++++++++++++++--- sql/src/main/antlr/OpenSearchSQLLexer.g4 | 2 +- .../sql/sql/parser/AstExpressionBuilder.java | 3 +- 
.../sql/parser/AstExpressionBuilderTest.java | 51 +++++++++ 7 files changed, 177 insertions(+), 22 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java index 80f209e27e..22ee1e2f6d 100644 --- a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java +++ b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java @@ -34,6 +34,7 @@ import org.opensearch.sql.ast.expression.Or; import org.opensearch.sql.ast.expression.ParseMethod; import org.opensearch.sql.ast.expression.QualifiedName; +import org.opensearch.sql.ast.expression.ScoreFunction; import org.opensearch.sql.ast.expression.Span; import org.opensearch.sql.ast.expression.SpanUnit; import org.opensearch.sql.ast.expression.UnresolvedArgument; @@ -288,6 +289,11 @@ public UnresolvedExpression highlight(UnresolvedExpression fieldName, return new HighlightFunction(fieldName, arguments); } + public UnresolvedExpression score(UnresolvedExpression relevanceQuery, + List funcArgs) { + return new ScoreFunction(relevanceQuery, funcArgs); + } + public UnresolvedExpression window(UnresolvedExpression function, List partitionByList, List> sortList) { diff --git a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java index 6c87a28a10..db560e02c0 100644 --- a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java +++ b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java @@ -248,6 +248,34 @@ public void analyze_filter_visit_score_function() { assertEquals(true, relevanceQuery.isScoreTracked()); } + @Test + public void analyze_filter_visit_without_score_function() { + UnresolvedPlan unresolvedPlan = AstDSL.filter( + AstDSL.relation("schema"), + AstDSL.function("match_phrase_prefix", + AstDSL.unresolvedArg("field", stringLiteral("field_value1")), + AstDSL.unresolvedArg("query", stringLiteral("search query")), + AstDSL.unresolvedArg("boost", 
stringLiteral("3")) + ) + ); + assertAnalyzeEqual( + LogicalPlanDSL.filter( + LogicalPlanDSL.relation("schema", table), + DSL.match_phrase_prefix( + DSL.namedArgument("field", "field_value1"), + DSL.namedArgument("query", "search query"), + DSL.namedArgument("boost", "3") + ) + ), + unresolvedPlan + ); + + LogicalPlan logicalPlan = analyze(unresolvedPlan); + OpenSearchFunctions.OpenSearchFunction relevanceQuery = + (OpenSearchFunctions.OpenSearchFunction)((LogicalFilter) logicalPlan).getCondition(); + assertEquals(false, relevanceQuery.isScoreTracked()); + } + @Test public void analyze_filter_visit_score_function_with_double_boost() { UnresolvedPlan unresolvedPlan = AstDSL.filter( diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilderTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilderTest.java index 33376ece83..9e3d043ed7 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilderTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilderTest.java @@ -71,6 +71,7 @@ void buildQueryRequest() { Integer limit = 200; Integer offset = 0; requestBuilder.pushDownLimit(limit, offset); + requestBuilder.pushDownTrackedScore(true); assertEquals( new OpenSearchQueryRequest( @@ -78,7 +79,8 @@ void buildQueryRequest() { new SearchSourceBuilder() .from(offset) .size(limit) - .timeout(DEFAULT_QUERY_TIMEOUT), + .timeout(DEFAULT_QUERY_TIMEOUT) + .trackScores(true), exprValueFactory), requestBuilder.build()); } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java index 1e8b1662ea..4cb90b3ac0 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java +++ 
b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java @@ -7,6 +7,7 @@ package org.opensearch.sql.opensearch.storage.scan; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.reset; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; @@ -50,6 +51,7 @@ import org.mockito.junit.jupiter.MockitoExtension; import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.query.QueryBuilders; +import org.opensearch.index.query.SpanOrQueryBuilder; import org.opensearch.search.aggregations.AggregationBuilder; import org.opensearch.search.aggregations.AggregationBuilders; import org.opensearch.search.aggregations.bucket.composite.CompositeAggregationBuilder; @@ -145,7 +147,7 @@ void test_filter_push_down() { * SELECT intV as i FROM schema WHERE query_string(["intV^1.5", "QUERY", boost=12.5). */ @Test - void test_filter_on_opensearchfunction_push_down() { + void test_filter_on_opensearchfunction_with_trackedscores_push_down() { LogicalPlan expectedPlan = project( indexScanBuilder( @@ -153,7 +155,8 @@ void test_filter_on_opensearchfunction_push_down() { QueryBuilders.queryStringQuery("QUERY") .field("intV", 1.5F) .boost(12.5F) - ) + ), + withTrackedScoresPushedDown(true) ), DSL.named("i", DSL.ref("intV", INTEGER)) ); @@ -173,7 +176,84 @@ void test_filter_on_opensearchfunction_push_down() { ), DSL.named("i", DSL.ref("intV", INTEGER)) ); - assertEqualsAfterOptimization(expectedPlan, logicalPlan, true); + assertEqualsAfterOptimization(expectedPlan, logicalPlan); + } + + @Test + void test_filter_on_multiple_opensearchfunctions_with_trackedscores_push_down() { + LogicalPlan expectedPlan = + project( + indexScanBuilder( + withFilterPushedDown( + QueryBuilders.boolQuery() + .should( + QueryBuilders.queryStringQuery("QUERY") + .field("intV", 1.5F) + .boost(12.5F)) + .should( + 
QueryBuilders.queryStringQuery("QUERY") + .field("intV", 1.5F) + .boost(12.5F) + ) + ), + withTrackedScoresPushedDown(true) + ), + DSL.named("i", DSL.ref("intV", INTEGER)) + ); + FunctionExpression firstQueryString = DSL.query_string( + DSL.namedArgument("fields", DSL.literal( + new ExprTupleValue(new LinkedHashMap<>(ImmutableMap.of( + "intV", ExprValueUtils.floatValue(1.5F)))))), + DSL.namedArgument("query", "QUERY"), + DSL.namedArgument("boost", "12.5")); + ((OpenSearchFunctions.OpenSearchFunction) firstQueryString).setScoreTracked(false); + FunctionExpression secondQueryString = DSL.query_string( + DSL.namedArgument("fields", DSL.literal( + new ExprTupleValue(new LinkedHashMap<>(ImmutableMap.of( + "intV", ExprValueUtils.floatValue(1.5F)))))), + DSL.namedArgument("query", "QUERY"), + DSL.namedArgument("boost", "12.5")); + ((OpenSearchFunctions.OpenSearchFunction) secondQueryString).setScoreTracked(true); + + LogicalPlan logicalPlan = project( + filter( + relation("schema", table), + DSL.or(firstQueryString, secondQueryString) + ), + DSL.named("i", DSL.ref("intV", INTEGER)) + ); + assertEqualsAfterOptimization(expectedPlan, logicalPlan); + } + + @Test + void test_filter_on_opensearchfunction_without_trackedscores_push_down() { + LogicalPlan expectedPlan = + project( + indexScanBuilder( + withFilterPushedDown( + QueryBuilders.queryStringQuery("QUERY") + .field("intV", 1.5F) + .boost(12.5F) + ), + withTrackedScoresPushedDown(false) + ), + DSL.named("i", DSL.ref("intV", INTEGER)) + ); + FunctionExpression queryString = DSL.query_string( + DSL.namedArgument("fields", DSL.literal( + new ExprTupleValue(new LinkedHashMap<>(ImmutableMap.of( + "intV", ExprValueUtils.floatValue(1.5F)))))), + DSL.namedArgument("query", "QUERY"), + DSL.namedArgument("boost", "12.5")); + + LogicalPlan logicalPlan = project( + filter( + relation("schema", table), + queryString + ), + DSL.named("i", DSL.ref("intV", INTEGER)) + ); + assertEqualsAfterOptimization(expectedPlan, logicalPlan); } /** 
@@ -583,21 +663,6 @@ private void assertEqualsAfterOptimization(LogicalPlan expected, LogicalPlan act } } - private void assertEqualsAfterOptimization(LogicalPlan expected, LogicalPlan actual, boolean isScoreTracked) { - LogicalPlan optimizedPlan = optimize(actual); - assertEquals(expected, optimizedPlan); - - // Trigger build to make sure all push down actually happened in scan builder - indexScanBuilder.build(); - - // Verify to make sure all push down methods are called as expected - if (verifyPushDownCalls.length == 0) { - reset(indexScan); - } else { - Arrays.stream(verifyPushDownCalls).forEach(Runnable::run); - } - } - private Runnable withFilterPushedDown(QueryBuilder filteringCondition) { return () -> verify(requestBuilder, times(1)).pushDown(filteringCondition); } @@ -647,6 +712,10 @@ private Runnable withHighlightPushedDown(String field, Map argu return () -> verify(requestBuilder, times(1)).pushDownHighlight(field, arguments); } + private Runnable withTrackedScoresPushedDown(boolean trackScores) { + return() -> verify(requestBuilder, times(1)).pushDownTrackedScore(trackScores); + } + private static AggregationAssertHelper.AggregationAssertHelperBuilder aggregate(String aggName) { var aggBuilder = new AggregationAssertHelper.AggregationAssertHelperBuilder(); aggBuilder.aggregateName = aggName; diff --git a/sql/src/main/antlr/OpenSearchSQLLexer.g4 b/sql/src/main/antlr/OpenSearchSQLLexer.g4 index 2b03a8c010..dec67ec363 100644 --- a/sql/src/main/antlr/OpenSearchSQLLexer.g4 +++ b/sql/src/main/antlr/OpenSearchSQLLexer.g4 @@ -476,7 +476,7 @@ fragment BIT_STRING_L: 'B' '\'' [01]+ '\''; // Identifiers cannot start with a single '_' since this an OpenSearch reserved // metadata field. Two underscores (or more) is acceptable, such as '__field'. 
-fragment ID_LITERAL: ([_][_]|[@*A-Z])+?[*A-Z_\-0-9]*; +fragment ID_LITERAL: ([@*A-Z_])+?[*A-Z_\-0-9]*; // Last tokens must generate Errors diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java index 455f2ca444..ca6b8ffee1 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java @@ -502,8 +502,7 @@ private Function buildFunction(String functionName, } private QualifiedName visitIdentifiers(List identifiers) { - Boolean isMetadataField = - identifiers.stream().anyMatch(id -> id.metadataField() != null); + Boolean isMetadataField = identifiers.stream().filter(id -> id.metadataField() != null).findFirst().isPresent(); return new QualifiedName( identifiers.stream() .map(RuleContext::getText) diff --git a/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java b/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java index 80e7ddb8e5..33cb1a86d2 100644 --- a/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java +++ b/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java @@ -23,6 +23,7 @@ import static org.opensearch.sql.ast.dsl.AstDSL.nullLiteral; import static org.opensearch.sql.ast.dsl.AstDSL.or; import static org.opensearch.sql.ast.dsl.AstDSL.qualifiedName; +import static org.opensearch.sql.ast.dsl.AstDSL.qualifiedNameWithMetadata; import static org.opensearch.sql.ast.dsl.AstDSL.stringLiteral; import static org.opensearch.sql.ast.dsl.AstDSL.timeLiteral; import static org.opensearch.sql.ast.dsl.AstDSL.timestampLiteral; @@ -36,6 +37,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import java.util.HashMap; +import java.util.List; import org.antlr.v4.runtime.CommonTokenStream; import 
org.apache.commons.lang3.tuple.ImmutablePair; import org.junit.jupiter.api.Test; @@ -435,6 +437,26 @@ public void canBuildKeywordsAsIdentInQualifiedName() { ); } + @Test + public void canBuildMetaDataFieldAsQualifiedName() { + List.of("_id", "_index", "_sort", "_score", "_maxscore").stream().forEach( + field -> assertEquals( + qualifiedNameWithMetadata(field), + buildExprAst(field) + ) + ); + } + + @Test + public void canBuildNonMetaDataFieldAsQualifiedName() { + List.of("id", "__id", "_routing", "___field").stream().forEach( + field -> assertEquals( + qualifiedName(field), + buildExprAst(field) + ) + ); + } + @Test public void canCastFieldAsString() { assertEquals( @@ -770,6 +792,35 @@ public void relevanceWildcard_query() { ); } + @Test + public void relevanceScore_query() { + assertEquals( + AstDSL.score( + AstDSL.function("query_string", + unresolvedArg("fields", new RelevanceFieldList(ImmutableMap.of( + "field2", 3.2F, "field1", 1.F))), + unresolvedArg("query", stringLiteral("search query")) + ), + List.of() + ), + buildExprAst("score(query_string(['field1', 'field2' ^ 3.2], 'search query'))") + ); + } + + @Test + public void relevanceScore_withBoost_query() { + assertEquals( + AstDSL.score( + AstDSL.function("query_string", + unresolvedArg("fields", new RelevanceFieldList(ImmutableMap.of( + "field1", 1.F, "field2", 3.2F))), + unresolvedArg("query", stringLiteral("search query")) + ), + List.of(doubleLiteral(1.0)) + ), + buildExprAst("score(query_string(['field1', 'field2' ^ 3.2], 'search query'), 1.0)") + ); + } @Test public void relevanceQuery() { assertEquals(AstDSL.function("query", From 7aab6eec22e938834b00ea901dccf203ca069d0c Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Tue, 7 Mar 2023 13:17:32 -0800 Subject: [PATCH 21/40] Checkstyle fixes Signed-off-by: Andrew Carbonetto --- .../storage/scan/OpenSearchIndexScanOptimizationTest.java | 5 ++--- .../org/opensearch/sql/sql/parser/AstExpressionBuilder.java | 3 ++- 
.../opensearch/sql/sql/parser/AstExpressionBuilderTest.java | 1 + 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java index 4cb90b3ac0..4dfb148ac8 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java @@ -34,14 +34,13 @@ import static org.opensearch.sql.planner.optimizer.rule.read.TableScanPushDown.PUSH_DOWN_SORT; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import java.util.Arrays; import java.util.Collections; import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; - -import com.google.common.collect.ImmutableMap; import lombok.Builder; import org.apache.commons.lang3.tuple.Pair; import org.junit.jupiter.api.BeforeEach; @@ -713,7 +712,7 @@ private Runnable withHighlightPushedDown(String field, Map argu } private Runnable withTrackedScoresPushedDown(boolean trackScores) { - return() -> verify(requestBuilder, times(1)).pushDownTrackedScore(trackScores); + return () -> verify(requestBuilder, times(1)).pushDownTrackedScore(trackScores); } private static AggregationAssertHelper.AggregationAssertHelperBuilder aggregate(String aggName) { diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java index ca6b8ffee1..a19d7ad3f3 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java @@ -502,7 +502,8 @@ private Function buildFunction(String functionName, } 
private QualifiedName visitIdentifiers(List identifiers) { - Boolean isMetadataField = identifiers.stream().filter(id -> id.metadataField() != null).findFirst().isPresent(); + Boolean isMetadataField = identifiers.stream().filter( + id -> id.metadataField() != null).findFirst().isPresent(); return new QualifiedName( identifiers.stream() .map(RuleContext::getText) diff --git a/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java b/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java index 33cb1a86d2..aa4f3a4bd5 100644 --- a/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java +++ b/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java @@ -821,6 +821,7 @@ public void relevanceScore_withBoost_query() { buildExprAst("score(query_string(['field1', 'field2' ^ 3.2], 'search query'), 1.0)") ); } + @Test public void relevanceQuery() { assertEquals(AstDSL.function("query", From fcb3470fa7af6b5d5d59060837b43ec50f46aa0c Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Tue, 7 Mar 2023 13:36:52 -0800 Subject: [PATCH 22/40] Rebase from main Signed-off-by: Andrew Carbonetto --- .../main/java/org/opensearch/sql/analysis/Analyzer.java | 7 +++++++ .../request/system/OpenSearchDescribeIndexRequest.java | 5 +++++ 2 files changed, 12 insertions(+) diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 228b54ba0c..d94d1cf3ce 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -158,6 +158,13 @@ public LogicalPlan visitRelation(Relation node, AnalysisContext context) { } table.getFieldTypes().forEach((k, v) -> curEnv.define(new Symbol(Namespace.FIELD_NAME, k), v)); + // add OpenSearch metadata types + curEnv.define(new Symbol(Namespace.FIELD_NAME, "_index"), ExprCoreType.STRING); + curEnv.define(new 
Symbol(Namespace.FIELD_NAME, "_id"), ExprCoreType.STRING); + curEnv.define(new Symbol(Namespace.FIELD_NAME, "_score"), ExprCoreType.FLOAT); + curEnv.define(new Symbol(Namespace.FIELD_NAME, "_maxscore"), ExprCoreType.FLOAT); + curEnv.define(new Symbol(Namespace.FIELD_NAME, "_sort"), ExprCoreType.LONG); + // Put index name or its alias in index namespace on type environment so qualifier // can be removed when analyzing qualified name. The value (expr type) here doesn't matter. curEnv.define(new Symbol(Namespace.INDEX_NAME, diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/system/OpenSearchDescribeIndexRequest.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/system/OpenSearchDescribeIndexRequest.java index 50402fc75b..cd0a99e309 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/system/OpenSearchDescribeIndexRequest.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/system/OpenSearchDescribeIndexRequest.java @@ -90,6 +90,11 @@ public Map getFieldTypes() { .filter(entry -> !ExprCoreType.UNKNOWN.equals(entry.getValue())) .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))); } + fieldTypes.put("_index", ExprCoreType.STRING); + fieldTypes.put("_id", ExprCoreType.STRING); + fieldTypes.put("_score", ExprCoreType.FLOAT); + fieldTypes.put("_maxscore", ExprCoreType.FLOAT); + fieldTypes.put("_sort", ExprCoreType.LONG); return fieldTypes; } From 6e52d138b04e5cd1ad754ee0a0b3d0ad8d9b03bd Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Tue, 7 Mar 2023 13:37:35 -0800 Subject: [PATCH 23/40] Rebase from main Signed-off-by: Andrew Carbonetto --- .../main/java/org/opensearch/sql/analysis/Analyzer.java | 7 ------- .../request/system/OpenSearchDescribeIndexRequest.java | 5 ----- 2 files changed, 12 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index d94d1cf3ce..228b54ba0c 
100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -158,13 +158,6 @@ public LogicalPlan visitRelation(Relation node, AnalysisContext context) { } table.getFieldTypes().forEach((k, v) -> curEnv.define(new Symbol(Namespace.FIELD_NAME, k), v)); - // add OpenSearch metadata types - curEnv.define(new Symbol(Namespace.FIELD_NAME, "_index"), ExprCoreType.STRING); - curEnv.define(new Symbol(Namespace.FIELD_NAME, "_id"), ExprCoreType.STRING); - curEnv.define(new Symbol(Namespace.FIELD_NAME, "_score"), ExprCoreType.FLOAT); - curEnv.define(new Symbol(Namespace.FIELD_NAME, "_maxscore"), ExprCoreType.FLOAT); - curEnv.define(new Symbol(Namespace.FIELD_NAME, "_sort"), ExprCoreType.LONG); - // Put index name or its alias in index namespace on type environment so qualifier // can be removed when analyzing qualified name. The value (expr type) here doesn't matter. curEnv.define(new Symbol(Namespace.INDEX_NAME, diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/system/OpenSearchDescribeIndexRequest.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/system/OpenSearchDescribeIndexRequest.java index cd0a99e309..50402fc75b 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/system/OpenSearchDescribeIndexRequest.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/system/OpenSearchDescribeIndexRequest.java @@ -90,11 +90,6 @@ public Map getFieldTypes() { .filter(entry -> !ExprCoreType.UNKNOWN.equals(entry.getValue())) .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))); } - fieldTypes.put("_index", ExprCoreType.STRING); - fieldTypes.put("_id", ExprCoreType.STRING); - fieldTypes.put("_score", ExprCoreType.FLOAT); - fieldTypes.put("_maxscore", ExprCoreType.FLOAT); - fieldTypes.put("_sort", ExprCoreType.LONG); return fieldTypes; } From 0d3beab3d2615a4c2272893f064ed618b196b01c Mon Sep 17 00:00:00 
2001 From: Andrew Carbonetto Date: Tue, 7 Mar 2023 13:38:39 -0800 Subject: [PATCH 24/40] Rebase from main Signed-off-by: Andrew Carbonetto --- .../sql/analysis/ExpressionAnalyzer.java | 101 ++++-------------- .../sql/ast/expression/ScoreFunction.java | 9 +- .../org/opensearch/sql/expression/DSL.java | 8 -- .../sql/expression/ExpressionNodeVisitor.java | 4 + .../sql/expression/ScoreExpression.java | 89 +++++++++++++++ .../function/OpenSearchFunctions.java | 31 ++++-- .../rule/read/TableScanPushDown.java | 6 ++ .../request/OpenSearchRequestBuilder.java | 9 +- .../scan/OpenSearchIndexScanBuilder.java | 2 + .../OpenSearchIndexScanOptimizationTest.java | 2 + sql/src/main/antlr/OpenSearchSQLParser.g4 | 6 +- .../sql/sql/parser/AstExpressionBuilder.java | 88 ++++++++------- 12 files changed, 201 insertions(+), 154 deletions(-) create mode 100644 core/src/main/java/org/opensearch/sql/expression/ScoreExpression.java diff --git a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java index 75b093248a..fafd29e963 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java @@ -8,7 +8,6 @@ import static org.opensearch.sql.ast.dsl.AstDSL.and; import static org.opensearch.sql.ast.dsl.AstDSL.compare; -import static org.opensearch.sql.ast.expression.QualifiedName.METADATAFIELD_TYPE_MAP; import static org.opensearch.sql.expression.function.BuiltinFunctionName.GTE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.LTE; @@ -32,7 +31,6 @@ import org.opensearch.sql.ast.expression.Case; import org.opensearch.sql.ast.expression.Cast; import org.opensearch.sql.ast.expression.Compare; -import org.opensearch.sql.ast.expression.DataType; import org.opensearch.sql.ast.expression.EqualTo; import org.opensearch.sql.ast.expression.Field; import org.opensearch.sql.ast.expression.Function; 
@@ -64,6 +62,7 @@ import org.opensearch.sql.expression.NamedArgumentExpression; import org.opensearch.sql.expression.NamedExpression; import org.opensearch.sql.expression.ReferenceExpression; +import org.opensearch.sql.expression.ScoreExpression; import org.opensearch.sql.expression.aggregation.AggregationState; import org.opensearch.sql.expression.aggregation.Aggregator; import org.opensearch.sql.expression.conditional.cases.CaseClause; @@ -71,7 +70,6 @@ import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.expression.function.BuiltinFunctionRepository; import org.opensearch.sql.expression.function.FunctionName; -import org.opensearch.sql.expression.function.OpenSearchFunctions; import org.opensearch.sql.expression.parse.ParseExpression; import org.opensearch.sql.expression.span.SpanExpression; import org.opensearch.sql.expression.window.aggregation.AggregateWindowFunction; @@ -212,75 +210,9 @@ public Expression visitHighlightFunction(HighlightFunction node, AnalysisContext return new HighlightExpression(expr); } - /** - * visitScoreFunction removes the score function from the AST and replaces it with the child - * relevance function node. If the optional boost variable is provided, the boost argument - * of the relevance function is combined. 
- * @param node score function node - * @param context analysis context for the query - * @return resolved relevance function - */ public Expression visitScoreFunction(ScoreFunction node, AnalysisContext context) { - // if no function argument given, just accept the relevance query and return - if (node.getFuncArgs().isEmpty() || !(node.getFuncArgs().get(0) instanceof Literal)) { - OpenSearchFunctions.OpenSearchFunction relevanceQueryExpr = - (OpenSearchFunctions.OpenSearchFunction) node - .getRelevanceQuery().accept(this, context); - relevanceQueryExpr.setScoreTracked(true); - return relevanceQueryExpr; - } - - // note: if an argument exists, and there should only be one, it will be a boost argument - Literal boostFunctionArg = (Literal) node.getFuncArgs().get(0); - Double thisBoostValue; - if (boostFunctionArg.getType().equals(DataType.DOUBLE)) { - thisBoostValue = ((Double) boostFunctionArg.getValue()); - } else if (boostFunctionArg.getType().equals(DataType.INTEGER)) { - thisBoostValue = ((Integer) boostFunctionArg.getValue()).doubleValue(); - } else { - throw new SemanticCheckException(String.format("Expected boost type '%s' but got '%s'", - DataType.DOUBLE.name(), boostFunctionArg.getType().name())); - } - - // update the existing unresolved expression to add a boost argument if it doesn't exist - // OR multiply the existing boost argument - Function relevanceQueryUnresolvedExpr = (Function)node.getRelevanceQuery(); - List relevanceFuncArgs = relevanceQueryUnresolvedExpr.getFuncArgs(); - - boolean doesFunctionContainBoostArgument = false; - List updatedFuncArgs = new ArrayList<>(); - for (UnresolvedExpression expr: relevanceFuncArgs) { - String argumentName = ((UnresolvedArgument) expr).getArgName(); - if (argumentName.equalsIgnoreCase("boost")) { - doesFunctionContainBoostArgument = true; - Literal boostArgLiteral = (Literal)((UnresolvedArgument) expr).getValue(); - Double boostValue = Double.parseDouble((String)boostArgLiteral.getValue()) * thisBoostValue; 
- UnresolvedArgument newBoostArg = new UnresolvedArgument( - argumentName, - new Literal(boostValue.toString(), DataType.STRING) - ); - updatedFuncArgs.add(newBoostArg); - } else { - updatedFuncArgs.add(expr); - } - } - - // since nothing was found, add an argument - if (!doesFunctionContainBoostArgument) { - UnresolvedArgument newBoostArg = new UnresolvedArgument( - "boost", new Literal(Double.toString(thisBoostValue), DataType.STRING)); - updatedFuncArgs.add(newBoostArg); - } - - // create a new function expression with boost argument and resolve it - Function updatedRelevanceQueryUnresolvedExpr = new Function( - relevanceQueryUnresolvedExpr.getFuncName(), - updatedFuncArgs); - OpenSearchFunctions.OpenSearchFunction relevanceQueryExpr = - (OpenSearchFunctions.OpenSearchFunction) updatedRelevanceQueryUnresolvedExpr - .accept(this, context); - relevanceQueryExpr.setScoreTracked(true); - return relevanceQueryExpr; + Expression relevanceQueryExpr = node.getRelevanceQuery().accept(this, context); + return new ScoreExpression(relevanceQueryExpr); } @Override @@ -392,17 +324,24 @@ public Expression visitUnresolvedArgument(UnresolvedArgument node, AnalysisConte return new NamedArgumentExpression(node.getArgName(), node.getValue().accept(this, context)); } - /** - * If QualifiedName is actually a reserved metadata field, return the expr type associated - * with the metadata field. 
- * @param ident metadata field name - * @param context analysis context - * @return DSL reference - */ private Expression visitMetadata(String ident, AnalysisContext context) { - ExprCoreType exprCoreType = Optional.ofNullable(METADATAFIELD_TYPE_MAP.get(ident)) - .orElseThrow(() -> new SemanticCheckException("invalid metadata field")); - return DSL.ref(ident, exprCoreType); + ReferenceExpression ref; + switch (ident.toLowerCase()) { + case "_index": + case "_id": + ref = DSL.ref(ident, ExprCoreType.STRING); + break; + case "_score": + case "_maxscore": + ref = DSL.ref(ident, ExprCoreType.FLOAT); + break; + case "_sort": + ref = DSL.ref(ident, ExprCoreType.LONG); + break; + default: + throw new SemanticCheckException("invalid metadata field"); + } + return ref; } private Expression visitIdentifier(String ident, AnalysisContext context) { diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/ScoreFunction.java b/core/src/main/java/org/opensearch/sql/ast/expression/ScoreFunction.java index 0587151930..cdde418834 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/ScoreFunction.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/ScoreFunction.java @@ -5,16 +5,19 @@ package org.opensearch.sql.ast.expression; -import java.util.List; import lombok.AllArgsConstructor; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.ToString; import org.opensearch.sql.ast.AbstractNodeVisitor; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.stream.Stream; + /** - * Expression node of Score function. - * Score takes a relevance-search expression as an argument and returns it + * Expression node of Highlight function. 
*/ @AllArgsConstructor @EqualsAndHashCode(callSuper = false) diff --git a/core/src/main/java/org/opensearch/sql/expression/DSL.java b/core/src/main/java/org/opensearch/sql/expression/DSL.java index 0d10a4f38c..a22535507d 100644 --- a/core/src/main/java/org/opensearch/sql/expression/DSL.java +++ b/core/src/main/java/org/opensearch/sql/expression/DSL.java @@ -794,14 +794,6 @@ public static FunctionExpression score(Expression... args) { return compile(FunctionProperties.None, BuiltinFunctionName.SCORE, args); } - public static FunctionExpression scorequery(Expression... args) { - return compile(FunctionProperties.None, BuiltinFunctionName.SCOREQUERY, args); - } - - public static FunctionExpression score_query(Expression... args) { - return compile(FunctionProperties.None, BuiltinFunctionName.SCORE_QUERY, args); - } - public static FunctionExpression now(FunctionProperties functionProperties, Expression... args) { return compile(functionProperties, BuiltinFunctionName.NOW, args); diff --git a/core/src/main/java/org/opensearch/sql/expression/ExpressionNodeVisitor.java b/core/src/main/java/org/opensearch/sql/expression/ExpressionNodeVisitor.java index e3d4e38674..6ab2375067 100644 --- a/core/src/main/java/org/opensearch/sql/expression/ExpressionNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/expression/ExpressionNodeVisitor.java @@ -60,6 +60,10 @@ public T visitHighlight(HighlightExpression node, C context) { return visitNode(node, context); } + public T visitScore(ScoreExpression node, C context) { + return visitNode(node, context); + } + public T visitReference(ReferenceExpression node, C context) { return visitNode(node, context); } diff --git a/core/src/main/java/org/opensearch/sql/expression/ScoreExpression.java b/core/src/main/java/org/opensearch/sql/expression/ScoreExpression.java new file mode 100644 index 0000000000..d061dad6d2 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/expression/ScoreExpression.java @@ -0,0 +1,89 @@ +/* + * 
Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression; + +import lombok.Getter; +import org.opensearch.sql.common.utils.StringUtils; +import org.opensearch.sql.data.model.ExprNullValue; +import org.opensearch.sql.data.model.ExprTupleValue; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.data.model.ExprValueUtils; +import org.opensearch.sql.data.type.ExprCoreType; +import org.opensearch.sql.data.type.ExprType; +import org.opensearch.sql.expression.env.Environment; +import org.opensearch.sql.expression.function.BuiltinFunctionName; + +import java.util.LinkedHashMap; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +/** + * Score Expression. + */ +@Getter +public class ScoreExpression extends FunctionExpression { + + private final Expression relevanceQueryExpr; + + /** + * ScoreExpression Constructor. + * @param relevanceQueryExpr : relevanceQueryExpr for expression. + */ + public ScoreExpression(Expression relevanceQueryExpr) { + super(BuiltinFunctionName.SCORE.getName(), List.of(relevanceQueryExpr)); + this.relevanceQueryExpr = relevanceQueryExpr; + } + + /** + * Return collection value matching relevance query expression. + * @param valueEnv : Dataset to parse value from. + * @return : collection value of relevance query expression. + */ + @Override + public ExprValue valueOf(Environment valueEnv) { +// String refName = "_highlight"; +// // Not a wilcard expression +// if (this.type == ExprCoreType.ARRAY) { +// refName += "." + StringUtils.unquoteText(getHighlightField().toString()); +// } +// ExprValue value = valueEnv.resolve(DSL.ref(refName, ExprCoreType.STRING)); +// +// // In the event of multiple returned highlights and wildcard being +// // used in conjunction with other highlight calls, we need to ensure +// // only wildcard regex matching is mapped to wildcard call. 
+// if (this.type == ExprCoreType.STRUCT && value.type() == ExprCoreType.STRUCT) { +// value = new ExprTupleValue( +// new LinkedHashMap(value.tupleValue() +// .entrySet() +// .stream() +// .filter(s -> matchesHighlightRegex(s.getKey(), +// StringUtils.unquoteText(highlightField.toString()))) +// .collect(Collectors.toMap( +// e -> e.getKey(), +// e -> e.getValue())))); +// if (value.tupleValue().isEmpty()) { +// value = ExprValueUtils.missingValue(); +// } +// } + + // TODO: this is where we visit relevance function nodes and update BOOST values as necessary + // Otherwise, this is a no-op + + return ExprNullValue.of(); + } + + @Override + public T accept(ExpressionNodeVisitor visitor, C context) { + return visitor.visitScore(this, context); + } + + @Override + public ExprType type() { + return ExprCoreType.UNDEFINED; + } +} diff --git a/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java b/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java index 9a50aca344..939706804b 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java @@ -6,13 +6,14 @@ package org.opensearch.sql.expression.function; import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN; +import static org.opensearch.sql.data.type.ExprCoreType.DOUBLE; import java.util.List; import java.util.stream.Collectors; -import lombok.Getter; -import lombok.Setter; import lombok.experimental.UtilityClass; +import org.opensearch.sql.data.model.ExprDoubleValue; import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.expression.Expression; import org.opensearch.sql.expression.FunctionExpression; @@ -93,6 +94,27 @@ private static FunctionResolver wildcard_query(BuiltinFunctionName wildcardQuery return new 
RelevanceFunctionResolver(funcName); } + /** + * Definition of score() function. + * Enables score calculation for the match call + */ +// private static DefaultFunctionResolver score(BuiltinFunctionName score) { +// FunctionName funcName = score.getName(); +// return FunctionDSL.define(funcName, +// FunctionDSL.impl( +// FunctionDSL.nullMissingHandling( +// (relevanceFunc) -> new ExprDoubleValue( +// Math.pow(relevanceFunc.shortValue(), 1)) +// ), +// BOOLEAN, BOOLEAN), +// FunctionDSL.impl( +// FunctionDSL.nullMissingHandling( +// (relevanceFunc, boost) -> new ExprDoubleValue( +// Math.pow(relevanceFunc.shortValue(), boost.shortValue())) +// ), +// BOOLEAN, BOOLEAN, DOUBLE)); +// } + private static FunctionResolver score(BuiltinFunctionName score) { FunctionName funcName = score.getName(); return new RelevanceFunctionResolver(funcName); @@ -102,10 +124,6 @@ public static class OpenSearchFunction extends FunctionExpression { private final FunctionName functionName; private final List arguments; - @Getter - @Setter - private boolean isScoreTracked; - /** * Required argument constructor. 
* @param functionName name of the function @@ -115,7 +133,6 @@ public OpenSearchFunction(FunctionName functionName, List arguments) super(functionName, arguments); this.functionName = functionName; this.arguments = arguments; - this.isScoreTracked = false; } @Override diff --git a/core/src/main/java/org/opensearch/sql/planner/optimizer/rule/read/TableScanPushDown.java b/core/src/main/java/org/opensearch/sql/planner/optimizer/rule/read/TableScanPushDown.java index 556a12bb34..bb4e51c39e 100644 --- a/core/src/main/java/org/opensearch/sql/planner/optimizer/rule/read/TableScanPushDown.java +++ b/core/src/main/java/org/opensearch/sql/planner/optimizer/rule/read/TableScanPushDown.java @@ -75,6 +75,12 @@ public class TableScanPushDown implements Rule { .apply((highlight, scanBuilder) -> scanBuilder.pushDownHighlight(highlight)); + public static final Rule PUSH_DOWN_SCORE = + match(highlight(scanBuilder())).apply( + (highlight, scanBuilder) -> scanBuilder.pushDownHighlight(highlight) + ); + + /** Pattern that matches a plan node. 
*/ private final WithPattern pattern; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java index d598476a3e..ee4a89119e 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java @@ -78,6 +78,8 @@ public class OpenSearchRequestBuilder { */ private Integer querySize; + private boolean trackScores; + public OpenSearchRequestBuilder(String indexName, Integer maxResultWindow, Settings settings, @@ -97,10 +99,11 @@ public OpenSearchRequestBuilder(OpenSearchRequest.IndexName indexName, this.sourceBuilder = new SearchSourceBuilder(); this.exprValueFactory = exprValueFactory; this.querySize = settings.getSettingValue(Settings.Key.QUERY_SIZE_LIMIT); + this.trackScores = true; sourceBuilder.from(0); sourceBuilder.size(querySize); sourceBuilder.timeout(DEFAULT_QUERY_TIMEOUT); - sourceBuilder.trackScores(false); + sourceBuilder.trackScores(this.trackScores); } /** @@ -181,10 +184,6 @@ public void pushDownLimit(Integer limit, Integer offset) { sourceBuilder.from(offset).size(limit); } - public void pushDownTrackedScore(boolean trackScores) { - sourceBuilder.trackScores(trackScores); - } - /** * Add highlight to DSL requests. 
* @param field name of the field to highlight diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java index d7483cfcf0..9b36a29bf2 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java @@ -34,6 +34,8 @@ public class OpenSearchIndexScanBuilder extends TableScanBuilder { /** Is limit operator pushed down. */ private boolean isLimitPushedDown = false; + private boolean isScoreTrackedPushedDown = false; + @VisibleForTesting OpenSearchIndexScanBuilder(TableScanBuilder delegate) { this.delegate = delegate; diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java index 4dfb148ac8..162b0612f0 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java @@ -31,6 +31,7 @@ import static org.opensearch.sql.planner.optimizer.rule.read.TableScanPushDown.PUSH_DOWN_HIGHLIGHT; import static org.opensearch.sql.planner.optimizer.rule.read.TableScanPushDown.PUSH_DOWN_LIMIT; import static org.opensearch.sql.planner.optimizer.rule.read.TableScanPushDown.PUSH_DOWN_PROJECT; +import static org.opensearch.sql.planner.optimizer.rule.read.TableScanPushDown.PUSH_DOWN_SCORE; import static org.opensearch.sql.planner.optimizer.rule.read.TableScanPushDown.PUSH_DOWN_SORT; import com.google.common.collect.ImmutableList; @@ -745,6 +746,7 @@ private LogicalPlan optimize(LogicalPlan plan) { PUSH_DOWN_SORT, PUSH_DOWN_LIMIT, 
PUSH_DOWN_HIGHLIGHT, + PUSH_DOWN_SCORE, PUSH_DOWN_PROJECT)); return optimizer.optimize(plan); } diff --git a/sql/src/main/antlr/OpenSearchSQLParser.g4 b/sql/src/main/antlr/OpenSearchSQLParser.g4 index 722c4bd98e..e864f880e2 100644 --- a/sql/src/main/antlr/OpenSearchSQLParser.g4 +++ b/sql/src/main/antlr/OpenSearchSQLParser.g4 @@ -358,7 +358,7 @@ relevanceFunction ; scoreRelevanceFunction - : scoreRelevanceFunctionName LR_BRACKET relevanceFunction (COMMA functionArg)* RR_BRACKET + : scoreRelevanceFunctionName=SCORE LR_BRACKET relevanceFunction (COMMA functionArg)* RR_BRACKET ; noFieldRelevanceFunction @@ -506,10 +506,6 @@ systemFunctionName : TYPEOF ; -scoreRelevanceFunctionName - : SCORE | SCOREQUERY | SCORE_QUERY - ; - singleFieldRelevanceFunctionName : MATCH | MATCHQUERY | MATCH_QUERY | MATCH_PHRASE | MATCHPHRASE | MATCHPHRASEQUERY diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java index a19d7ad3f3..35abe12879 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java @@ -9,6 +9,7 @@ import static org.opensearch.sql.ast.dsl.AstDSL.between; import static org.opensearch.sql.ast.dsl.AstDSL.not; import static org.opensearch.sql.ast.dsl.AstDSL.qualifiedName; +import static org.opensearch.sql.ast.dsl.AstDSL.stringLiteral; import static org.opensearch.sql.expression.function.BuiltinFunctionName.IS_NOT_NULL; import static org.opensearch.sql.expression.function.BuiltinFunctionName.IS_NULL; import static org.opensearch.sql.expression.function.BuiltinFunctionName.LIKE; @@ -40,6 +41,7 @@ import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.MathExpressionAtomContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.MultiFieldRelevanceFunctionContext; import static 
org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.NoFieldRelevanceFunctionContext; +import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.ScoreRelevanceFunctionContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.NotExpressionContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.NullLiteralContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.OverClauseContext; @@ -51,7 +53,6 @@ import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.RelevanceFieldAndWeightContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.ScalarFunctionCallContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.ScalarWindowFunctionContext; -import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.ScoreRelevanceFunctionContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.ShowDescribePatternContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.SignedDecimalContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.SignedRealContext; @@ -99,6 +100,7 @@ import org.opensearch.sql.ast.tree.Sort.SortOption; import org.opensearch.sql.common.utils.StringUtils; import org.opensearch.sql.expression.function.BuiltinFunctionName; +import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.AlternateMultiMatchQueryContext; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.AndExpressionContext; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.ColumnNameContext; @@ -106,6 +108,7 @@ import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.IntervalLiteralContext; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.NestedExpressionAtomContext; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.OrExpressionContext; +import 
org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.RelevanceFunctionContext; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.TableNameContext; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParserBaseVisitor; @@ -175,11 +178,11 @@ public UnresolvedExpression visitHighlightFunctionCall( @Override public UnresolvedExpression visitPositionFunction( - PositionFunctionContext ctx) { + PositionFunctionContext ctx) { return new Function( - POSITION.getName().getFunctionName(), - Arrays.asList(visitFunctionArg(ctx.functionArg(0)), - visitFunctionArg(ctx.functionArg(1)))); + POSITION.getName().getFunctionName(), + Arrays.asList(visitFunctionArg(ctx.functionArg(0)), + visitFunctionArg(ctx.functionArg(1)))); } @Override @@ -216,20 +219,20 @@ public UnresolvedExpression visitWindowFunctionClause(WindowFunctionClauseContex List partitionByList = Collections.emptyList(); if (overClause.partitionByClause() != null) { partitionByList = overClause.partitionByClause() - .expression() - .stream() - .map(this::visit) - .collect(Collectors.toList()); + .expression() + .stream() + .map(this::visit) + .collect(Collectors.toList()); } List> sortList = Collections.emptyList(); if (overClause.orderByClause() != null) { sortList = overClause.orderByClause() - .orderByElement() - .stream() - .map(item -> ImmutablePair.of( - createSortOption(item), visit(item.expression()))) - .collect(Collectors.toList()); + .orderByElement() + .stream() + .map(item -> ImmutablePair.of( + createSortOption(item), visit(item.expression()))) + .collect(Collectors.toList()); } return new WindowFunction(visit(ctx.function), partitionByList, sortList); } @@ -298,7 +301,7 @@ public UnresolvedExpression visitLikePredicate(LikePredicateContext ctx) { @Override public UnresolvedExpression visitRegexpPredicate(RegexpPredicateContext ctx) { return new Function(REGEXP.getName().getFunctionName(), - Arrays.asList(visit(ctx.left), visit(ctx.right))); + Arrays.asList(visit(ctx.left), 
visit(ctx.right))); } @Override @@ -399,9 +402,9 @@ public UnresolvedExpression visitBinaryComparisonPredicate( public UnresolvedExpression visitCaseFunctionCall(CaseFunctionCallContext ctx) { UnresolvedExpression caseValue = (ctx.expression() == null) ? null : visit(ctx.expression()); List whenStatements = ctx.caseFuncAlternative() - .stream() - .map(when -> (When) visit(when)) - .collect(Collectors.toList()); + .stream() + .map(when -> (When) visit(when)) + .collect(Collectors.toList()); UnresolvedExpression elseStatement = (ctx.elseArg == null) ? null : visit(ctx.elseArg); return new Case(caseValue, whenStatements, elseStatement); @@ -426,10 +429,10 @@ public UnresolvedExpression visitConvertedDataType( @Override public UnresolvedExpression visitNoFieldRelevanceFunction( - NoFieldRelevanceFunctionContext ctx) { + NoFieldRelevanceFunctionContext ctx) { return new Function( - ctx.noFieldRelevanceFunctionName().getText().toLowerCase(), - noFieldRelevanceArguments(ctx)); + ctx.noFieldRelevanceFunctionName().getText().toLowerCase(), + noFieldRelevanceArguments(ctx)); } @Override @@ -457,7 +460,7 @@ public UnresolvedExpression visitMultiFieldRelevanceFunction( if ((funcName.equalsIgnoreCase(BuiltinFunctionName.MULTI_MATCH.toString()) || funcName.equalsIgnoreCase(BuiltinFunctionName.MULTIMATCH.toString()) || funcName.equalsIgnoreCase(BuiltinFunctionName.MULTIMATCHQUERY.toString())) - && !ctx.getRuleContexts(AlternateMultiMatchQueryContext.class) + && ! ctx.getRuleContexts(AlternateMultiMatchQueryContext.class) .isEmpty()) { return new Function( ctx.multiFieldRelevanceFunctionName().getText().toLowerCase(), @@ -477,16 +480,13 @@ public UnresolvedExpression visitAltMultiFieldRelevanceFunction( altMultiFieldRelevanceFunctionArguments(ctx)); } - /** - * Visit score-relevance function and collect children. 
- * - * @param ctx the parse tree - * @return children - */ public UnresolvedExpression visitScoreRelevanceFunction(ScoreRelevanceFunctionContext ctx) { + RelevanceFunctionContext relevanceFunction = ctx.relevanceFunction(); + List functionArgs = ctx.functionArg(); + return new ScoreFunction( - visit(ctx.relevanceFunction()), - ctx.functionArg().stream().map(this::visitFunctionArg).collect(Collectors.toList()) + visit(ctx.relevanceFunction()), + ctx.functionArg().stream().map(this::visitFunctionArg).collect(Collectors.toList()) ); } @@ -502,14 +502,13 @@ private Function buildFunction(String functionName, } private QualifiedName visitIdentifiers(List identifiers) { - Boolean isMetadataField = identifiers.stream().filter( - id -> id.metadataField() != null).findFirst().isPresent(); + Boolean isMetadataField = identifiers.stream().filter(id -> id.metadataField() != null).findFirst().isPresent(); return new QualifiedName( - identifiers.stream() - .map(RuleContext::getText) - .map(StringUtils::unquoteIdentifier) - .collect(Collectors.toList()), - isMetadataField); + identifiers.stream() + .map(RuleContext::getText) + .map(StringUtils::unquoteIdentifier) + .collect(Collectors.toList()), + isMetadataField); } private void fillRelevanceArgs(List args, @@ -524,18 +523,18 @@ private void fillRelevanceArgs(List args, } private List noFieldRelevanceArguments( - NoFieldRelevanceFunctionContext ctx) { + NoFieldRelevanceFunctionContext ctx) { // all the arguments are defaulted to string values // to skip environment resolving and function signature resolving ImmutableList.Builder builder = ImmutableList.builder(); builder.add(new UnresolvedArgument("query", - new Literal(StringUtils.unquoteText(ctx.query.getText()), DataType.STRING))); + new Literal(StringUtils.unquoteText(ctx.query.getText()), DataType.STRING))); fillRelevanceArgs(ctx.relevanceArg(), builder); return builder.build(); } private List singleFieldRelevanceArguments( - SingleFieldRelevanceFunctionContext ctx) { + 
SingleFieldRelevanceFunctionContext ctx) { // all the arguments are defaulted to string values // to skip environment resolving and function signature resolving ImmutableList.Builder builder = ImmutableList.builder(); @@ -591,7 +590,6 @@ private List getFormatFunctionArguments( /** * Adds support for multi_match alternate syntax like * MULTI_MATCH('query'='Dale', 'fields'='*name'). - * * @param ctx : Context for multi field relevance function. * @return : Returns list of all arguments for relevance function. */ @@ -604,7 +602,7 @@ private List alternateMultiMatchArguments( String[] fieldAndWeights = StringUtils.unquoteText( ctx.getRuleContexts(AlternateMultiMatchFieldContext.class) - .stream().findFirst().get().argVal.getText()).split(","); + .stream().findFirst().get().argVal.getText()).split(","); for (var fieldAndWeight : fieldAndWeights) { String[] splitFieldAndWeights = fieldAndWeight.split("\\^"); @@ -616,9 +614,9 @@ private List alternateMultiMatchArguments( ctx.getRuleContexts(AlternateMultiMatchQueryContext.class) .stream().findFirst().ifPresent( - arg -> - builder.add(new UnresolvedArgument("query", - new Literal(StringUtils.unquoteText(arg.argVal.getText()), DataType.STRING))) + arg -> + builder.add(new UnresolvedArgument("query", + new Literal(StringUtils.unquoteText(arg.argVal.getText()), DataType.STRING))) ); fillRelevanceArgs(ctx.relevanceArg(), builder); From 1c05aba35b9a88626f8e399b1d974a8e6a7d5fa6 Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Fri, 3 Mar 2023 13:44:35 -0800 Subject: [PATCH 25/40] fix checkstyle violations Signed-off-by: Andrew Carbonetto --- .../sql/sql/parser/AstExpressionBuilder.java | 88 ++++++++++--------- 1 file changed, 45 insertions(+), 43 deletions(-) diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java index 35abe12879..455f2ca444 100644 --- 
a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java @@ -9,7 +9,6 @@ import static org.opensearch.sql.ast.dsl.AstDSL.between; import static org.opensearch.sql.ast.dsl.AstDSL.not; import static org.opensearch.sql.ast.dsl.AstDSL.qualifiedName; -import static org.opensearch.sql.ast.dsl.AstDSL.stringLiteral; import static org.opensearch.sql.expression.function.BuiltinFunctionName.IS_NOT_NULL; import static org.opensearch.sql.expression.function.BuiltinFunctionName.IS_NULL; import static org.opensearch.sql.expression.function.BuiltinFunctionName.LIKE; @@ -41,7 +40,6 @@ import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.MathExpressionAtomContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.MultiFieldRelevanceFunctionContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.NoFieldRelevanceFunctionContext; -import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.ScoreRelevanceFunctionContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.NotExpressionContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.NullLiteralContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.OverClauseContext; @@ -53,6 +51,7 @@ import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.RelevanceFieldAndWeightContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.ScalarFunctionCallContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.ScalarWindowFunctionContext; +import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.ScoreRelevanceFunctionContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.ShowDescribePatternContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.SignedDecimalContext; import static 
org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.SignedRealContext; @@ -100,7 +99,6 @@ import org.opensearch.sql.ast.tree.Sort.SortOption; import org.opensearch.sql.common.utils.StringUtils; import org.opensearch.sql.expression.function.BuiltinFunctionName; -import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.AlternateMultiMatchQueryContext; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.AndExpressionContext; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.ColumnNameContext; @@ -108,7 +106,6 @@ import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.IntervalLiteralContext; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.NestedExpressionAtomContext; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.OrExpressionContext; -import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.RelevanceFunctionContext; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.TableNameContext; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParserBaseVisitor; @@ -178,11 +175,11 @@ public UnresolvedExpression visitHighlightFunctionCall( @Override public UnresolvedExpression visitPositionFunction( - PositionFunctionContext ctx) { + PositionFunctionContext ctx) { return new Function( - POSITION.getName().getFunctionName(), - Arrays.asList(visitFunctionArg(ctx.functionArg(0)), - visitFunctionArg(ctx.functionArg(1)))); + POSITION.getName().getFunctionName(), + Arrays.asList(visitFunctionArg(ctx.functionArg(0)), + visitFunctionArg(ctx.functionArg(1)))); } @Override @@ -219,20 +216,20 @@ public UnresolvedExpression visitWindowFunctionClause(WindowFunctionClauseContex List partitionByList = Collections.emptyList(); if (overClause.partitionByClause() != null) { partitionByList = overClause.partitionByClause() - .expression() - .stream() - .map(this::visit) - .collect(Collectors.toList()); + .expression() + .stream() + 
.map(this::visit) + .collect(Collectors.toList()); } List> sortList = Collections.emptyList(); if (overClause.orderByClause() != null) { sortList = overClause.orderByClause() - .orderByElement() - .stream() - .map(item -> ImmutablePair.of( - createSortOption(item), visit(item.expression()))) - .collect(Collectors.toList()); + .orderByElement() + .stream() + .map(item -> ImmutablePair.of( + createSortOption(item), visit(item.expression()))) + .collect(Collectors.toList()); } return new WindowFunction(visit(ctx.function), partitionByList, sortList); } @@ -301,7 +298,7 @@ public UnresolvedExpression visitLikePredicate(LikePredicateContext ctx) { @Override public UnresolvedExpression visitRegexpPredicate(RegexpPredicateContext ctx) { return new Function(REGEXP.getName().getFunctionName(), - Arrays.asList(visit(ctx.left), visit(ctx.right))); + Arrays.asList(visit(ctx.left), visit(ctx.right))); } @Override @@ -402,9 +399,9 @@ public UnresolvedExpression visitBinaryComparisonPredicate( public UnresolvedExpression visitCaseFunctionCall(CaseFunctionCallContext ctx) { UnresolvedExpression caseValue = (ctx.expression() == null) ? null : visit(ctx.expression()); List whenStatements = ctx.caseFuncAlternative() - .stream() - .map(when -> (When) visit(when)) - .collect(Collectors.toList()); + .stream() + .map(when -> (When) visit(when)) + .collect(Collectors.toList()); UnresolvedExpression elseStatement = (ctx.elseArg == null) ? 
null : visit(ctx.elseArg); return new Case(caseValue, whenStatements, elseStatement); @@ -429,10 +426,10 @@ public UnresolvedExpression visitConvertedDataType( @Override public UnresolvedExpression visitNoFieldRelevanceFunction( - NoFieldRelevanceFunctionContext ctx) { + NoFieldRelevanceFunctionContext ctx) { return new Function( - ctx.noFieldRelevanceFunctionName().getText().toLowerCase(), - noFieldRelevanceArguments(ctx)); + ctx.noFieldRelevanceFunctionName().getText().toLowerCase(), + noFieldRelevanceArguments(ctx)); } @Override @@ -460,7 +457,7 @@ public UnresolvedExpression visitMultiFieldRelevanceFunction( if ((funcName.equalsIgnoreCase(BuiltinFunctionName.MULTI_MATCH.toString()) || funcName.equalsIgnoreCase(BuiltinFunctionName.MULTIMATCH.toString()) || funcName.equalsIgnoreCase(BuiltinFunctionName.MULTIMATCHQUERY.toString())) - && ! ctx.getRuleContexts(AlternateMultiMatchQueryContext.class) + && !ctx.getRuleContexts(AlternateMultiMatchQueryContext.class) .isEmpty()) { return new Function( ctx.multiFieldRelevanceFunctionName().getText().toLowerCase(), @@ -480,13 +477,16 @@ public UnresolvedExpression visitAltMultiFieldRelevanceFunction( altMultiFieldRelevanceFunctionArguments(ctx)); } + /** + * Visit score-relevance function and collect children. 
+ * + * @param ctx the parse tree + * @return children + */ public UnresolvedExpression visitScoreRelevanceFunction(ScoreRelevanceFunctionContext ctx) { - RelevanceFunctionContext relevanceFunction = ctx.relevanceFunction(); - List functionArgs = ctx.functionArg(); - return new ScoreFunction( - visit(ctx.relevanceFunction()), - ctx.functionArg().stream().map(this::visitFunctionArg).collect(Collectors.toList()) + visit(ctx.relevanceFunction()), + ctx.functionArg().stream().map(this::visitFunctionArg).collect(Collectors.toList()) ); } @@ -502,13 +502,14 @@ private Function buildFunction(String functionName, } private QualifiedName visitIdentifiers(List identifiers) { - Boolean isMetadataField = identifiers.stream().filter(id -> id.metadataField() != null).findFirst().isPresent(); + Boolean isMetadataField = + identifiers.stream().anyMatch(id -> id.metadataField() != null); return new QualifiedName( - identifiers.stream() - .map(RuleContext::getText) - .map(StringUtils::unquoteIdentifier) - .collect(Collectors.toList()), - isMetadataField); + identifiers.stream() + .map(RuleContext::getText) + .map(StringUtils::unquoteIdentifier) + .collect(Collectors.toList()), + isMetadataField); } private void fillRelevanceArgs(List args, @@ -523,18 +524,18 @@ private void fillRelevanceArgs(List args, } private List noFieldRelevanceArguments( - NoFieldRelevanceFunctionContext ctx) { + NoFieldRelevanceFunctionContext ctx) { // all the arguments are defaulted to string values // to skip environment resolving and function signature resolving ImmutableList.Builder builder = ImmutableList.builder(); builder.add(new UnresolvedArgument("query", - new Literal(StringUtils.unquoteText(ctx.query.getText()), DataType.STRING))); + new Literal(StringUtils.unquoteText(ctx.query.getText()), DataType.STRING))); fillRelevanceArgs(ctx.relevanceArg(), builder); return builder.build(); } private List singleFieldRelevanceArguments( - SingleFieldRelevanceFunctionContext ctx) { + 
SingleFieldRelevanceFunctionContext ctx) { // all the arguments are defaulted to string values // to skip environment resolving and function signature resolving ImmutableList.Builder builder = ImmutableList.builder(); @@ -590,6 +591,7 @@ private List getFormatFunctionArguments( /** * Adds support for multi_match alternate syntax like * MULTI_MATCH('query'='Dale', 'fields'='*name'). + * * @param ctx : Context for multi field relevance function. * @return : Returns list of all arguments for relevance function. */ @@ -602,7 +604,7 @@ private List alternateMultiMatchArguments( String[] fieldAndWeights = StringUtils.unquoteText( ctx.getRuleContexts(AlternateMultiMatchFieldContext.class) - .stream().findFirst().get().argVal.getText()).split(","); + .stream().findFirst().get().argVal.getText()).split(","); for (var fieldAndWeight : fieldAndWeights) { String[] splitFieldAndWeights = fieldAndWeight.split("\\^"); @@ -614,9 +616,9 @@ private List alternateMultiMatchArguments( ctx.getRuleContexts(AlternateMultiMatchQueryContext.class) .stream().findFirst().ifPresent( - arg -> - builder.add(new UnresolvedArgument("query", - new Literal(StringUtils.unquoteText(arg.argVal.getText()), DataType.STRING))) + arg -> + builder.add(new UnresolvedArgument("query", + new Literal(StringUtils.unquoteText(arg.argVal.getText()), DataType.STRING))) ); fillRelevanceArgs(ctx.relevanceArg(), builder); From f97dfeae6bb0a1e47f55b00c7a0c7874cf20854f Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Tue, 7 Mar 2023 15:02:27 -0800 Subject: [PATCH 26/40] Revert bad conflict resolution Signed-off-by: Andrew Carbonetto --- .../sql/analysis/ExpressionAnalyzer.java | 101 ++++++++++++++---- .../sql/ast/expression/ScoreFunction.java | 9 +- .../org/opensearch/sql/expression/DSL.java | 8 ++ .../sql/expression/ExpressionNodeVisitor.java | 4 - .../sql/expression/ScoreExpression.java | 89 --------------- .../function/OpenSearchFunctions.java | 31 ++---- .../rule/read/TableScanPushDown.java | 6 -- 
.../request/OpenSearchRequestBuilder.java | 9 +- .../scan/OpenSearchIndexScanBuilder.java | 2 - .../OpenSearchIndexScanOptimizationTest.java | 2 - sql/src/main/antlr/OpenSearchSQLParser.g4 | 6 +- .../sql/sql/parser/AstExpressionBuilder.java | 4 +- 12 files changed, 111 insertions(+), 160 deletions(-) delete mode 100644 core/src/main/java/org/opensearch/sql/expression/ScoreExpression.java diff --git a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java index fafd29e963..75b093248a 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java @@ -8,6 +8,7 @@ import static org.opensearch.sql.ast.dsl.AstDSL.and; import static org.opensearch.sql.ast.dsl.AstDSL.compare; +import static org.opensearch.sql.ast.expression.QualifiedName.METADATAFIELD_TYPE_MAP; import static org.opensearch.sql.expression.function.BuiltinFunctionName.GTE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.LTE; @@ -31,6 +32,7 @@ import org.opensearch.sql.ast.expression.Case; import org.opensearch.sql.ast.expression.Cast; import org.opensearch.sql.ast.expression.Compare; +import org.opensearch.sql.ast.expression.DataType; import org.opensearch.sql.ast.expression.EqualTo; import org.opensearch.sql.ast.expression.Field; import org.opensearch.sql.ast.expression.Function; @@ -62,7 +64,6 @@ import org.opensearch.sql.expression.NamedArgumentExpression; import org.opensearch.sql.expression.NamedExpression; import org.opensearch.sql.expression.ReferenceExpression; -import org.opensearch.sql.expression.ScoreExpression; import org.opensearch.sql.expression.aggregation.AggregationState; import org.opensearch.sql.expression.aggregation.Aggregator; import org.opensearch.sql.expression.conditional.cases.CaseClause; @@ -70,6 +71,7 @@ import org.opensearch.sql.expression.function.BuiltinFunctionName; 
import org.opensearch.sql.expression.function.BuiltinFunctionRepository; import org.opensearch.sql.expression.function.FunctionName; +import org.opensearch.sql.expression.function.OpenSearchFunctions; import org.opensearch.sql.expression.parse.ParseExpression; import org.opensearch.sql.expression.span.SpanExpression; import org.opensearch.sql.expression.window.aggregation.AggregateWindowFunction; @@ -210,9 +212,75 @@ public Expression visitHighlightFunction(HighlightFunction node, AnalysisContext return new HighlightExpression(expr); } + /** + * visitScoreFunction removes the score function from the AST and replaces it with the child + * relevance function node. If the optional boost variable is provided, the boost argument + * of the relevance function is combined. + * @param node score function node + * @param context analysis context for the query + * @return resolved relevance function + */ public Expression visitScoreFunction(ScoreFunction node, AnalysisContext context) { - Expression relevanceQueryExpr = node.getRelevanceQuery().accept(this, context); - return new ScoreExpression(relevanceQueryExpr); + // if no function argument given, just accept the relevance query and return + if (node.getFuncArgs().isEmpty() || !(node.getFuncArgs().get(0) instanceof Literal)) { + OpenSearchFunctions.OpenSearchFunction relevanceQueryExpr = + (OpenSearchFunctions.OpenSearchFunction) node + .getRelevanceQuery().accept(this, context); + relevanceQueryExpr.setScoreTracked(true); + return relevanceQueryExpr; + } + + // note: if an argument exists, and there should only be one, it will be a boost argument + Literal boostFunctionArg = (Literal) node.getFuncArgs().get(0); + Double thisBoostValue; + if (boostFunctionArg.getType().equals(DataType.DOUBLE)) { + thisBoostValue = ((Double) boostFunctionArg.getValue()); + } else if (boostFunctionArg.getType().equals(DataType.INTEGER)) { + thisBoostValue = ((Integer) boostFunctionArg.getValue()).doubleValue(); + } else { + throw new 
SemanticCheckException(String.format("Expected boost type '%s' but got '%s'", + DataType.DOUBLE.name(), boostFunctionArg.getType().name())); + } + + // update the existing unresolved expression to add a boost argument if it doesn't exist + // OR multiply the existing boost argument + Function relevanceQueryUnresolvedExpr = (Function)node.getRelevanceQuery(); + List relevanceFuncArgs = relevanceQueryUnresolvedExpr.getFuncArgs(); + + boolean doesFunctionContainBoostArgument = false; + List updatedFuncArgs = new ArrayList<>(); + for (UnresolvedExpression expr: relevanceFuncArgs) { + String argumentName = ((UnresolvedArgument) expr).getArgName(); + if (argumentName.equalsIgnoreCase("boost")) { + doesFunctionContainBoostArgument = true; + Literal boostArgLiteral = (Literal)((UnresolvedArgument) expr).getValue(); + Double boostValue = Double.parseDouble((String)boostArgLiteral.getValue()) * thisBoostValue; + UnresolvedArgument newBoostArg = new UnresolvedArgument( + argumentName, + new Literal(boostValue.toString(), DataType.STRING) + ); + updatedFuncArgs.add(newBoostArg); + } else { + updatedFuncArgs.add(expr); + } + } + + // since nothing was found, add an argument + if (!doesFunctionContainBoostArgument) { + UnresolvedArgument newBoostArg = new UnresolvedArgument( + "boost", new Literal(Double.toString(thisBoostValue), DataType.STRING)); + updatedFuncArgs.add(newBoostArg); + } + + // create a new function expression with boost argument and resolve it + Function updatedRelevanceQueryUnresolvedExpr = new Function( + relevanceQueryUnresolvedExpr.getFuncName(), + updatedFuncArgs); + OpenSearchFunctions.OpenSearchFunction relevanceQueryExpr = + (OpenSearchFunctions.OpenSearchFunction) updatedRelevanceQueryUnresolvedExpr + .accept(this, context); + relevanceQueryExpr.setScoreTracked(true); + return relevanceQueryExpr; } @Override @@ -324,24 +392,17 @@ public Expression visitUnresolvedArgument(UnresolvedArgument node, AnalysisConte return new 
NamedArgumentExpression(node.getArgName(), node.getValue().accept(this, context)); } + /** + * If QualifiedName is actually a reserved metadata field, return the expr type associated + * with the metadata field. + * @param ident metadata field name + * @param context analysis context + * @return DSL reference + */ private Expression visitMetadata(String ident, AnalysisContext context) { - ReferenceExpression ref; - switch (ident.toLowerCase()) { - case "_index": - case "_id": - ref = DSL.ref(ident, ExprCoreType.STRING); - break; - case "_score": - case "_maxscore": - ref = DSL.ref(ident, ExprCoreType.FLOAT); - break; - case "_sort": - ref = DSL.ref(ident, ExprCoreType.LONG); - break; - default: - throw new SemanticCheckException("invalid metadata field"); - } - return ref; + ExprCoreType exprCoreType = Optional.ofNullable(METADATAFIELD_TYPE_MAP.get(ident)) + .orElseThrow(() -> new SemanticCheckException("invalid metadata field")); + return DSL.ref(ident, exprCoreType); } private Expression visitIdentifier(String ident, AnalysisContext context) { diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/ScoreFunction.java b/core/src/main/java/org/opensearch/sql/ast/expression/ScoreFunction.java index cdde418834..0587151930 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/ScoreFunction.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/ScoreFunction.java @@ -5,19 +5,16 @@ package org.opensearch.sql.ast.expression; +import java.util.List; import lombok.AllArgsConstructor; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.ToString; import org.opensearch.sql.ast.AbstractNodeVisitor; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.stream.Stream; - /** - * Expression node of Highlight function. + * Expression node of Score function. 
+ * Score takes a relevance-search expression as an argument and returns it */ @AllArgsConstructor @EqualsAndHashCode(callSuper = false) diff --git a/core/src/main/java/org/opensearch/sql/expression/DSL.java b/core/src/main/java/org/opensearch/sql/expression/DSL.java index a22535507d..0d10a4f38c 100644 --- a/core/src/main/java/org/opensearch/sql/expression/DSL.java +++ b/core/src/main/java/org/opensearch/sql/expression/DSL.java @@ -794,6 +794,14 @@ public static FunctionExpression score(Expression... args) { return compile(FunctionProperties.None, BuiltinFunctionName.SCORE, args); } + public static FunctionExpression scorequery(Expression... args) { + return compile(FunctionProperties.None, BuiltinFunctionName.SCOREQUERY, args); + } + + public static FunctionExpression score_query(Expression... args) { + return compile(FunctionProperties.None, BuiltinFunctionName.SCORE_QUERY, args); + } + public static FunctionExpression now(FunctionProperties functionProperties, Expression... args) { return compile(functionProperties, BuiltinFunctionName.NOW, args); diff --git a/core/src/main/java/org/opensearch/sql/expression/ExpressionNodeVisitor.java b/core/src/main/java/org/opensearch/sql/expression/ExpressionNodeVisitor.java index 6ab2375067..e3d4e38674 100644 --- a/core/src/main/java/org/opensearch/sql/expression/ExpressionNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/expression/ExpressionNodeVisitor.java @@ -60,10 +60,6 @@ public T visitHighlight(HighlightExpression node, C context) { return visitNode(node, context); } - public T visitScore(ScoreExpression node, C context) { - return visitNode(node, context); - } - public T visitReference(ReferenceExpression node, C context) { return visitNode(node, context); } diff --git a/core/src/main/java/org/opensearch/sql/expression/ScoreExpression.java b/core/src/main/java/org/opensearch/sql/expression/ScoreExpression.java deleted file mode 100644 index d061dad6d2..0000000000 --- 
a/core/src/main/java/org/opensearch/sql/expression/ScoreExpression.java +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.expression; - -import lombok.Getter; -import org.opensearch.sql.common.utils.StringUtils; -import org.opensearch.sql.data.model.ExprNullValue; -import org.opensearch.sql.data.model.ExprTupleValue; -import org.opensearch.sql.data.model.ExprValue; -import org.opensearch.sql.data.model.ExprValueUtils; -import org.opensearch.sql.data.type.ExprCoreType; -import org.opensearch.sql.data.type.ExprType; -import org.opensearch.sql.expression.env.Environment; -import org.opensearch.sql.expression.function.BuiltinFunctionName; - -import java.util.LinkedHashMap; -import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import java.util.stream.Collectors; - -/** - * Score Expression. - */ -@Getter -public class ScoreExpression extends FunctionExpression { - - private final Expression relevanceQueryExpr; - - /** - * ScoreExpression Constructor. - * @param relevanceQueryExpr : relevanceQueryExpr for expression. - */ - public ScoreExpression(Expression relevanceQueryExpr) { - super(BuiltinFunctionName.SCORE.getName(), List.of(relevanceQueryExpr)); - this.relevanceQueryExpr = relevanceQueryExpr; - } - - /** - * Return collection value matching relevance query expression. - * @param valueEnv : Dataset to parse value from. - * @return : collection value of relevance query expression. - */ - @Override - public ExprValue valueOf(Environment valueEnv) { -// String refName = "_highlight"; -// // Not a wilcard expression -// if (this.type == ExprCoreType.ARRAY) { -// refName += "." 
+ StringUtils.unquoteText(getHighlightField().toString()); -// } -// ExprValue value = valueEnv.resolve(DSL.ref(refName, ExprCoreType.STRING)); -// -// // In the event of multiple returned highlights and wildcard being -// // used in conjunction with other highlight calls, we need to ensure -// // only wildcard regex matching is mapped to wildcard call. -// if (this.type == ExprCoreType.STRUCT && value.type() == ExprCoreType.STRUCT) { -// value = new ExprTupleValue( -// new LinkedHashMap(value.tupleValue() -// .entrySet() -// .stream() -// .filter(s -> matchesHighlightRegex(s.getKey(), -// StringUtils.unquoteText(highlightField.toString()))) -// .collect(Collectors.toMap( -// e -> e.getKey(), -// e -> e.getValue())))); -// if (value.tupleValue().isEmpty()) { -// value = ExprValueUtils.missingValue(); -// } -// } - - // TODO: this is where we visit relevance function nodes and update BOOST values as necessary - // Otherwise, this is a no-op - - return ExprNullValue.of(); - } - - @Override - public T accept(ExpressionNodeVisitor visitor, C context) { - return visitor.visitScore(this, context); - } - - @Override - public ExprType type() { - return ExprCoreType.UNDEFINED; - } -} diff --git a/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java b/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java index 939706804b..9a50aca344 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java @@ -6,14 +6,13 @@ package org.opensearch.sql.expression.function; import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN; -import static org.opensearch.sql.data.type.ExprCoreType.DOUBLE; import java.util.List; import java.util.stream.Collectors; +import lombok.Getter; +import lombok.Setter; import lombok.experimental.UtilityClass; -import org.opensearch.sql.data.model.ExprDoubleValue; import 
org.opensearch.sql.data.model.ExprValue; -import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.expression.Expression; import org.opensearch.sql.expression.FunctionExpression; @@ -94,27 +93,6 @@ private static FunctionResolver wildcard_query(BuiltinFunctionName wildcardQuery return new RelevanceFunctionResolver(funcName); } - /** - * Definition of score() function. - * Enables score calculation for the match call - */ -// private static DefaultFunctionResolver score(BuiltinFunctionName score) { -// FunctionName funcName = score.getName(); -// return FunctionDSL.define(funcName, -// FunctionDSL.impl( -// FunctionDSL.nullMissingHandling( -// (relevanceFunc) -> new ExprDoubleValue( -// Math.pow(relevanceFunc.shortValue(), 1)) -// ), -// BOOLEAN, BOOLEAN), -// FunctionDSL.impl( -// FunctionDSL.nullMissingHandling( -// (relevanceFunc, boost) -> new ExprDoubleValue( -// Math.pow(relevanceFunc.shortValue(), boost.shortValue())) -// ), -// BOOLEAN, BOOLEAN, DOUBLE)); -// } - private static FunctionResolver score(BuiltinFunctionName score) { FunctionName funcName = score.getName(); return new RelevanceFunctionResolver(funcName); @@ -124,6 +102,10 @@ public static class OpenSearchFunction extends FunctionExpression { private final FunctionName functionName; private final List arguments; + @Getter + @Setter + private boolean isScoreTracked; + /** * Required argument constructor. 
* @param functionName name of the function @@ -133,6 +115,7 @@ public OpenSearchFunction(FunctionName functionName, List arguments) super(functionName, arguments); this.functionName = functionName; this.arguments = arguments; + this.isScoreTracked = false; } @Override diff --git a/core/src/main/java/org/opensearch/sql/planner/optimizer/rule/read/TableScanPushDown.java b/core/src/main/java/org/opensearch/sql/planner/optimizer/rule/read/TableScanPushDown.java index bb4e51c39e..556a12bb34 100644 --- a/core/src/main/java/org/opensearch/sql/planner/optimizer/rule/read/TableScanPushDown.java +++ b/core/src/main/java/org/opensearch/sql/planner/optimizer/rule/read/TableScanPushDown.java @@ -75,12 +75,6 @@ public class TableScanPushDown implements Rule { .apply((highlight, scanBuilder) -> scanBuilder.pushDownHighlight(highlight)); - public static final Rule PUSH_DOWN_SCORE = - match(highlight(scanBuilder())).apply( - (highlight, scanBuilder) -> scanBuilder.pushDownHighlight(highlight) - ); - - /** Pattern that matches a plan node. 
*/ private final WithPattern pattern; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java index ee4a89119e..d598476a3e 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java @@ -78,8 +78,6 @@ public class OpenSearchRequestBuilder { */ private Integer querySize; - private boolean trackScores; - public OpenSearchRequestBuilder(String indexName, Integer maxResultWindow, Settings settings, @@ -99,11 +97,10 @@ public OpenSearchRequestBuilder(OpenSearchRequest.IndexName indexName, this.sourceBuilder = new SearchSourceBuilder(); this.exprValueFactory = exprValueFactory; this.querySize = settings.getSettingValue(Settings.Key.QUERY_SIZE_LIMIT); - this.trackScores = true; sourceBuilder.from(0); sourceBuilder.size(querySize); sourceBuilder.timeout(DEFAULT_QUERY_TIMEOUT); - sourceBuilder.trackScores(this.trackScores); + sourceBuilder.trackScores(false); } /** @@ -184,6 +181,10 @@ public void pushDownLimit(Integer limit, Integer offset) { sourceBuilder.from(offset).size(limit); } + public void pushDownTrackedScore(boolean trackScores) { + sourceBuilder.trackScores(trackScores); + } + /** * Add highlight to DSL requests. 
* @param field name of the field to highlight diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java index 9b36a29bf2..d7483cfcf0 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java @@ -34,8 +34,6 @@ public class OpenSearchIndexScanBuilder extends TableScanBuilder { /** Is limit operator pushed down. */ private boolean isLimitPushedDown = false; - private boolean isScoreTrackedPushedDown = false; - @VisibleForTesting OpenSearchIndexScanBuilder(TableScanBuilder delegate) { this.delegate = delegate; diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java index 162b0612f0..4dfb148ac8 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java @@ -31,7 +31,6 @@ import static org.opensearch.sql.planner.optimizer.rule.read.TableScanPushDown.PUSH_DOWN_HIGHLIGHT; import static org.opensearch.sql.planner.optimizer.rule.read.TableScanPushDown.PUSH_DOWN_LIMIT; import static org.opensearch.sql.planner.optimizer.rule.read.TableScanPushDown.PUSH_DOWN_PROJECT; -import static org.opensearch.sql.planner.optimizer.rule.read.TableScanPushDown.PUSH_DOWN_SCORE; import static org.opensearch.sql.planner.optimizer.rule.read.TableScanPushDown.PUSH_DOWN_SORT; import com.google.common.collect.ImmutableList; @@ -746,7 +745,6 @@ private LogicalPlan optimize(LogicalPlan plan) { PUSH_DOWN_SORT, PUSH_DOWN_LIMIT, 
PUSH_DOWN_HIGHLIGHT, - PUSH_DOWN_SCORE, PUSH_DOWN_PROJECT)); return optimizer.optimize(plan); } diff --git a/sql/src/main/antlr/OpenSearchSQLParser.g4 b/sql/src/main/antlr/OpenSearchSQLParser.g4 index e864f880e2..722c4bd98e 100644 --- a/sql/src/main/antlr/OpenSearchSQLParser.g4 +++ b/sql/src/main/antlr/OpenSearchSQLParser.g4 @@ -358,7 +358,7 @@ relevanceFunction ; scoreRelevanceFunction - : scoreRelevanceFunctionName=SCORE LR_BRACKET relevanceFunction (COMMA functionArg)* RR_BRACKET + : scoreRelevanceFunctionName LR_BRACKET relevanceFunction (COMMA functionArg)* RR_BRACKET ; noFieldRelevanceFunction @@ -506,6 +506,10 @@ systemFunctionName : TYPEOF ; +scoreRelevanceFunctionName + : SCORE | SCOREQUERY | SCORE_QUERY + ; + singleFieldRelevanceFunctionName : MATCH | MATCHQUERY | MATCH_QUERY | MATCH_PHRASE | MATCHPHRASE | MATCHPHRASEQUERY diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java index 455f2ca444..a19d7ad3f3 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java @@ -502,8 +502,8 @@ private Function buildFunction(String functionName, } private QualifiedName visitIdentifiers(List identifiers) { - Boolean isMetadataField = - identifiers.stream().anyMatch(id -> id.metadataField() != null); + Boolean isMetadataField = identifiers.stream().filter( + id -> id.metadataField() != null).findFirst().isPresent(); return new QualifiedName( identifiers.stream() .map(RuleContext::getText) From 3ba3c855e02d540254da360608fc143de2e27707 Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Wed, 8 Mar 2023 09:06:27 -0800 Subject: [PATCH 27/40] Fix for review comments Signed-off-by: Andrew Carbonetto --- .../org/opensearch/sql/analysis/ExpressionAnalyzer.java | 2 -- .../sql/opensearch/response/OpenSearchResponse.java | 7 ++++--- 2 files changed, 4 
insertions(+), 5 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java index 75b093248a..c467325562 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java @@ -9,8 +9,6 @@ import static org.opensearch.sql.ast.dsl.AstDSL.and; import static org.opensearch.sql.ast.dsl.AstDSL.compare; import static org.opensearch.sql.ast.expression.QualifiedName.METADATAFIELD_TYPE_MAP; -import static org.opensearch.sql.expression.function.BuiltinFunctionName.GTE; -import static org.opensearch.sql.expression.function.BuiltinFunctionName.LTE; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java index 535c69ea37..568299af43 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java @@ -95,7 +95,8 @@ public Iterator iterator() { return (ExprValue) ExprTupleValue.fromExprValueMap(builder.build()); }).iterator(); } else { - float maxScore = hits.getMaxScore(); + ExprFloatValue maxScore = Float.isNaN(hits.getMaxScore()) + ? 
null : new ExprFloatValue(hits.getMaxScore()); return Arrays.stream(hits.getHits()) .map(hit -> { String source = hit.getSourceAsString(); @@ -108,8 +109,8 @@ public Iterator iterator() { if (!Float.isNaN(hit.getScore())) { builder.put("_score", new ExprFloatValue(hit.getScore())); } - if (!Float.isNaN(maxScore)) { - builder.put("_maxscore", new ExprFloatValue(maxScore)); + if (maxScore != null) { + builder.put("_maxscore", maxScore); } builder.put("_sort", new ExprLongValue(hit.getSeqNo())); From dd1585344b5eda2389bcba667d01f0dfcf9b2079 Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Mon, 13 Mar 2023 11:37:41 -0700 Subject: [PATCH 28/40] Update IT tests and legacy tests for comments Signed-off-by: Andrew Carbonetto --- .../sql/analysis/ExpressionAnalyzer.java | 14 +++--- .../org/opensearch/sql/ast/dsl/AstDSL.java | 4 +- .../sql/ast/expression/QualifiedName.java | 18 +++---- .../sql/ast/expression/ScoreFunction.java | 4 +- .../opensearch/sql/analysis/AnalyzerTest.java | 10 ++-- docs/user/dql/basics.rst | 40 ++++++++++++++++ docs/user/dql/functions.rst | 11 ++++- .../sql/legacy/CsvFormatResponseIT.java | 1 + .../opensearch/sql/legacy/MethodQueryIT.java | 8 +--- .../sql/legacy/PrettyFormatResponseIT.java | 1 + .../org/opensearch/sql/sql/IdentifierIT.java | 11 ++++- .../org/opensearch/sql/sql/ScoreQueryIT.java | 48 ++++++++++++++----- sql/src/main/antlr/OpenSearchSQLParser.g4 | 2 +- .../sql/sql/parser/AstExpressionBuilder.java | 2 +- .../sql/parser/AstExpressionBuilderTest.java | 10 ++-- 15 files changed, 134 insertions(+), 50 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java index c467325562..d7e6b1eb42 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java @@ -214,13 +214,14 @@ public Expression visitHighlightFunction(HighlightFunction 
node, AnalysisContext * visitScoreFunction removes the score function from the AST and replaces it with the child * relevance function node. If the optional boost variable is provided, the boost argument * of the relevance function is combined. + * * @param node score function node * @param context analysis context for the query * @return resolved relevance function */ public Expression visitScoreFunction(ScoreFunction node, AnalysisContext context) { // if no function argument given, just accept the relevance query and return - if (node.getFuncArgs().isEmpty() || !(node.getFuncArgs().get(0) instanceof Literal)) { + if (!(node.getFuncArg() instanceof Literal)) { OpenSearchFunctions.OpenSearchFunction relevanceQueryExpr = (OpenSearchFunctions.OpenSearchFunction) node .getRelevanceQuery().accept(this, context); @@ -229,7 +230,7 @@ public Expression visitScoreFunction(ScoreFunction node, AnalysisContext context } // note: if an argument exists, and there should only be one, it will be a boost argument - Literal boostFunctionArg = (Literal) node.getFuncArgs().get(0); + Literal boostFunctionArg = (Literal) node.getFuncArg(); Double thisBoostValue; if (boostFunctionArg.getType().equals(DataType.DOUBLE)) { thisBoostValue = ((Double) boostFunctionArg.getValue()); @@ -242,17 +243,18 @@ public Expression visitScoreFunction(ScoreFunction node, AnalysisContext context // update the existing unresolved expression to add a boost argument if it doesn't exist // OR multiply the existing boost argument - Function relevanceQueryUnresolvedExpr = (Function)node.getRelevanceQuery(); + Function relevanceQueryUnresolvedExpr = (Function) node.getRelevanceQuery(); List relevanceFuncArgs = relevanceQueryUnresolvedExpr.getFuncArgs(); boolean doesFunctionContainBoostArgument = false; List updatedFuncArgs = new ArrayList<>(); - for (UnresolvedExpression expr: relevanceFuncArgs) { + for (UnresolvedExpression expr : relevanceFuncArgs) { String argumentName = ((UnresolvedArgument) 
expr).getArgName(); if (argumentName.equalsIgnoreCase("boost")) { doesFunctionContainBoostArgument = true; - Literal boostArgLiteral = (Literal)((UnresolvedArgument) expr).getValue(); - Double boostValue = Double.parseDouble((String)boostArgLiteral.getValue()) * thisBoostValue; + Literal boostArgLiteral = (Literal) ((UnresolvedArgument) expr).getValue(); + Double boostValue = + Double.parseDouble((String) boostArgLiteral.getValue()) * thisBoostValue; UnresolvedArgument newBoostArg = new UnresolvedArgument( argumentName, new Literal(boostValue.toString(), DataType.STRING) diff --git a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java index 22ee1e2f6d..ea4561b60c 100644 --- a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java +++ b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java @@ -290,8 +290,8 @@ public UnresolvedExpression highlight(UnresolvedExpression fieldName, } public UnresolvedExpression score(UnresolvedExpression relevanceQuery, - List funcArgs) { - return new ScoreFunction(relevanceQuery, funcArgs); + UnresolvedExpression funcArg) { + return new ScoreFunction(relevanceQuery, funcArg); } public UnresolvedExpression window(UnresolvedExpression function, diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java b/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java index abcbcfa509..b42d196460 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java @@ -10,6 +10,7 @@ import static java.util.stream.Collectors.toList; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -38,15 +39,14 @@ public QualifiedName(String name) { public static final String METADATA_FIELD_SCORE = "_score"; public static final 
String METADATA_FIELD_MAXSCORE = "_maxscore"; public static final String METADATA_FIELD_SORT = "_sort"; - public static final java.util.Map METADATAFIELD_TYPE_MAP = new HashMap<>() { - { - put(METADATA_FIELD_ID, ExprCoreType.STRING); - put(METADATA_FIELD_INDEX, ExprCoreType.STRING); - put(METADATA_FIELD_SCORE, ExprCoreType.FLOAT); - put(METADATA_FIELD_MAXSCORE, ExprCoreType.FLOAT); - put(METADATA_FIELD_SORT, ExprCoreType.LONG); - } - }; + + public static final java.util.Map METADATAFIELD_TYPE_MAP = ImmutableMap.of( + METADATA_FIELD_ID, ExprCoreType.STRING, + METADATA_FIELD_INDEX, ExprCoreType.STRING, + METADATA_FIELD_SCORE, ExprCoreType.FLOAT, + METADATA_FIELD_MAXSCORE, ExprCoreType.FLOAT, + METADATA_FIELD_SORT, ExprCoreType.LONG + ); public QualifiedName(String name, Boolean isMetadataField) { this.parts = Collections.singletonList(name); diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/ScoreFunction.java b/core/src/main/java/org/opensearch/sql/ast/expression/ScoreFunction.java index 0587151930..90a89c0a1e 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/ScoreFunction.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/ScoreFunction.java @@ -22,7 +22,7 @@ @ToString public class ScoreFunction extends UnresolvedExpression { private final UnresolvedExpression relevanceQuery; - private final List funcArgs; + private final UnresolvedExpression funcArg; @Override public T accept(AbstractNodeVisitor nodeVisitor, C context) { @@ -32,7 +32,7 @@ public T accept(AbstractNodeVisitor nodeVisitor, C context) { @Override public List getChild() { List resultingList = List.of(relevanceQuery); - resultingList.addAll(funcArgs); + resultingList.add(funcArg); return resultingList; } } diff --git a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java index db560e02c0..3bb0ccaec9 100644 --- a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java +++ 
b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java @@ -228,7 +228,7 @@ public void analyze_filter_visit_score_function() { AstDSL.unresolvedArg("field", stringLiteral("field_value1")), AstDSL.unresolvedArg("query", stringLiteral("search query")), AstDSL.unresolvedArg("boost", stringLiteral("3")) - ), List.of()) + ), null) ); assertAnalyzeEqual( LogicalPlanDSL.filter( @@ -285,7 +285,7 @@ public void analyze_filter_visit_score_function_with_double_boost() { AstDSL.unresolvedArg("field", stringLiteral("field_value1")), AstDSL.unresolvedArg("query", stringLiteral("search query")), AstDSL.unresolvedArg("slop", stringLiteral("3")) - ), List.of(new Literal(3.0, DataType.DOUBLE)) + ), new Literal(3.0, DataType.DOUBLE) ) ); @@ -326,7 +326,7 @@ public void analyze_filter_visit_score_function_with_integer_boost() { AstDSL.unresolvedArg("field", stringLiteral("field_value1")), AstDSL.unresolvedArg("query", stringLiteral("search query")), AstDSL.unresolvedArg("boost", stringLiteral("3")) - ), List.of(new Literal(3, DataType.INTEGER)) + ), new Literal(3, DataType.INTEGER) ) ) ); @@ -341,7 +341,7 @@ public void analyze_filter_visit_score_function_with_unsupported_boost_SemanticC AstDSL.unresolvedArg("field", stringLiteral("field_value1")), AstDSL.unresolvedArg("query", stringLiteral("search query")), AstDSL.unresolvedArg("boost", stringLiteral("3")) - ), List.of(new Literal("3.0", DataType.STRING)) + ), new Literal("3.0", DataType.STRING) ) ); SemanticCheckException exception = @@ -371,7 +371,7 @@ public void analyze_filter_visit_score_function_with_invalid_field_ignored() { AstDSL.unresolvedArg("field", stringLiteral("field_value1")), AstDSL.unresolvedArg("query", stringLiteral("search query")), AstDSL.unresolvedArg("boost", stringLiteral("3")) - ), List.of(AstDSL.unresolvedArg("invalid", stringLiteral("value"))) + ), AstDSL.unresolvedArg("invalid", stringLiteral("value")) ) ) ); diff --git a/docs/user/dql/basics.rst b/docs/user/dql/basics.rst index 
9762f23988..93d463621a 100644 --- a/docs/user/dql/basics.rst +++ b/docs/user/dql/basics.rst @@ -155,6 +155,46 @@ Result set: | Nanette| Bates| +---------+--------+ +One can also provide meta-field name(s) in ``SELECT`` clause to retrieve reserved-fields (beginning with underscore) from OpenSearch documents. + +SQL query:: + + POST /_plugins/_sql + { + "query" : "SELECT firstname, lastname, _id, _index, _sort FROM accounts" + } + +Explain:: + + { + "from" : 0, + "size" : 200, + "_source" : { + "includes" : [ + "firstname", + "_id", + "_index", + "_sort", + "lastname" + ], + "excludes" : [ ] + } + } + +Result set: + ++---------+--------+---+--------+-----+ +|firstname|lastname|_id| _index|_sort| ++=========+========+===+========+=====+ +| Amber| Duke| 1|accounts| -2| ++---------+--------+---+--------+-----+ +| Dale| Adams| 2|accounts| -2| ++---------+--------+---+--------+-----+ +| Hattie| Bond| 3|accounts| -2| ++---------+--------+---+--------+-----+ +| Nanette| Bates| 4|accounts| -2| ++---------+--------+---+--------+-----+ + Example 3: Using Field Alias ---------------------------- diff --git a/docs/user/dql/functions.rst b/docs/user/dql/functions.rst index 70d2158e36..ab6cb533a9 100644 --- a/docs/user/dql/functions.rst +++ b/docs/user/dql/functions.rst @@ -3855,7 +3855,7 @@ Please refer to examples below: | ``score(query('Tags:taste OR Body:taste', ...), 2.0)`` -The `score_query` and `scorequery` functions are alternative syntax to the `score` function. +The `score_query` and `scorequery` functions are alternative names for the `score` function. 
Example boosting score:: @@ -3867,6 +3867,15 @@ Example boosting score:: | 1 | The House at Pooh Corner | Alan Alexander Milne | 1.5884793 | +------+--------------------------+----------------------+-----------+ + os> select *, _score from books where score(query('title:Pooh House', default_operator='AND'), 5.0) OR score(query('title:Winnie', default_operator='AND'), 1.5); + fetched rows / total rows = 2/2 + +------+--------------------------+----------------------+-----------+ + | id | title | author | _score | + |------+--------------------------+----------------------+-----------| + | 1 | The House at Pooh Corner | Alan Alexander Milne | 3.9711983 | + | 2 | Winnie-the-Pooh | Alan Alexander Milne | 1.1581701 | + +------+--------------------------+----------------------+-----------+ + HIGHLIGHT ------------ diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/CsvFormatResponseIT.java b/integ-test/src/test/java/org/opensearch/sql/legacy/CsvFormatResponseIT.java index b48cfab843..aa3bf67f58 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/CsvFormatResponseIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/CsvFormatResponseIT.java @@ -573,6 +573,7 @@ public void twoCharsSeperator() throws Exception { } + @Ignore("tested in @see: org.opensearch.sql.sql.IdentifierIT.testMetafieldIdentifierTest") public void includeIdAndNotTypeOrScore() throws Exception { String query = String.format(Locale.ROOT, "select age, firstname, _id from %s where lastname = 'Marquez' ", TEST_INDEX_ACCOUNT); diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/MethodQueryIT.java b/integ-test/src/test/java/org/opensearch/sql/legacy/MethodQueryIT.java index aa53df9dde..352c414ef1 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/MethodQueryIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/MethodQueryIT.java @@ -59,8 +59,6 @@ public void matchQueryTest() throws IOException { } /** - * score query no longer maps to 
constant_score in the V2 engine - * @see org.opensearch.sql.sql.ScoreQueryIT * matchQuery 是利用分词结果进行单个字段的搜索. "query" : { "bool" : { "must" : { "bool" : { * "should" : [ { "constant_score" : { "query" : { "match" : { "address" : { * "query" : "Lane", "type" : "boolean" } } }, "boost" : 100.0 } }, { @@ -70,7 +68,7 @@ public void matchQueryTest() throws IOException { * @throws IOException */ @Test - @Ignore + @Ignore("score query no longer maps to constant_score in the V2 engine - @see org.opensearch.sql.sql.ScoreQueryIT") public void scoreQueryTest() throws IOException { final String result = explainQuery(String.format(Locale.ROOT, "select address from %s " + @@ -120,8 +118,6 @@ public void wildcardQueryTest() throws IOException { } /** - * score query no longer handled by legacy engine - * @see org.opensearch.sql.sql.ScoreQueryIT * matchPhraseQueryTest 短语查询完全匹配. * "address" : { * "query" : "671 Bristol Street", @@ -131,7 +127,7 @@ public void wildcardQueryTest() throws IOException { * @throws IOException */ @Test - @Ignore + @Ignore("score query no longer handled by legacy engine - @see org.opensearch.sql.sql.ScoreQueryIT") public void matchPhraseQueryTest() throws IOException { final String result = explainQuery(String.format(Locale.ROOT, "select address from %s " + diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/PrettyFormatResponseIT.java b/integ-test/src/test/java/org/opensearch/sql/legacy/PrettyFormatResponseIT.java index a7b2398a2b..1e2073acbd 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/PrettyFormatResponseIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/PrettyFormatResponseIT.java @@ -126,6 +126,7 @@ public void selectWrongField() throws IOException { } @Test + @Ignore("_score tested in V2 engine - @see org.opensearch.sql.sql.ScoreQueryIT") public void selectScore() throws IOException { JSONObject response = executeQuery( String.format(Locale.ROOT, "SELECT _score FROM %s WHERE SCORE(match_phrase(phrase, 
'brown fox'))", diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/IdentifierIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/IdentifierIT.java index 3e7401d74f..c6dfa79477 100644 --- a/integ-test/src/test/java/org/opensearch/sql/sql/IdentifierIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/sql/IdentifierIT.java @@ -78,7 +78,9 @@ public void testDoubleUnderscoreIdentifierTest() throws IOException { @Test public void testMetafieldIdentifierTest() throws IOException { // create an index, but the contents doesn't matter - createIndexWithOneDoc("test.metafields"); + String id = "12345"; + String index = "test.metafields"; + new Index(index).addDoc("{\"age\": 30}", id); // Execute using field metadata values final JSONObject result = new JSONObject(executeQuery( @@ -93,6 +95,7 @@ public void testMetafieldIdentifierTest() throws IOException { schema("_score", null, "float"), schema("_maxscore", null, "float"), schema("_sort", null, "long")); + verifyDataRows(result, rows(30, id, index, 1.0, 1.0, -2)); } private void createIndexWithOneDoc(String... 
indexNames) throws IOException { @@ -129,6 +132,12 @@ void addDoc(String doc) { indexDoc.setJsonEntity(doc); performRequest(client(), indexDoc); } + + void addDoc(String doc, String id) { + Request indexDoc = new Request("POST", String.format("/%s/_doc/%s?refresh=true", indexName, id)); + indexDoc.setJsonEntity(doc); + performRequest(client(), indexDoc); + } } } diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/ScoreQueryIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/ScoreQueryIT.java index 4b0964d62e..03df7d0e29 100644 --- a/integ-test/src/test/java/org/opensearch/sql/sql/ScoreQueryIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/sql/ScoreQueryIT.java @@ -7,7 +7,6 @@ import org.json.JSONObject; import org.junit.Assert; -import org.junit.Ignore; import org.junit.Test; import org.opensearch.sql.legacy.SQLIntegTestCase; import org.opensearch.sql.legacy.TestsConstants; @@ -15,11 +14,11 @@ import java.io.IOException; import java.util.Locale; -import static org.hamcrest.Matchers.allOf; -import static org.hamcrest.Matchers.both; import static org.hamcrest.Matchers.containsString; -import static org.hamcrest.Matchers.not; -import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BEER; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; public class ScoreQueryIT extends SQLIntegTestCase { @Override @@ -88,22 +87,37 @@ protected void init() throws Exception { * @throws IOException */ @Test - public void scoreQueryTest() throws IOException { + public void scoreQueryExplainTest() throws IOException { final String result = explainQuery(String.format(Locale.ROOT, "select address from %s " + - "where score(matchQuery(address, 'Lane'),100) " + - "or score(matchQuery(address,'Street'),0.5) order by _score desc limit 3", + "where 
score(matchQuery(address, 'Douglass'), 100) " + + "or score(matchQuery(address, 'Hall'), 0.5) order by _score desc limit 2", TestsConstants.TEST_INDEX_ACCOUNT)); - Assert.assertThat(result, containsString("\\\"match\\\":{\\\"address\\\":{\\\"query\\\":\\\"Lane\\\"")); + Assert.assertThat(result, containsString("\\\"match\\\":{\\\"address\\\":{\\\"query\\\":\\\"Douglass\\\"")); Assert.assertThat(result, containsString("\\\"boost\\\":100.0")); - Assert.assertThat(result, containsString("\\\"match\\\":{\\\"address\\\":{\\\"query\\\":\\\"Street\\\"")); + Assert.assertThat(result, containsString("\\\"match\\\":{\\\"address\\\":{\\\"query\\\":\\\"Hall\\\"")); Assert.assertThat(result, containsString("\\\"boost\\\":0.5")); Assert.assertThat(result, containsString("\\\"sort\\\":[{\\\"_score\\\"")); Assert.assertThat(result, containsString("\\\"track_scores\\\":true")); } @Test - public void scoreQueryDefaultBoostTest() throws IOException { + public void scoreQueryTest() throws IOException { + final JSONObject result = new JSONObject(executeQuery(String.format(Locale.ROOT, + "select address, _score from %s " + + "where score(matchQuery(address, 'Douglass'), 100) " + + "or score(matchQuery(address, 'Hall'), 0.5) order by _score desc limit 2", + TestsConstants.TEST_INDEX_ACCOUNT), "jdbc")); + verifySchema(result, + schema("address", null, "text"), + schema("_score", null, "float")); + verifyDataRows(result, + rows("154 Douglass Street", 650.1515), + rows("565 Hall Street", 3.2507575)); + } + + @Test + public void scoreQueryDefaultBoostExplainTest() throws IOException { final String result = explainQuery(String.format(Locale.ROOT, "select address from %s " + "where score(matchQuery(address, 'Lane')) order by _score desc limit 2", @@ -113,4 +127,16 @@ public void scoreQueryDefaultBoostTest() throws IOException { Assert.assertThat(result, containsString("\\\"sort\\\":[{\\\"_score\\\"")); Assert.assertThat(result, containsString("\\\"track_scores\\\":true")); } + + @Test + public 
void scoreQueryDefaultBoostQueryTest() throws IOException { + final JSONObject result = new JSONObject(executeQuery(String.format(Locale.ROOT, + "select address, _score from %s " + + "where score(matchQuery(address, 'Powell')) order by _score desc limit 2", + TestsConstants.TEST_INDEX_ACCOUNT), "jdbc")); + verifySchema(result, + schema("address", null, "text"), + schema("_score", null, "float")); + verifyDataRows(result, rows("305 Powell Street", 6.501515)); + } } diff --git a/sql/src/main/antlr/OpenSearchSQLParser.g4 b/sql/src/main/antlr/OpenSearchSQLParser.g4 index 722c4bd98e..0eafe74e71 100644 --- a/sql/src/main/antlr/OpenSearchSQLParser.g4 +++ b/sql/src/main/antlr/OpenSearchSQLParser.g4 @@ -358,7 +358,7 @@ relevanceFunction ; scoreRelevanceFunction - : scoreRelevanceFunctionName LR_BRACKET relevanceFunction (COMMA functionArg)* RR_BRACKET + : scoreRelevanceFunctionName LR_BRACKET relevanceFunction (COMMA functionArg)? RR_BRACKET ; noFieldRelevanceFunction diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java index a19d7ad3f3..5db233110a 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java @@ -486,7 +486,7 @@ public UnresolvedExpression visitAltMultiFieldRelevanceFunction( public UnresolvedExpression visitScoreRelevanceFunction(ScoreRelevanceFunctionContext ctx) { return new ScoreFunction( visit(ctx.relevanceFunction()), - ctx.functionArg().stream().map(this::visitFunctionArg).collect(Collectors.toList()) + ctx.functionArg() == null ? 
null : visit(ctx.functionArg()) ); } diff --git a/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java b/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java index aa4f3a4bd5..74322f40ed 100644 --- a/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java +++ b/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java @@ -37,7 +37,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import java.util.HashMap; -import java.util.List; +import java.util.stream.Stream; import org.antlr.v4.runtime.CommonTokenStream; import org.apache.commons.lang3.tuple.ImmutablePair; import org.junit.jupiter.api.Test; @@ -439,7 +439,7 @@ public void canBuildKeywordsAsIdentInQualifiedName() { @Test public void canBuildMetaDataFieldAsQualifiedName() { - List.of("_id", "_index", "_sort", "_score", "_maxscore").stream().forEach( + Stream.of("_id", "_index", "_sort", "_score", "_maxscore").forEach( field -> assertEquals( qualifiedNameWithMetadata(field), buildExprAst(field) @@ -449,7 +449,7 @@ public void canBuildMetaDataFieldAsQualifiedName() { @Test public void canBuildNonMetaDataFieldAsQualifiedName() { - List.of("id", "__id", "_routing", "___field").stream().forEach( + Stream.of("id", "__id", "_routing", "___field").forEach( field -> assertEquals( qualifiedName(field), buildExprAst(field) @@ -801,7 +801,7 @@ public void relevanceScore_query() { "field2", 3.2F, "field1", 1.F))), unresolvedArg("query", stringLiteral("search query")) ), - List.of() + null ), buildExprAst("score(query_string(['field1', 'field2' ^ 3.2], 'search query'))") ); @@ -816,7 +816,7 @@ public void relevanceScore_withBoost_query() { "field1", 1.F, "field2", 3.2F))), unresolvedArg("query", stringLiteral("search query")) ), - List.of(doubleLiteral(1.0)) + doubleLiteral(1.0) ), buildExprAst("score(query_string(['field1', 'field2' ^ 3.2], 'search query'), 1.0)") ); From 
543d2325b7a3481802c228e733e8e4d999ce9aa4 Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Mon, 13 Mar 2023 11:42:03 -0700 Subject: [PATCH 29/40] Minor comment Signed-off-by: Andrew Carbonetto --- .../java/org/opensearch/sql/analysis/ExpressionAnalyzer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java index d7e6b1eb42..37816a6eba 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java @@ -373,7 +373,7 @@ public Expression visitAllFields(AllFields node, AnalysisContext context) { @Override public Expression visitQualifiedName(QualifiedName node, AnalysisContext context) { QualifierAnalyzer qualifierAnalyzer = new QualifierAnalyzer(context); - if (node.isMetadataField().booleanValue()) { + if (node.isMetadataField()) { return visitMetadata(qualifierAnalyzer.unqualified(node), context); } return visitIdentifier(qualifierAnalyzer.unqualified(node), context); From 9d59e9f49b4f6dcbb58b8e90abc439d7f7089c6b Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Mon, 13 Mar 2023 16:09:21 -0700 Subject: [PATCH 30/40] Updates for whitespace Signed-off-by: Andrew Carbonetto --- .../java/org/opensearch/sql/sql/MatchIT.java | 2 - .../OpenSearchIndexScanOptimizationTest.java | 4 +- .../sql/sql/parser/AstExpressionBuilder.java | 60 +++++++++---------- 3 files changed, 32 insertions(+), 34 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/MatchIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/MatchIT.java index 6fdb70e99d..9885ddfa33 100644 --- a/integ-test/src/test/java/org/opensearch/sql/sql/MatchIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/sql/MatchIT.java @@ -14,10 +14,8 @@ import java.io.IOException; import java.util.Locale; - import org.json.JSONObject; import 
org.junit.Assert; -import org.junit.Ignore; import org.junit.Test; import org.opensearch.sql.legacy.SQLIntegTestCase; import org.opensearch.sql.legacy.TestsConstants; diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java index 4dfb148ac8..08f7deb2c3 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java @@ -187,8 +187,8 @@ void test_filter_on_multiple_opensearchfunctions_with_trackedscores_push_down() QueryBuilders.boolQuery() .should( QueryBuilders.queryStringQuery("QUERY") - .field("intV", 1.5F) - .boost(12.5F)) + .field("intV", 1.5F) + .boost(12.5F)) .should( QueryBuilders.queryStringQuery("QUERY") .field("intV", 1.5F) diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java index 5db233110a..d10f9804d1 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java @@ -175,11 +175,11 @@ public UnresolvedExpression visitHighlightFunctionCall( @Override public UnresolvedExpression visitPositionFunction( - PositionFunctionContext ctx) { + PositionFunctionContext ctx) { return new Function( - POSITION.getName().getFunctionName(), - Arrays.asList(visitFunctionArg(ctx.functionArg(0)), - visitFunctionArg(ctx.functionArg(1)))); + POSITION.getName().getFunctionName(), + Arrays.asList(visitFunctionArg(ctx.functionArg(0)), + visitFunctionArg(ctx.functionArg(1)))); } @Override @@ -216,20 +216,20 @@ public UnresolvedExpression visitWindowFunctionClause(WindowFunctionClauseContex List partitionByList = 
Collections.emptyList(); if (overClause.partitionByClause() != null) { partitionByList = overClause.partitionByClause() - .expression() - .stream() - .map(this::visit) - .collect(Collectors.toList()); + .expression() + .stream() + .map(this::visit) + .collect(Collectors.toList()); } List> sortList = Collections.emptyList(); if (overClause.orderByClause() != null) { sortList = overClause.orderByClause() - .orderByElement() - .stream() - .map(item -> ImmutablePair.of( - createSortOption(item), visit(item.expression()))) - .collect(Collectors.toList()); + .orderByElement() + .stream() + .map(item -> ImmutablePair.of( + createSortOption(item), visit(item.expression()))) + .collect(Collectors.toList()); } return new WindowFunction(visit(ctx.function), partitionByList, sortList); } @@ -298,7 +298,7 @@ public UnresolvedExpression visitLikePredicate(LikePredicateContext ctx) { @Override public UnresolvedExpression visitRegexpPredicate(RegexpPredicateContext ctx) { return new Function(REGEXP.getName().getFunctionName(), - Arrays.asList(visit(ctx.left), visit(ctx.right))); + Arrays.asList(visit(ctx.left), visit(ctx.right))); } @Override @@ -399,9 +399,9 @@ public UnresolvedExpression visitBinaryComparisonPredicate( public UnresolvedExpression visitCaseFunctionCall(CaseFunctionCallContext ctx) { UnresolvedExpression caseValue = (ctx.expression() == null) ? null : visit(ctx.expression()); List whenStatements = ctx.caseFuncAlternative() - .stream() - .map(when -> (When) visit(when)) - .collect(Collectors.toList()); + .stream() + .map(when -> (When) visit(when)) + .collect(Collectors.toList()); UnresolvedExpression elseStatement = (ctx.elseArg == null) ? 
null : visit(ctx.elseArg); return new Case(caseValue, whenStatements, elseStatement); @@ -426,10 +426,10 @@ public UnresolvedExpression visitConvertedDataType( @Override public UnresolvedExpression visitNoFieldRelevanceFunction( - NoFieldRelevanceFunctionContext ctx) { + NoFieldRelevanceFunctionContext ctx) { return new Function( - ctx.noFieldRelevanceFunctionName().getText().toLowerCase(), - noFieldRelevanceArguments(ctx)); + ctx.noFieldRelevanceFunctionName().getText().toLowerCase(), + noFieldRelevanceArguments(ctx)); } @Override @@ -506,9 +506,9 @@ private QualifiedName visitIdentifiers(List identifiers) { id -> id.metadataField() != null).findFirst().isPresent(); return new QualifiedName( identifiers.stream() - .map(RuleContext::getText) - .map(StringUtils::unquoteIdentifier) - .collect(Collectors.toList()), + .map(RuleContext::getText) + .map(StringUtils::unquoteIdentifier) + .collect(Collectors.toList()), isMetadataField); } @@ -524,18 +524,18 @@ private void fillRelevanceArgs(List args, } private List noFieldRelevanceArguments( - NoFieldRelevanceFunctionContext ctx) { + NoFieldRelevanceFunctionContext ctx) { // all the arguments are defaulted to string values // to skip environment resolving and function signature resolving ImmutableList.Builder builder = ImmutableList.builder(); builder.add(new UnresolvedArgument("query", - new Literal(StringUtils.unquoteText(ctx.query.getText()), DataType.STRING))); + new Literal(StringUtils.unquoteText(ctx.query.getText()), DataType.STRING))); fillRelevanceArgs(ctx.relevanceArg(), builder); return builder.build(); } private List singleFieldRelevanceArguments( - SingleFieldRelevanceFunctionContext ctx) { + SingleFieldRelevanceFunctionContext ctx) { // all the arguments are defaulted to string values // to skip environment resolving and function signature resolving ImmutableList.Builder builder = ImmutableList.builder(); @@ -604,7 +604,7 @@ private List alternateMultiMatchArguments( String[] fieldAndWeights = 
StringUtils.unquoteText( ctx.getRuleContexts(AlternateMultiMatchFieldContext.class) - .stream().findFirst().get().argVal.getText()).split(","); + .stream().findFirst().get().argVal.getText()).split(","); for (var fieldAndWeight : fieldAndWeights) { String[] splitFieldAndWeights = fieldAndWeight.split("\\^"); @@ -616,9 +616,9 @@ private List alternateMultiMatchArguments( ctx.getRuleContexts(AlternateMultiMatchQueryContext.class) .stream().findFirst().ifPresent( - arg -> - builder.add(new UnresolvedArgument("query", - new Literal(StringUtils.unquoteText(arg.argVal.getText()), DataType.STRING))) + arg -> + builder.add(new UnresolvedArgument("query", + new Literal(StringUtils.unquoteText(arg.argVal.getText()), DataType.STRING))) ); fillRelevanceArgs(ctx.relevanceArg(), builder); From 7a05276659c1ef2236b8362d12837e80cf318cd9 Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Mon, 13 Mar 2023 17:44:47 -0700 Subject: [PATCH 31/40] Update basics.rst to show OS result Signed-off-by: Andrew Carbonetto --- .../sql/ast/expression/ScoreFunction.java | 4 +-- docs/user/dql/basics.rst | 25 +++++++++---------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/ScoreFunction.java b/core/src/main/java/org/opensearch/sql/ast/expression/ScoreFunction.java index 90a89c0a1e..ae30acc771 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/ScoreFunction.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/ScoreFunction.java @@ -31,8 +31,6 @@ public T accept(AbstractNodeVisitor nodeVisitor, C context) { @Override public List getChild() { - List resultingList = List.of(relevanceQuery); - resultingList.add(funcArg); - return resultingList; + return List.of(relevanceQuery, funcArg); } } diff --git a/docs/user/dql/basics.rst b/docs/user/dql/basics.rst index 93d463621a..f11822f7d2 100644 --- a/docs/user/dql/basics.rst +++ b/docs/user/dql/basics.rst @@ -155,7 +155,7 @@ Result set: | Nanette| Bates| 
+---------+--------+ -One can also provide meta-field name(s) in ``SELECT`` clause to retrieve reserved-fields (beginning with underscore) from OpenSearch documents. +One can also provide meta-field name(s) to retrieve reserved-fields (beginning with underscore) from OpenSearch documents. SQL query:: @@ -181,20 +181,19 @@ Explain:: } } -Result set: -+---------+--------+---+--------+-----+ -|firstname|lastname|_id| _index|_sort| -+=========+========+===+========+=====+ -| Amber| Duke| 1|accounts| -2| -+---------+--------+---+--------+-----+ -| Dale| Adams| 2|accounts| -2| -+---------+--------+---+--------+-----+ -| Hattie| Bond| 3|accounts| -2| -+---------+--------+---+--------+-----+ -| Nanette| Bates| 4|accounts| -2| -+---------+--------+---+--------+-----+ +This produces results like this for example:: + os> SELECT firstname, lastname, _index, _sort FROM accounts; + fetched rows / total rows = 4/4 + +-------------+------------+----------+---------+ + | firstname | lastname | _index | _sort | + |-------------+------------+----------+---------| + | Amber | Duke | accounts | -2 | + | Hattie | Bond | accounts | -2 | + | Nanette | Bates | accounts | -2 | + | Dale | Adams | accounts | -2 | + +-------------+------------+----------+---------+ Example 3: Using Field Alias ---------------------------- From e75d6b5b22bc7ffd00204ece74a2ee7cd462a1c5 Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Mon, 13 Mar 2023 17:46:26 -0700 Subject: [PATCH 32/40] Update basics.rst to show OS result Signed-off-by: Andrew Carbonetto --- docs/user/dql/basics.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/user/dql/basics.rst b/docs/user/dql/basics.rst index f11822f7d2..9003f90f30 100644 --- a/docs/user/dql/basics.rst +++ b/docs/user/dql/basics.rst @@ -155,7 +155,8 @@ Result set: | Nanette| Bates| +---------+--------+ -One can also provide meta-field name(s) to retrieve reserved-fields (beginning with underscore) from OpenSearch documents. 
+One can also provide meta-field name(s) to retrieve reserved-fields (beginning with underscore) from OpenSearch documents. Meta-fields are not output +from wildcard calls (`SELECT *`) and must be explicitly included to be returned. SQL query:: From fecf615410e4ad0a539410df53de67699d165ad3 Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Tue, 14 Mar 2023 14:00:13 -0700 Subject: [PATCH 33/40] Update basics.rst description Signed-off-by: Andrew Carbonetto --- docs/user/dql/basics.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user/dql/basics.rst b/docs/user/dql/basics.rst index 9003f90f30..b7e8cf35a4 100644 --- a/docs/user/dql/basics.rst +++ b/docs/user/dql/basics.rst @@ -156,7 +156,7 @@ Result set: +---------+--------+ One can also provide meta-field name(s) to retrieve reserved-fields (beginning with underscore) from OpenSearch documents. Meta-fields are not output -from wildcard calls (`SELECT *`) and must be explicitly included to be returned. +from wildcard calls (`SELECT *`) and must be explicitly included to be returned. 
SQL query:: From 47636c3335538079c0bb104a538bd18412ae486f Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Tue, 14 Mar 2023 17:04:12 -0700 Subject: [PATCH 34/40] Change Score function to accept a double/integer not an unresolved Signed-off-by: Andrew Carbonetto --- .../sql/analysis/ExpressionAnalyzer.java | 22 ++------ .../org/opensearch/sql/ast/dsl/AstDSL.java | 4 +- .../sql/ast/expression/QualifiedName.java | 16 +++--- .../sql/ast/expression/ScoreFunction.java | 4 +- .../opensearch/sql/analysis/AnalyzerTest.java | 54 ++----------------- sql/src/main/antlr/OpenSearchSQLParser.g4 | 2 +- .../sql/sql/parser/AstExpressionBuilder.java | 16 +++--- .../sql/parser/AstExpressionBuilderTest.java | 4 +- 8 files changed, 29 insertions(+), 93 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java index 37816a6eba..5331177b61 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java @@ -220,26 +220,12 @@ public Expression visitHighlightFunction(HighlightFunction node, AnalysisContext * @return resolved relevance function */ public Expression visitScoreFunction(ScoreFunction node, AnalysisContext context) { - // if no function argument given, just accept the relevance query and return - if (!(node.getFuncArg() instanceof Literal)) { - OpenSearchFunctions.OpenSearchFunction relevanceQueryExpr = - (OpenSearchFunctions.OpenSearchFunction) node - .getRelevanceQuery().accept(this, context); - relevanceQueryExpr.setScoreTracked(true); - return relevanceQueryExpr; - } - - // note: if an argument exists, and there should only be one, it will be a boost argument - Literal boostFunctionArg = (Literal) node.getFuncArg(); - Double thisBoostValue; - if (boostFunctionArg.getType().equals(DataType.DOUBLE)) { - thisBoostValue = ((Double) boostFunctionArg.getValue()); - } 
else if (boostFunctionArg.getType().equals(DataType.INTEGER)) { - thisBoostValue = ((Integer) boostFunctionArg.getValue()).doubleValue(); - } else { + Literal boostArg = node.getRelevanceFieldWeight(); + if (!boostArg.getType().equals(DataType.DOUBLE)) { throw new SemanticCheckException(String.format("Expected boost type '%s' but got '%s'", - DataType.DOUBLE.name(), boostFunctionArg.getType().name())); + DataType.DOUBLE.name(), boostArg.getType().name())); } + Double thisBoostValue = ((Double) boostArg.getValue()); // update the existing unresolved expression to add a boost argument if it doesn't exist // OR multiply the existing boost argument diff --git a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java index ea4561b60c..39bb18a759 100644 --- a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java +++ b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java @@ -290,8 +290,8 @@ public UnresolvedExpression highlight(UnresolvedExpression fieldName, } public UnresolvedExpression score(UnresolvedExpression relevanceQuery, - UnresolvedExpression funcArg) { - return new ScoreFunction(relevanceQuery, funcArg); + Literal relevanceFieldWeight) { + return new ScoreFunction(relevanceQuery, relevanceFieldWeight); } public UnresolvedExpression window(UnresolvedExpression function, diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java b/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java index b42d196460..42c26fbfe9 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java @@ -14,7 +14,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; -import java.util.HashMap; import java.util.List; import java.util.Optional; import java.util.stream.StreamSupport; @@ -28,10 +27,11 @@ public class QualifiedName extends 
UnresolvedExpression { private final List parts; - private final Boolean isMetadataField; + @Getter + private final boolean isMetadataField; public QualifiedName(String name) { - this(name, Boolean.FALSE); + this(name, false); } public static final String METADATA_FIELD_ID = "_id"; @@ -48,19 +48,19 @@ public QualifiedName(String name) { METADATA_FIELD_SORT, ExprCoreType.LONG ); - public QualifiedName(String name, Boolean isMetadataField) { + public QualifiedName(String name, boolean isMetadataField) { this.parts = Collections.singletonList(name); this.isMetadataField = isMetadataField; } public QualifiedName(Iterable parts) { - this(parts, Boolean.FALSE); + this(parts, false); } /** * QualifiedName Constructor. */ - public QualifiedName(Iterable parts, Boolean isMetadataField) { + public QualifiedName(Iterable parts, boolean isMetadataField) { this.isMetadataField = isMetadataField; List partsList = StreamSupport.stream(parts.spliterator(), false).collect(toList()); if (partsList.isEmpty()) { @@ -139,8 +139,4 @@ public List getChild() { public R accept(AbstractNodeVisitor nodeVisitor, C context) { return nodeVisitor.visitQualifiedName(this, context); } - - public Boolean isMetadataField() { - return this.isMetadataField; - } } diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/ScoreFunction.java b/core/src/main/java/org/opensearch/sql/ast/expression/ScoreFunction.java index ae30acc771..1b73f9bd95 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/ScoreFunction.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/ScoreFunction.java @@ -22,7 +22,7 @@ @ToString public class ScoreFunction extends UnresolvedExpression { private final UnresolvedExpression relevanceQuery; - private final UnresolvedExpression funcArg; + private final Literal relevanceFieldWeight; @Override public T accept(AbstractNodeVisitor nodeVisitor, C context) { @@ -31,6 +31,6 @@ public T accept(AbstractNodeVisitor nodeVisitor, C context) { @Override public 
List getChild() { - return List.of(relevanceQuery, funcArg); + return List.of(relevanceQuery); } } diff --git a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java index 3bb0ccaec9..da6c046368 100644 --- a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java +++ b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java @@ -228,7 +228,7 @@ public void analyze_filter_visit_score_function() { AstDSL.unresolvedArg("field", stringLiteral("field_value1")), AstDSL.unresolvedArg("query", stringLiteral("search query")), AstDSL.unresolvedArg("boost", stringLiteral("3")) - ), null) + ), AstDSL.doubleLiteral(1.0)) ); assertAnalyzeEqual( LogicalPlanDSL.filter( @@ -236,7 +236,7 @@ public void analyze_filter_visit_score_function() { DSL.match_phrase_prefix( DSL.namedArgument("field", "field_value1"), DSL.namedArgument("query", "search query"), - DSL.namedArgument("boost", "3") + DSL.namedArgument("boost", "3.0") ) ), unresolvedPlan @@ -308,30 +308,6 @@ public void analyze_filter_visit_score_function_with_double_boost() { assertEquals(true, relevanceQuery.isScoreTracked()); } - @Test - public void analyze_filter_visit_score_function_with_integer_boost() { - assertAnalyzeEqual( - LogicalPlanDSL.filter( - LogicalPlanDSL.relation("schema", table), - DSL.match_phrase_prefix( - DSL.namedArgument("field", "field_value1"), - DSL.namedArgument("query", "search query"), - DSL.namedArgument("boost", "9.0") - ) - ), - AstDSL.filter( - AstDSL.relation("schema"), - new ScoreFunction( - AstDSL.function("match_phrase_prefix", - AstDSL.unresolvedArg("field", stringLiteral("field_value1")), - AstDSL.unresolvedArg("query", stringLiteral("search query")), - AstDSL.unresolvedArg("boost", stringLiteral("3")) - ), new Literal(3, DataType.INTEGER) - ) - ) - ); - } - @Test public void analyze_filter_visit_score_function_with_unsupported_boost_SemanticCheckException() { UnresolvedPlan unresolvedPlan = 
AstDSL.filter( @@ -341,7 +317,7 @@ public void analyze_filter_visit_score_function_with_unsupported_boost_SemanticC AstDSL.unresolvedArg("field", stringLiteral("field_value1")), AstDSL.unresolvedArg("query", stringLiteral("search query")), AstDSL.unresolvedArg("boost", stringLiteral("3")) - ), new Literal("3.0", DataType.STRING) + ), AstDSL.stringLiteral("3.0") ) ); SemanticCheckException exception = @@ -353,30 +329,6 @@ public void analyze_filter_visit_score_function_with_unsupported_boost_SemanticC exception.getMessage()); } - @Test - public void analyze_filter_visit_score_function_with_invalid_field_ignored() { - assertAnalyzeEqual( - LogicalPlanDSL.filter( - LogicalPlanDSL.relation("schema", table), - DSL.match_phrase_prefix( - DSL.namedArgument("field", "field_value1"), - DSL.namedArgument("query", "search query"), - DSL.namedArgument("boost", "3") - ) - ), - AstDSL.filter( - AstDSL.relation("schema"), - new ScoreFunction( - AstDSL.function("match_phrase_prefix", - AstDSL.unresolvedArg("field", stringLiteral("field_value1")), - AstDSL.unresolvedArg("query", stringLiteral("search query")), - AstDSL.unresolvedArg("boost", stringLiteral("3")) - ), AstDSL.unresolvedArg("invalid", stringLiteral("value")) - ) - ) - ); - } - @Test public void head_relation() { assertAnalyzeEqual( diff --git a/sql/src/main/antlr/OpenSearchSQLParser.g4 b/sql/src/main/antlr/OpenSearchSQLParser.g4 index 0eafe74e71..97ff82ce8d 100644 --- a/sql/src/main/antlr/OpenSearchSQLParser.g4 +++ b/sql/src/main/antlr/OpenSearchSQLParser.g4 @@ -358,7 +358,7 @@ relevanceFunction ; scoreRelevanceFunction - : scoreRelevanceFunctionName LR_BRACKET relevanceFunction (COMMA functionArg)? RR_BRACKET + : scoreRelevanceFunctionName LR_BRACKET relevanceFunction (COMMA weight=relevanceFieldWeight)? 
RR_BRACKET ; noFieldRelevanceFunction diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java index d10f9804d1..abcee29322 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java @@ -484,10 +484,11 @@ public UnresolvedExpression visitAltMultiFieldRelevanceFunction( * @return children */ public UnresolvedExpression visitScoreRelevanceFunction(ScoreRelevanceFunctionContext ctx) { - return new ScoreFunction( - visit(ctx.relevanceFunction()), - ctx.functionArg() == null ? null : visit(ctx.functionArg()) - ); + Literal weight = + ctx.weight == null ? + new Literal(Double.valueOf(1.0), DataType.DOUBLE) : + new Literal(Double.parseDouble(ctx.weight.getText()), DataType.DOUBLE); + return new ScoreFunction(visit(ctx.relevanceFunction()), weight); } private Function buildFunction(String functionName, @@ -502,8 +503,8 @@ private Function buildFunction(String functionName, } private QualifiedName visitIdentifiers(List identifiers) { - Boolean isMetadataField = identifiers.stream().filter( - id -> id.metadataField() != null).findFirst().isPresent(); + // check if the last part is a metadata field + boolean isMetadataField = identifiers.get(identifiers.size() - 1).metadataField() != null; return new QualifiedName( identifiers.stream() .map(RuleContext::getText) @@ -618,7 +619,8 @@ private List alternateMultiMatchArguments( .stream().findFirst().ifPresent( arg -> builder.add(new UnresolvedArgument("query", - new Literal(StringUtils.unquoteText(arg.argVal.getText()), DataType.STRING))) + new Literal( + StringUtils.unquoteText(arg.argVal.getText()), DataType.STRING))) ); fillRelevanceArgs(ctx.relevanceArg(), builder); diff --git a/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java 
b/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java index 74322f40ed..14db1ec8d7 100644 --- a/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java +++ b/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java @@ -798,10 +798,10 @@ public void relevanceScore_query() { AstDSL.score( AstDSL.function("query_string", unresolvedArg("fields", new RelevanceFieldList(ImmutableMap.of( - "field2", 3.2F, "field1", 1.F))), + "field1", 1.F, "field2", 3.2F))), unresolvedArg("query", stringLiteral("search query")) ), - null + AstDSL.doubleLiteral(1.0) ), buildExprAst("score(query_string(['field1', 'field2' ^ 3.2], 'search query'))") ); From 6d9853c12f895a3c277fa56d81762d1d54c8122c Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Tue, 14 Mar 2023 17:22:37 -0700 Subject: [PATCH 35/40] Update functions.rst Signed-off-by: Andrew Carbonetto --- docs/user/dql/functions.rst | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/docs/user/dql/functions.rst b/docs/user/dql/functions.rst index ab6cb533a9..682a6a1fbd 100644 --- a/docs/user/dql/functions.rst +++ b/docs/user/dql/functions.rst @@ -3845,12 +3845,14 @@ SCORE Description >>>>>>>>>>> -``score(search_expression, boost)`` -``score_query(search_expression, boost)`` -``scorequery(search_expression, boost)`` +``score(relevance_expression, boost)`` +``score_query(relevance_expression, boost)`` +``scorequery(relevance_expression, boost)`` + +The score function returns the `_score` of any documents matching the enclosed relevance-based expression. The SCORE function expects two +arguments. The first argument is the relevance-based search expression. The second argument is an optional floating-point number to boost +the score (the default value is 1.0). -The score function returns the _score of any documents matching the enclosed relevance-search expression. The SCORE function expects two -arguments. 
The first argument is the search expression. The second argument is an optional floating-point number to boost the score (the default value is 1.0). Please refer to examples below: | ``score(query('Tags:taste OR Body:taste', ...), 2.0)`` From f93f7c434dc2ff9a57c34df85ca102ca38a9764a Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Wed, 15 Mar 2023 09:08:53 -0700 Subject: [PATCH 36/40] Checkstyle update Signed-off-by: Andrew Carbonetto --- .../storage/scan/OpenSearchIndexScanQueryBuilder.java | 7 ++----- .../opensearch/sql/sql/parser/AstExpressionBuilder.java | 6 +++--- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanQueryBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanQueryBuilder.java index 18fc4201a5..d5a0c72f20 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanQueryBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanQueryBuilder.java @@ -110,11 +110,8 @@ private boolean trackScoresFromOpenSearchFunction(Expression condition) { return true; } if (condition instanceof FunctionExpression) { - for (Expression expr: ((FunctionExpression) condition).getArguments()) { - if (trackScoresFromOpenSearchFunction(expr)) { - return true; - } - } + return ((FunctionExpression) condition).getArguments().stream() + .anyMatch(this::trackScoresFromOpenSearchFunction); } return false; } diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java index abcee29322..99cfdf425e 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java @@ -485,9 +485,9 @@ public UnresolvedExpression visitAltMultiFieldRelevanceFunction( 
*/ public UnresolvedExpression visitScoreRelevanceFunction(ScoreRelevanceFunctionContext ctx) { Literal weight = - ctx.weight == null ? - new Literal(Double.valueOf(1.0), DataType.DOUBLE) : - new Literal(Double.parseDouble(ctx.weight.getText()), DataType.DOUBLE); + ctx.weight == null + ? new Literal(Double.valueOf(1.0), DataType.DOUBLE) + : new Literal(Double.parseDouble(ctx.weight.getText()), DataType.DOUBLE); return new ScoreFunction(visit(ctx.relevanceFunction()), weight); } From 1340f11c9ca206bccca4d26ad13467bb30ba8592 Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Wed, 15 Mar 2023 17:21:23 -0700 Subject: [PATCH 37/40] Move reserved world symbol table to OpenSearchTable Signed-off-by: Andrew Carbonetto --- .../org/opensearch/sql/analysis/Analyzer.java | 4 ++ .../sql/analysis/ExpressionAnalyzer.java | 22 ++++++--- .../sql/analysis/TypeEnvironment.java | 25 +++++++++- .../org/opensearch/sql/ast/dsl/AstDSL.java | 6 +-- .../sql/ast/expression/QualifiedName.java | 31 +----------- .../org/opensearch/sql/storage/Table.java | 8 +++ .../opensearch/sql/analysis/AnalyzerTest.java | 49 +++++++++++++++++++ .../sql/analysis/AnalyzerTestBase.java | 5 ++ .../sql/analysis/ExpressionAnalyzerTest.java | 43 ++++++---------- .../opensearch/storage/OpenSearchIndex.java | 21 ++++++++ .../storage/OpenSearchIndexTest.java | 15 ++++++ .../sql/sql/parser/AstExpressionBuilder.java | 5 +- .../sql/parser/AstExpressionBuilderTest.java | 3 +- 13 files changed, 161 insertions(+), 76 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 228b54ba0c..f9ffe06690 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -63,6 +63,7 @@ import org.opensearch.sql.ast.tree.Values; import org.opensearch.sql.data.model.ExprMissingValue; import org.opensearch.sql.data.type.ExprCoreType; +import 
org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.datasource.DataSourceService; import org.opensearch.sql.datasource.model.DataSource; import org.opensearch.sql.exception.SemanticCheckException; @@ -157,6 +158,9 @@ public LogicalPlan visitRelation(Relation node, AnalysisContext context) { dataSourceSchemaIdentifierNameResolver.getIdentifierName()); } table.getFieldTypes().forEach((k, v) -> curEnv.define(new Symbol(Namespace.FIELD_NAME, k), v)); + table.getReservedFieldTypes().forEach( + (k, v) -> curEnv.addReservedWord(new Symbol(Namespace.FIELD_NAME, k), v) + ); // Put index name or its alias in index namespace on type environment so qualifier // can be removed when analyzing qualified name. The value (expr type) here doesn't matter. diff --git a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java index 5331177b61..55776fe6a3 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java @@ -8,7 +8,6 @@ import static org.opensearch.sql.ast.dsl.AstDSL.and; import static org.opensearch.sql.ast.dsl.AstDSL.compare; -import static org.opensearch.sql.ast.expression.QualifiedName.METADATAFIELD_TYPE_MAP; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -359,8 +358,19 @@ public Expression visitAllFields(AllFields node, AnalysisContext context) { @Override public Expression visitQualifiedName(QualifiedName node, AnalysisContext context) { QualifierAnalyzer qualifierAnalyzer = new QualifierAnalyzer(context); - if (node.isMetadataField()) { - return visitMetadata(qualifierAnalyzer.unqualified(node), context); + + // check for reserved words in the identifier + TypeEnvironment typeEnv = context.peek(); + for (String part : node.getParts()) { + Optional exprType = typeEnv.getReservedSymbolTable().lookup( + new 
Symbol(Namespace.FIELD_NAME, part)); + if (exprType.isPresent()) { + return visitMetadata( + qualifierAnalyzer.unqualified(node), + (ExprCoreType) exprType.get(), + context + ); + } } return visitIdentifier(qualifierAnalyzer.unqualified(node), context); } @@ -385,9 +395,9 @@ public Expression visitUnresolvedArgument(UnresolvedArgument node, AnalysisConte * @param context analysis context * @return DSL reference */ - private Expression visitMetadata(String ident, AnalysisContext context) { - ExprCoreType exprCoreType = Optional.ofNullable(METADATAFIELD_TYPE_MAP.get(ident)) - .orElseThrow(() -> new SemanticCheckException("invalid metadata field")); + private Expression visitMetadata(String ident, + ExprCoreType exprCoreType, + AnalysisContext context) { return DSL.ref(ident, exprCoreType); } diff --git a/core/src/main/java/org/opensearch/sql/analysis/TypeEnvironment.java b/core/src/main/java/org/opensearch/sql/analysis/TypeEnvironment.java index c86d8109ad..c9fd8030e0 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/TypeEnvironment.java +++ b/core/src/main/java/org/opensearch/sql/analysis/TypeEnvironment.java @@ -29,14 +29,30 @@ public class TypeEnvironment implements Environment { private final TypeEnvironment parent; private final SymbolTable symbolTable; + @Getter + private final SymbolTable reservedSymbolTable; + + /** + * Constructor with empty symbol tables. + * + * @param parent parent environment + */ public TypeEnvironment(TypeEnvironment parent) { this.parent = parent; this.symbolTable = new SymbolTable(); + this.reservedSymbolTable = new SymbolTable(); } + /** + * Constructor with empty reserved symbol table. 
+ * + * @param parent parent environment + * @param symbolTable type table + */ public TypeEnvironment(TypeEnvironment parent, SymbolTable symbolTable) { this.parent = parent; this.symbolTable = symbolTable; + this.reservedSymbolTable = new SymbolTable(); } /** @@ -59,6 +75,7 @@ public ExprType resolve(Symbol symbol) { /** * Resolve all fields in the current environment. + * * @param namespace a namespace * @return all symbols in the namespace */ @@ -102,7 +119,11 @@ public void remove(ReferenceExpression ref) { * Clear all fields in the current environment. */ public void clearAllFields() { - lookupAllFields(FIELD_NAME).keySet().stream() - .forEach(v -> remove(new Symbol(Namespace.FIELD_NAME, v))); + lookupAllFields(FIELD_NAME).keySet().forEach( + v -> remove(new Symbol(Namespace.FIELD_NAME, v))); + } + + public void addReservedWord(Symbol symbol, ExprType type) { + reservedSymbolTable.store(symbol, type); } } diff --git a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java index 39bb18a759..de2ab5404a 100644 --- a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java +++ b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java @@ -140,11 +140,7 @@ public UnresolvedPlan values(List... values) { } public static QualifiedName qualifiedName(String... parts) { - return new QualifiedName(Arrays.asList(parts), Boolean.FALSE); - } - - public static QualifiedName qualifiedNameWithMetadata(String... 
parts) { - return new QualifiedName(Arrays.asList(parts), Boolean.TRUE); + return new QualifiedName(Arrays.asList(parts)); } public static UnresolvedExpression equalTo( diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java b/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java index 42c26fbfe9..8b16119dc0 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java @@ -10,7 +10,6 @@ import static java.util.stream.Collectors.toList; import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -20,48 +19,20 @@ import lombok.EqualsAndHashCode; import lombok.Getter; import org.opensearch.sql.ast.AbstractNodeVisitor; -import org.opensearch.sql.data.type.ExprCoreType; @Getter @EqualsAndHashCode(callSuper = false) public class QualifiedName extends UnresolvedExpression { private final List parts; - @Getter - private final boolean isMetadataField; - public QualifiedName(String name) { - this(name, false); - } - - public static final String METADATA_FIELD_ID = "_id"; - public static final String METADATA_FIELD_INDEX = "_index"; - public static final String METADATA_FIELD_SCORE = "_score"; - public static final String METADATA_FIELD_MAXSCORE = "_maxscore"; - public static final String METADATA_FIELD_SORT = "_sort"; - - public static final java.util.Map METADATAFIELD_TYPE_MAP = ImmutableMap.of( - METADATA_FIELD_ID, ExprCoreType.STRING, - METADATA_FIELD_INDEX, ExprCoreType.STRING, - METADATA_FIELD_SCORE, ExprCoreType.FLOAT, - METADATA_FIELD_MAXSCORE, ExprCoreType.FLOAT, - METADATA_FIELD_SORT, ExprCoreType.LONG - ); - - public QualifiedName(String name, boolean isMetadataField) { this.parts = Collections.singletonList(name); - this.isMetadataField = isMetadataField; - } - - public QualifiedName(Iterable parts) 
{ - this(parts, false); } /** * QualifiedName Constructor. */ - public QualifiedName(Iterable parts, boolean isMetadataField) { - this.isMetadataField = isMetadataField; + public QualifiedName(Iterable parts) { List partsList = StreamSupport.stream(parts.spliterator(), false).collect(toList()); if (partsList.isEmpty()) { throw new IllegalArgumentException("parts is empty"); diff --git a/core/src/main/java/org/opensearch/sql/storage/Table.java b/core/src/main/java/org/opensearch/sql/storage/Table.java index 496281fa8d..8990796172 100644 --- a/core/src/main/java/org/opensearch/sql/storage/Table.java +++ b/core/src/main/java/org/opensearch/sql/storage/Table.java @@ -6,6 +6,7 @@ package org.opensearch.sql.storage; +import com.google.common.collect.ImmutableMap; import java.util.Map; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.executor.streaming.StreamingSource; @@ -43,6 +44,13 @@ default void create(Map schema) { */ Map getFieldTypes(); + /** + * Get the {@link ExprType} for each meta-field (reserved fields) in the table. + */ + default Map getReservedFieldTypes() { + return ImmutableMap.of(); + } + /** * Implement a {@link LogicalPlan} by {@link PhysicalPlan} in storage engine. 
* diff --git a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java index da6c046368..f711c2362d 100644 --- a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java +++ b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java @@ -31,6 +31,7 @@ import static org.opensearch.sql.ast.tree.Sort.SortOption.DEFAULT_ASC; import static org.opensearch.sql.ast.tree.Sort.SortOrder; import static org.opensearch.sql.data.model.ExprValueUtils.integerValue; +import static org.opensearch.sql.data.model.ExprValueUtils.stringValue; import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN; import static org.opensearch.sql.data.type.ExprCoreType.DOUBLE; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; @@ -107,6 +108,54 @@ public void filter_relation() { AstDSL.equalTo(AstDSL.field("integer_value"), AstDSL.intLiteral(1)))); } + @Test + public void filter_relation_with_reserved_qualifiedName() { + assertAnalyzeEqual( + LogicalPlanDSL.filter( + LogicalPlanDSL.relation("schema", table), + DSL.equal(DSL.ref("_test", STRING), DSL.literal(stringValue("value")))), + AstDSL.filter( + AstDSL.relation("schema"), + AstDSL.equalTo(AstDSL.qualifiedName("_test"), AstDSL.stringLiteral("value")))); + } + + @Test + public void filter_relation_with_invalid_qualifiedName_SemanticCheckException() { + UnresolvedPlan invalidFieldPlan = AstDSL.filter( + AstDSL.relation("schema"), + AstDSL.equalTo( + AstDSL.qualifiedName("_invalid"), + AstDSL.stringLiteral("value")) + ); + + SemanticCheckException exception = + assertThrows( + SemanticCheckException.class, + () -> analyze(invalidFieldPlan)); + assertEquals( + "can't resolve Symbol(namespace=FIELD_NAME, name=_invalid) in type env", + exception.getMessage()); + } + + @Test + public void filter_relation_with_invalid_qualifiedName_ExpressionEvaluationException() { + UnresolvedPlan typeMismatchPlan = AstDSL.filter( + AstDSL.relation("schema"), + 
AstDSL.equalTo(AstDSL.qualifiedName("_test"), AstDSL.intLiteral(1)) + ); + + ExpressionEvaluationException exception = + assertThrows( + ExpressionEvaluationException.class, + () -> analyze(typeMismatchPlan)); + assertEquals( + "= function expected {[BYTE,BYTE],[SHORT,SHORT],[INTEGER,INTEGER],[LONG,LONG]," + + "[FLOAT,FLOAT],[DOUBLE,DOUBLE],[STRING,STRING],[BOOLEAN,BOOLEAN],[DATE,DATE]," + + "[TIME,TIME],[DATETIME,DATETIME],[TIMESTAMP,TIMESTAMP],[INTERVAL,INTERVAL]," + + "[STRUCT,STRUCT],[ARRAY,ARRAY]}, but get [STRING,INTEGER]", + exception.getMessage()); + } + @Test public void filter_relation_with_alias() { assertAnalyzeEqual( diff --git a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTestBase.java b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTestBase.java index 51c1f06433..d2875beced 100644 --- a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTestBase.java +++ b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTestBase.java @@ -10,6 +10,7 @@ import static org.opensearch.sql.data.type.ExprCoreType.LONG; import static org.opensearch.sql.data.type.ExprCoreType.STRING; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import java.util.List; import java.util.Map; @@ -73,6 +74,10 @@ public Map getFieldTypes() { public PhysicalPlan implement(LogicalPlan plan) { throw new UnsupportedOperationException(); } + + public Map getReservedFieldTypes() { + return ImmutableMap.of("_test", STRING); + } }); } diff --git a/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java b/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java index b92aad31e7..d07996702a 100644 --- a/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java +++ b/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java @@ -15,7 +15,6 @@ import static org.opensearch.sql.ast.dsl.AstDSL.function; import static org.opensearch.sql.ast.dsl.AstDSL.intLiteral; 
import static org.opensearch.sql.ast.dsl.AstDSL.qualifiedName; -import static org.opensearch.sql.ast.dsl.AstDSL.qualifiedNameWithMetadata; import static org.opensearch.sql.ast.dsl.AstDSL.stringLiteral; import static org.opensearch.sql.ast.dsl.AstDSL.unresolvedArg; import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_TRUE; @@ -232,36 +231,26 @@ public void qualified_name_with_qualifier() { } @Test - public void qualified_name_with_metadata_field_success() { + public void qualified_name_with_reserved_symbol() { analysisContext.push(); - analysisContext.peek().define(new Symbol(Namespace.INDEX_NAME, "index_alias"), STRUCT); - - assertAnalyzeEqual(DSL.ref("_id", STRING), qualifiedNameWithMetadata("index_alias", "_id")); - assertAnalyzeEqual(DSL.ref("_index", STRING), - qualifiedNameWithMetadata("index_alias", "_index")); - assertAnalyzeEqual(DSL.ref("_score", FLOAT), - qualifiedNameWithMetadata("index_alias", "_score")); - assertAnalyzeEqual(DSL.ref("_maxscore", FLOAT), - qualifiedNameWithMetadata("index_alias", "_maxscore")); - assertAnalyzeEqual(DSL.ref("_sort", LONG), qualifiedNameWithMetadata("index_alias", "_sort")); - assertAnalyzeEqual(DSL.ref("_id", STRING), qualifiedNameWithMetadata("_id")); - assertAnalyzeEqual(DSL.ref("_index", STRING), qualifiedNameWithMetadata("_index")); - - analysisContext.pop(); - } - - @Test - public void qualified_name_with_metadata_field_failure() { - analysisContext.push(); + analysisContext.peek().addReservedWord(new Symbol(Namespace.FIELD_NAME, "_reserved"), STRING); + analysisContext.peek().addReservedWord(new Symbol(Namespace.FIELD_NAME, "_priority"), FLOAT); analysisContext.peek().define(new Symbol(Namespace.INDEX_NAME, "index_alias"), STRUCT); + assertAnalyzeEqual( + DSL.ref("_priority", FLOAT), + qualifiedName("_priority") + ); + assertAnalyzeEqual( + DSL.ref("_reserved", STRING), + qualifiedName("index_alias", "_reserved") + ); - SemanticCheckException exception = - assertThrows(SemanticCheckException.class, - 
() -> analyze(qualifiedNameWithMetadata("index_alias", "_invalid"))); - assertEquals( - "invalid metadata field", - exception.getMessage() + // reserved fields take priority over symbol table + analysisContext.peek().define(new Symbol(Namespace.FIELD_NAME, "_reserved"), LONG); + assertAnalyzeEqual( + DSL.ref("_reserved", STRING), + qualifiedName("index_alias", "_reserved") ); analysisContext.pop(); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java index c694769b89..9e0edc0d69 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java @@ -7,10 +7,12 @@ package org.opensearch.sql.opensearch.storage; import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableMap; import java.util.HashMap; import java.util.Map; import lombok.RequiredArgsConstructor; import org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.opensearch.client.OpenSearchClient; import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; @@ -33,6 +35,20 @@ /** OpenSearch table (index) implementation. 
*/ public class OpenSearchIndex implements Table { + public static final String METADATA_FIELD_ID = "_id"; + public static final String METADATA_FIELD_INDEX = "_index"; + public static final String METADATA_FIELD_SCORE = "_score"; + public static final String METADATA_FIELD_MAXSCORE = "_maxscore"; + public static final String METADATA_FIELD_SORT = "_sort"; + + public static final java.util.Map METADATAFIELD_TYPE_MAP = ImmutableMap.of( + METADATA_FIELD_ID, ExprCoreType.STRING, + METADATA_FIELD_INDEX, ExprCoreType.STRING, + METADATA_FIELD_SCORE, ExprCoreType.FLOAT, + METADATA_FIELD_MAXSCORE, ExprCoreType.FLOAT, + METADATA_FIELD_SORT, ExprCoreType.LONG + ); + /** OpenSearch client connection. */ private final OpenSearchClient client; @@ -92,6 +108,11 @@ public Map getFieldTypes() { return cachedFieldTypes; } + @Override + public Map getReservedFieldTypes() { + return METADATAFIELD_TYPE_MAP; + } + /** * Get the max result window setting of the table. */ diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/OpenSearchIndexTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/OpenSearchIndexTest.java index 74c18f7c3d..80820a261c 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/OpenSearchIndexTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/OpenSearchIndexTest.java @@ -152,6 +152,21 @@ void getFieldTypes() { } } + @Test + void getReservedFieldTypes() { + Map fieldTypes = index.getReservedFieldTypes(); + assertThat( + fieldTypes, + allOf( + aMapWithSize(5), + hasEntry("_id", ExprCoreType.STRING), + hasEntry("_index", ExprCoreType.STRING), + hasEntry("_sort", ExprCoreType.LONG), + hasEntry("_score", ExprCoreType.FLOAT), + hasEntry("_maxscore", ExprCoreType.FLOAT) + )); + } + @Test void implementRelationOperatorOnly() { when(settings.getSettingValue(Settings.Key.QUERY_SIZE_LIMIT)).thenReturn(200); diff --git 
a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java index 99cfdf425e..1659503c00 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java @@ -503,14 +503,11 @@ private Function buildFunction(String functionName, } private QualifiedName visitIdentifiers(List identifiers) { - // check if the last part is a metadata field - boolean isMetadataField = identifiers.get(identifiers.size() - 1).metadataField() != null; return new QualifiedName( identifiers.stream() .map(RuleContext::getText) .map(StringUtils::unquoteIdentifier) - .collect(Collectors.toList()), - isMetadataField); + .collect(Collectors.toList())); } private void fillRelevanceArgs(List args, diff --git a/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java b/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java index 14db1ec8d7..63a2413c35 100644 --- a/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java +++ b/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java @@ -23,7 +23,6 @@ import static org.opensearch.sql.ast.dsl.AstDSL.nullLiteral; import static org.opensearch.sql.ast.dsl.AstDSL.or; import static org.opensearch.sql.ast.dsl.AstDSL.qualifiedName; -import static org.opensearch.sql.ast.dsl.AstDSL.qualifiedNameWithMetadata; import static org.opensearch.sql.ast.dsl.AstDSL.stringLiteral; import static org.opensearch.sql.ast.dsl.AstDSL.timeLiteral; import static org.opensearch.sql.ast.dsl.AstDSL.timestampLiteral; @@ -441,7 +440,7 @@ public void canBuildKeywordsAsIdentInQualifiedName() { public void canBuildMetaDataFieldAsQualifiedName() { Stream.of("_id", "_index", "_sort", "_score", "_maxscore").forEach( field -> assertEquals( - qualifiedNameWithMetadata(field), + qualifiedName(field), buildExprAst(field) 
) ); From bdc302018a35c55909a81a148f35b3ccfb8279a5 Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Thu, 16 Mar 2023 09:14:36 -0700 Subject: [PATCH 38/40] Update functions.rst for review comments Signed-off-by: Andrew Carbonetto --- docs/user/dql/functions.rst | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/docs/user/dql/functions.rst b/docs/user/dql/functions.rst index 682a6a1fbd..ab8e3d1a19 100644 --- a/docs/user/dql/functions.rst +++ b/docs/user/dql/functions.rst @@ -3845,13 +3845,16 @@ SCORE Description >>>>>>>>>>> -``score(relevance_expression, boost)`` -``score_query(relevance_expression, boost)`` -``scorequery(relevance_expression, boost)`` +``score(relevance_expression[, boost])`` +``score_query(relevance_expression[, boost])`` +``scorequery(relevance_expression[, boost])`` -The score function returns the `_score` of any documents matching the enclosed relevance-based expression. The SCORE function expects two -arguments. The first argument is the relevance-based search expression. The second argument is an optional floating-point number to boost -the score (the default value is 1.0). +The `SCORE()` function calculates the `_score` of any documents matching the enclosed relevance-based expression. The `SCORE()` +function expects one argument with an optional second argument. The first argument is the relevance-based search expression. +The second argument is an optional floating-point boost to the score (the default value is 1.0). + +The `SCORE()` function sets `track_scores=true` for OpenSearch requests. Without it, `_score` fields may return `null` for some +relevance-based search expressions. 
Please refer to examples below: From 31f06170351e5f7c7c24464c063b1dcc16735090 Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Fri, 17 Mar 2023 15:30:24 -0700 Subject: [PATCH 39/40] Removed parser meta tokens; Changes ImmutableMap to Map Signed-off-by: Andrew Carbonetto --- core/src/main/java/org/opensearch/sql/storage/Table.java | 3 +-- .../opensearch/sql/analysis/ExpressionAnalyzerTest.java | 3 ++- docs/user/dql/functions.rst | 2 +- .../sql/opensearch/storage/OpenSearchIndex.java | 3 +-- .../sql/opensearch/storage/OpenSearchIndexScan.java | 2 +- sql/src/main/antlr/OpenSearchSQLParser.g4 | 9 --------- 6 files changed, 6 insertions(+), 16 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/storage/Table.java b/core/src/main/java/org/opensearch/sql/storage/Table.java index 8990796172..e2586ed22c 100644 --- a/core/src/main/java/org/opensearch/sql/storage/Table.java +++ b/core/src/main/java/org/opensearch/sql/storage/Table.java @@ -6,7 +6,6 @@ package org.opensearch.sql.storage; -import com.google.common.collect.ImmutableMap; import java.util.Map; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.executor.streaming.StreamingSource; @@ -48,7 +47,7 @@ default void create(Map schema) { * Get the {@link ExprType} for each meta-field (reserved fields) in the table. 
*/ default Map getReservedFieldTypes() { - return ImmutableMap.of(); + return Map.of(); } /** diff --git a/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java b/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java index d07996702a..c7cd8d0556 100644 --- a/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java +++ b/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java @@ -739,7 +739,8 @@ public void match_phrase_prefix_all_params() { ); } - @Test void scorequery_function_with_boost() { + @Test + void scorequery_function_with_boost() { assertAnalyzeEqual( DSL.scorequery( DSL.namedArgument("RelevanceQuery", diff --git a/docs/user/dql/functions.rst b/docs/user/dql/functions.rst index ab8e3d1a19..fb385b8c3e 100644 --- a/docs/user/dql/functions.rst +++ b/docs/user/dql/functions.rst @@ -3851,7 +3851,7 @@ Description The `SCORE()` function calculates the `_score` of any documents matching the enclosed relevance-based expression. The `SCORE()` function expects one argument with an optional second argument. The first argument is the relevance-based search expression. -The second argument is an optional floating-point boost to the score (the default value is 1.0). +The second argument is an optional floating-point boost to the score (the default value is 1.0). The `SCORE()` function sets `track_scores=true` for OpenSearch requests. Without it, `_score` fields may return `null` for some relevance-based search expressions. 
diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java index 9e0edc0d69..9d6de3a4a2 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java @@ -7,7 +7,6 @@ package org.opensearch.sql.opensearch.storage; import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.ImmutableMap; import java.util.HashMap; import java.util.Map; import lombok.RequiredArgsConstructor; @@ -41,7 +40,7 @@ public class OpenSearchIndex implements Table { public static final String METADATA_FIELD_MAXSCORE = "_maxscore"; public static final String METADATA_FIELD_SORT = "_sort"; - public static final java.util.Map METADATAFIELD_TYPE_MAP = ImmutableMap.of( + public static final java.util.Map METADATAFIELD_TYPE_MAP = Map.of( METADATA_FIELD_ID, ExprCoreType.STRING, METADATA_FIELD_INDEX, ExprCoreType.STRING, METADATA_FIELD_SCORE, ExprCoreType.FLOAT, diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndexScan.java index 66d9216b25..a26e64a809 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndexScan.java @@ -75,7 +75,7 @@ public OpenSearchIndexScan(OpenSearchClient client, Settings settings, OpenSearchExprValueFactory exprValueFactory) { this.client = client; this.requestBuilder = new OpenSearchRequestBuilder( - indexName, maxResultWindow, settings,exprValueFactory); + indexName, maxResultWindow, settings, exprValueFactory); } @Override diff --git a/sql/src/main/antlr/OpenSearchSQLParser.g4 b/sql/src/main/antlr/OpenSearchSQLParser.g4 index 97ff82ce8d..4cdc78dc14 
100644 --- a/sql/src/main/antlr/OpenSearchSQLParser.g4 +++ b/sql/src/main/antlr/OpenSearchSQLParser.g4 @@ -635,19 +635,10 @@ qualifiedName ident : DOT? ID | BACKTICK_QUOTE_ID - | metadataField | keywordsCanBeId | scalarFunctionName ; -metadataField - : META_INDEX - | META_ID - | META_SCORE - | META_MAXSCORE - | META_SORT - ; - keywordsCanBeId : FULL | FIELD | D | T | TS // OD SQL and ODBC special From 55b93ac0535c9993f170e2aa0855f5216c79c253 Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Fri, 17 Mar 2023 15:53:06 -0700 Subject: [PATCH 40/40] Removed parser meta tokens; Changes ImmutableMap to Map Signed-off-by: Andrew Carbonetto --- sql/src/main/antlr/OpenSearchSQLLexer.g4 | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/sql/src/main/antlr/OpenSearchSQLLexer.g4 b/sql/src/main/antlr/OpenSearchSQLLexer.g4 index dec67ec363..34488f7a7f 100644 --- a/sql/src/main/antlr/OpenSearchSQLLexer.g4 +++ b/sql/src/main/antlr/OpenSearchSQLLexer.g4 @@ -134,16 +134,6 @@ STDDEV_SAMP: 'STDDEV_SAMP'; SUBSTRING: 'SUBSTRING'; TRIM: 'TRIM'; - -// Metadata fields can be ID - -META_INDEX: '_INDEX'; -META_ID: '_ID'; -META_SCORE: '_SCORE'; -META_MAXSCORE: '_MAXSCORE'; -META_SORT: '_SORT'; - - // Keywords, but can be ID // Common Keywords, but can be ID