Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#639: allow metadata fields and score opensearch function #228

Merged
merged 41 commits into from
Mar 21, 2023
Merged
Show file tree
Hide file tree
Changes from 27 commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
d8b412d
Rebase from main
acarbonetto Mar 7, 2023
5eaa344
Update to define and include metadata when visiting the expr node
acarbonetto Nov 14, 2022
ea25455
Add specific metadata identifiers
acarbonetto Jan 10, 2023
9f1dcca
Add IT tests and add parser changes
acarbonetto Jan 10, 2023
5a70363
Rebase from main
acarbonetto Mar 7, 2023
8c0aaf6
Update score function expression analyzer to return boosted relevance…
acarbonetto Feb 14, 2023
4d8229d
Update builder to track scores
acarbonetto Feb 15, 2023
4630c98
Remove ScoreExpression.java and cleanup checkstyle
acarbonetto Feb 15, 2023
9a10273
cleanup checkstyle
acarbonetto Feb 15, 2023
0a92969
Cleanup and add alternative score function syntax
acarbonetto Feb 15, 2023
3b5e900
Cleanup and add alternative score function syntax
acarbonetto Feb 15, 2023
348b8b9
Fix some bugs and add Expression tests
acarbonetto Feb 16, 2023
f67d4f2
Add expresssion and analyzer tests
acarbonetto Feb 16, 2023
be7191a
Add score doctests
acarbonetto Feb 17, 2023
1a0b3ef
Add score function doctests
acarbonetto Feb 17, 2023
05d9dd3
Add metafield tests
acarbonetto Mar 3, 2023
996a166
Move legacy test and mark old as ignore
acarbonetto Mar 3, 2023
2c469c7
fix checkstyle violations
acarbonetto Mar 3, 2023
4c7a5d7
fix checkstyle violations
acarbonetto Mar 3, 2023
dec36fa
Update tests and identifier to accept metafields
acarbonetto Mar 7, 2023
7aab6ee
Checkstyle fixes
acarbonetto Mar 7, 2023
fcb3470
Rebase from main
acarbonetto Mar 7, 2023
6e52d13
Rebase from main
acarbonetto Mar 7, 2023
0d3beab
Rebase from main
acarbonetto Mar 7, 2023
1c05aba
fix checkstyle violations
acarbonetto Mar 3, 2023
f97dfea
Revert bad conflict resolution
acarbonetto Mar 7, 2023
3ba3c85
Fix for review comments
acarbonetto Mar 8, 2023
dd15853
Update IT tests and legacy tests for comments
acarbonetto Mar 13, 2023
543d232
Minor comment
acarbonetto Mar 13, 2023
9d59e9f
Updates for whitespace
acarbonetto Mar 13, 2023
7a05276
Update basics.rst to show OS result
acarbonetto Mar 14, 2023
e75d6b5
Update basics.rst to show OS result
acarbonetto Mar 14, 2023
fecf615
Update basics.rst description
acarbonetto Mar 14, 2023
47636c3
Change Score function to accept a double/integer not an unresolved
acarbonetto Mar 15, 2023
6d9853c
Update functions.rst
acarbonetto Mar 15, 2023
f93f7c4
Checkstyle update
acarbonetto Mar 15, 2023
1340f11
Move reserved world symbol table to OpenSearchTable
acarbonetto Mar 16, 2023
bdc3020
Update functions.rst for review comments
acarbonetto Mar 16, 2023
31f0617
Removed parser meta tokens; Changes ImmutableMap to Map
acarbonetto Mar 17, 2023
55b93ac
Removed parser meta tokens; Changes ImmutableMap to Map
acarbonetto Mar 17, 2023
0410521
Merge branch 'integ-metadata-fields' into dev-metadata-fields
acarbonetto Mar 21, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@

import static org.opensearch.sql.ast.dsl.AstDSL.and;
import static org.opensearch.sql.ast.dsl.AstDSL.compare;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.GTE;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.LTE;
import static org.opensearch.sql.ast.expression.QualifiedName.METADATAFIELD_TYPE_MAP;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
Expand All @@ -31,6 +30,7 @@
import org.opensearch.sql.ast.expression.Case;
import org.opensearch.sql.ast.expression.Cast;
import org.opensearch.sql.ast.expression.Compare;
import org.opensearch.sql.ast.expression.DataType;
import org.opensearch.sql.ast.expression.EqualTo;
import org.opensearch.sql.ast.expression.Field;
import org.opensearch.sql.ast.expression.Function;
Expand All @@ -42,6 +42,7 @@
import org.opensearch.sql.ast.expression.Or;
import org.opensearch.sql.ast.expression.QualifiedName;
import org.opensearch.sql.ast.expression.RelevanceFieldList;
import org.opensearch.sql.ast.expression.ScoreFunction;
import org.opensearch.sql.ast.expression.Span;
import org.opensearch.sql.ast.expression.UnresolvedArgument;
import org.opensearch.sql.ast.expression.UnresolvedAttribute;
Expand All @@ -51,6 +52,7 @@
import org.opensearch.sql.ast.expression.Xor;
import org.opensearch.sql.common.antlr.SyntaxCheckException;
import org.opensearch.sql.data.model.ExprValueUtils;
import org.opensearch.sql.data.type.ExprCoreType;
import org.opensearch.sql.data.type.ExprType;
import org.opensearch.sql.exception.SemanticCheckException;
import org.opensearch.sql.expression.DSL;
Expand All @@ -67,6 +69,7 @@
import org.opensearch.sql.expression.function.BuiltinFunctionName;
import org.opensearch.sql.expression.function.BuiltinFunctionRepository;
import org.opensearch.sql.expression.function.FunctionName;
import org.opensearch.sql.expression.function.OpenSearchFunctions;
import org.opensearch.sql.expression.parse.ParseExpression;
import org.opensearch.sql.expression.span.SpanExpression;
import org.opensearch.sql.expression.window.aggregation.AggregateWindowFunction;
Expand Down Expand Up @@ -207,6 +210,77 @@ public Expression visitHighlightFunction(HighlightFunction node, AnalysisContext
return new HighlightExpression(expr);
}

/**
* visitScoreFunction removes the score function from the AST and replaces it with the child
* relevance function node. If the optional boost variable is provided, the boost argument
* of the relevance function is combined.
* @param node score function node
* @param context analysis context for the query
* @return resolved relevance function
*/
public Expression visitScoreFunction(ScoreFunction node, AnalysisContext context) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we need to add this to opensearch module as storage-specific function. Personally I think we should prioritize opensearch-project#811. It will become more and more difficult as we keep adding more OpenSearch logic to core engine.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed. I've got it on our radar, and we can start to scope it out. As is, there's quite a bit of work to do to pull out the opensearch specific classes, but I think it's do-able.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We've had storage-specific function support in opensearch-project#1354. Can we start this now instead of adding more and more special logic to core? Agreed there is quite lots of work to move all to opensearch but maybe easy to do this for single PR?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let me take a look. There's already a lot in this PR.
Would you mind if we move the score function out as a proof of concept for a set of OpenSearch storage engine functions?

// if no function argument given, just accept the relevance query and return
if (node.getFuncArgs().isEmpty() || !(node.getFuncArgs().get(0) instanceof Literal)) {
OpenSearchFunctions.OpenSearchFunction relevanceQueryExpr =
(OpenSearchFunctions.OpenSearchFunction) node
.getRelevanceQuery().accept(this, context);
relevanceQueryExpr.setScoreTracked(true);
return relevanceQueryExpr;
}

// note: if an argument exists, and there should only be one, it will be a boost argument
Literal boostFunctionArg = (Literal) node.getFuncArgs().get(0);
Double thisBoostValue;
if (boostFunctionArg.getType().equals(DataType.DOUBLE)) {
thisBoostValue = ((Double) boostFunctionArg.getValue());
} else if (boostFunctionArg.getType().equals(DataType.INTEGER)) {
thisBoostValue = ((Integer) boostFunctionArg.getValue()).doubleValue();
} else {
throw new SemanticCheckException(String.format("Expected boost type '%s' but got '%s'",
DataType.DOUBLE.name(), boostFunctionArg.getType().name()));
}

// update the existing unresolved expression to add a boost argument if it doesn't exist
// OR multiply the existing boost argument
Function relevanceQueryUnresolvedExpr = (Function)node.getRelevanceQuery();
List<UnresolvedExpression> relevanceFuncArgs = relevanceQueryUnresolvedExpr.getFuncArgs();

boolean doesFunctionContainBoostArgument = false;
List<UnresolvedExpression> updatedFuncArgs = new ArrayList<>();
for (UnresolvedExpression expr: relevanceFuncArgs) {
String argumentName = ((UnresolvedArgument) expr).getArgName();
if (argumentName.equalsIgnoreCase("boost")) {
doesFunctionContainBoostArgument = true;
Literal boostArgLiteral = (Literal)((UnresolvedArgument) expr).getValue();
Double boostValue = Double.parseDouble((String)boostArgLiteral.getValue()) * thisBoostValue;
UnresolvedArgument newBoostArg = new UnresolvedArgument(
argumentName,
new Literal(boostValue.toString(), DataType.STRING)
);
updatedFuncArgs.add(newBoostArg);
} else {
updatedFuncArgs.add(expr);
}
}

// since nothing was found, add an argument
if (!doesFunctionContainBoostArgument) {
UnresolvedArgument newBoostArg = new UnresolvedArgument(
"boost", new Literal(Double.toString(thisBoostValue), DataType.STRING));
updatedFuncArgs.add(newBoostArg);
}

// create a new function expression with boost argument and resolve it
Function updatedRelevanceQueryUnresolvedExpr = new Function(
relevanceQueryUnresolvedExpr.getFuncName(),
updatedFuncArgs);
OpenSearchFunctions.OpenSearchFunction relevanceQueryExpr =
(OpenSearchFunctions.OpenSearchFunction) updatedRelevanceQueryUnresolvedExpr
.accept(this, context);
relevanceQueryExpr.setScoreTracked(true);
return relevanceQueryExpr;
}

@Override
public Expression visitIn(In node, AnalysisContext context) {
return visitIn(node.getField(), node.getValueList(), context);
Expand Down Expand Up @@ -297,6 +371,9 @@ public Expression visitAllFields(AllFields node, AnalysisContext context) {
@Override
public Expression visitQualifiedName(QualifiedName node, AnalysisContext context) {
QualifierAnalyzer qualifierAnalyzer = new QualifierAnalyzer(context);
if (node.isMetadataField().booleanValue()) {
acarbonetto marked this conversation as resolved.
Show resolved Hide resolved
return visitMetadata(qualifierAnalyzer.unqualified(node), context);
}
return visitIdentifier(qualifierAnalyzer.unqualified(node), context);
}

Expand All @@ -313,6 +390,19 @@ public Expression visitUnresolvedArgument(UnresolvedArgument node, AnalysisConte
return new NamedArgumentExpression(node.getArgName(), node.getValue().accept(this, context));
}

/**
* If QualifiedName is actually a reserved metadata field, return the expr type associated
* with the metadata field.
* @param ident metadata field name
* @param context analysis context
* @return DSL reference
*/
private Expression visitMetadata(String ident, AnalysisContext context) {
ExprCoreType exprCoreType = Optional.ofNullable(METADATAFIELD_TYPE_MAP.get(ident))
.orElseThrow(() -> new SemanticCheckException("invalid metadata field"));
return DSL.ref(ident, exprCoreType);
}

private Expression visitIdentifier(String ident, AnalysisContext context) {
// ParseExpression will always override ReferenceExpression when ident conflicts
for (NamedExpression expr : context.getNamedParseExpressions()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import org.opensearch.sql.expression.conditional.cases.CaseClause;
import org.opensearch.sql.expression.conditional.cases.WhenClause;
import org.opensearch.sql.expression.function.BuiltinFunctionRepository;
import org.opensearch.sql.expression.function.OpenSearchFunctions;
import org.opensearch.sql.planner.logical.LogicalAggregation;
import org.opensearch.sql.planner.logical.LogicalPlan;
import org.opensearch.sql.planner.logical.LogicalPlanNodeVisitor;
Expand Down Expand Up @@ -70,8 +71,17 @@ public Expression visitFunction(FunctionExpression node, AnalysisContext context
final List<Expression> args =
node.getArguments().stream().map(expr -> expr.accept(this, context))
.collect(Collectors.toList());
return (Expression) repository.compile(context.getFunctionProperties(),
node.getFunctionName(), args);
Expression optimizedFunctionExpression = (Expression) repository.compile(
context.getFunctionProperties(),
node.getFunctionName(),
args
);
// Propagate scoreTracked for OpenSearch functions
if (optimizedFunctionExpression instanceof OpenSearchFunctions.OpenSearchFunction) {
((OpenSearchFunctions.OpenSearchFunction) optimizedFunctionExpression).setScoreTracked(
((OpenSearchFunctions.OpenSearchFunction)node).isScoreTracked());
}
return optimizedFunctionExpression;
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.opensearch.sql.ast.expression.Or;
import org.opensearch.sql.ast.expression.QualifiedName;
import org.opensearch.sql.ast.expression.RelevanceFieldList;
import org.opensearch.sql.ast.expression.ScoreFunction;
import org.opensearch.sql.ast.expression.Span;
import org.opensearch.sql.ast.expression.UnresolvedArgument;
import org.opensearch.sql.ast.expression.UnresolvedAttribute;
Expand Down Expand Up @@ -278,6 +279,10 @@ public T visitHighlightFunction(HighlightFunction node, C context) {
return visitChildren(node, context);
}

public T visitScoreFunction(ScoreFunction node, C context) {
return visitChildren(node, context);
}

public T visitStatement(Statement node, C context) {
return visit(node, context);
}
Expand Down
13 changes: 11 additions & 2 deletions core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import org.opensearch.sql.ast.expression.Or;
import org.opensearch.sql.ast.expression.ParseMethod;
import org.opensearch.sql.ast.expression.QualifiedName;
import org.opensearch.sql.ast.expression.ScoreFunction;
import org.opensearch.sql.ast.expression.Span;
import org.opensearch.sql.ast.expression.SpanUnit;
import org.opensearch.sql.ast.expression.UnresolvedArgument;
Expand All @@ -60,7 +61,6 @@
import org.opensearch.sql.ast.tree.TableFunction;
import org.opensearch.sql.ast.tree.UnresolvedPlan;
import org.opensearch.sql.ast.tree.Values;
import org.opensearch.sql.expression.function.BuiltinFunctionName;

/**
* Class of static methods to create specific node instances.
Expand Down Expand Up @@ -140,7 +140,11 @@ public UnresolvedPlan values(List<Literal>... values) {
}

public static QualifiedName qualifiedName(String... parts) {
return new QualifiedName(Arrays.asList(parts));
return new QualifiedName(Arrays.asList(parts), Boolean.FALSE);
}

public static QualifiedName qualifiedNameWithMetadata(String... parts) {
return new QualifiedName(Arrays.asList(parts), Boolean.TRUE);
}

public static UnresolvedExpression equalTo(
Expand Down Expand Up @@ -285,6 +289,11 @@ public UnresolvedExpression highlight(UnresolvedExpression fieldName,
return new HighlightFunction(fieldName, arguments);
}

public UnresolvedExpression score(UnresolvedExpression relevanceQuery,
List<UnresolvedExpression> funcArgs) {
return new ScoreFunction(relevanceQuery, funcArgs);
}

public UnresolvedExpression window(UnresolvedExpression function,
List<UnresolvedExpression> partitionByList,
List<Pair<SortOption, UnresolvedExpression>> sortList) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,26 +13,55 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Optional;
import java.util.stream.StreamSupport;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import org.opensearch.sql.ast.AbstractNodeVisitor;
import org.opensearch.sql.data.type.ExprCoreType;

@Getter
@EqualsAndHashCode(callSuper = false)
public class QualifiedName extends UnresolvedExpression {
private final List<String> parts;

private final Boolean isMetadataField;
acarbonetto marked this conversation as resolved.
Show resolved Hide resolved

public QualifiedName(String name) {
this(name, Boolean.FALSE);
}

public static final String METADATA_FIELD_ID = "_id";
public static final String METADATA_FIELD_INDEX = "_index";
public static final String METADATA_FIELD_SCORE = "_score";
public static final String METADATA_FIELD_MAXSCORE = "_maxscore";
public static final String METADATA_FIELD_SORT = "_sort";
public static final java.util.Map<String, ExprCoreType> METADATAFIELD_TYPE_MAP = new HashMap<>() {
Copy link

@dai-chen dai-chen Feb 17, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We may not want to hardcode this in core. Not sure if adding these fields in OpenSearchIndex.getFieldMapping() can work for you, however we need to consider meta column in other storage engine. For example, S3 may have $path, $partition in each row.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can't use OpenSearchIndex as it would create a dependency on opensearch in core. Not ideal. We kind of need to wait until we have a user-defined functions that could override core visitors, like QualifiedName.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I didn't mean you need to depend on opensearch module. OpenSearchIndex is subclass of our Table interface. As I understand, Analyzer will fetch field list from Table during query analysis. I saw you've added isMetaField flag, so I don't see why we want to hardcode metadata field list in core module.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh. Interesting. I'll see if I can hook that up.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please see changes in:

  • core/src/main/java/org/opensearch/sql/analysis/TypeEnvironment.java (for interface changes in the environment)
  • opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java (specific changes for OpenSearch indexes), and
  • core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java the visitQualifiedName function

{
Yury-Fridlyand marked this conversation as resolved.
Show resolved Hide resolved
put(METADATA_FIELD_ID, ExprCoreType.STRING);
put(METADATA_FIELD_INDEX, ExprCoreType.STRING);
put(METADATA_FIELD_SCORE, ExprCoreType.FLOAT);
put(METADATA_FIELD_MAXSCORE, ExprCoreType.FLOAT);
put(METADATA_FIELD_SORT, ExprCoreType.LONG);
}
};

public QualifiedName(String name, Boolean isMetadataField) {
this.parts = Collections.singletonList(name);
this.isMetadataField = isMetadataField;
}

public QualifiedName(Iterable<String> parts) {
this(parts, Boolean.FALSE);
}

/**
* QualifiedName Constructor.
*/
public QualifiedName(Iterable<String> parts) {
public QualifiedName(Iterable<String> parts, Boolean isMetadataField) {
this.isMetadataField = isMetadataField;
List<String> partsList = StreamSupport.stream(parts.spliterator(), false).collect(toList());
if (partsList.isEmpty()) {
throw new IllegalArgumentException("parts is empty");
Expand Down Expand Up @@ -110,4 +139,8 @@ public List<UnresolvedExpression> getChild() {
public <R, C> R accept(AbstractNodeVisitor<R, C> nodeVisitor, C context) {
return nodeVisitor.visitQualifiedName(this, context);
}

public Boolean isMetadataField() {
return this.isMetadataField;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.ast.expression;

import java.util.List;
import lombok.AllArgsConstructor;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.ToString;
import org.opensearch.sql.ast.AbstractNodeVisitor;

/**
* Expression node of Score function.
* Score takes a relevance-search expression as an argument and returns it
*/
@AllArgsConstructor
@EqualsAndHashCode(callSuper = false)
@Getter
@ToString
public class ScoreFunction extends UnresolvedExpression {
private final UnresolvedExpression relevanceQuery;
private final List<UnresolvedExpression> funcArgs;

@Override
public <T, C> T accept(AbstractNodeVisitor<T, C> nodeVisitor, C context) {
return nodeVisitor.visitScoreFunction(this, context);
}

@Override
public List<UnresolvedExpression> getChild() {
List<UnresolvedExpression> resultingList = List.of(relevanceQuery);
resultingList.addAll(funcArgs);
return resultingList;
}
}
14 changes: 13 additions & 1 deletion core/src/main/java/org/opensearch/sql/expression/DSL.java
Original file line number Diff line number Diff line change
Expand Up @@ -787,7 +787,19 @@ public static FunctionExpression match_bool_prefix(Expression... args) {
}

public static FunctionExpression wildcard_query(Expression... args) {
return compile(FunctionProperties.None,BuiltinFunctionName.WILDCARD_QUERY, args);
return compile(FunctionProperties.None, BuiltinFunctionName.WILDCARD_QUERY, args);
}

public static FunctionExpression score(Expression... args) {
return compile(FunctionProperties.None, BuiltinFunctionName.SCORE, args);
}

public static FunctionExpression scorequery(Expression... args) {
return compile(FunctionProperties.None, BuiltinFunctionName.SCOREQUERY, args);
}

public static FunctionExpression score_query(Expression... args) {
return compile(FunctionProperties.None, BuiltinFunctionName.SCORE_QUERY, args);
}

public static FunctionExpression now(FunctionProperties functionProperties,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ public enum BuiltinFunctionName {
WEEKOFYEAR(FunctionName.of("weekofyear")),
WEEK_OF_YEAR(FunctionName.of("week_of_year")),
YEAR(FunctionName.of("year")),

// `now`-like functions
NOW(FunctionName.of("now")),
CURDATE(FunctionName.of("curdate")),
Expand All @@ -122,6 +123,7 @@ public enum BuiltinFunctionName {
CURRENT_TIMESTAMP(FunctionName.of("current_timestamp")),
LOCALTIMESTAMP(FunctionName.of("localtimestamp")),
SYSDATE(FunctionName.of("sysdate")),

/**
* Text Functions.
*/
Expand Down Expand Up @@ -239,6 +241,10 @@ public enum BuiltinFunctionName {
MATCH_BOOL_PREFIX(FunctionName.of("match_bool_prefix")),
HIGHLIGHT(FunctionName.of("highlight")),
MATCH_PHRASE_PREFIX(FunctionName.of("match_phrase_prefix")),
SCORE(FunctionName.of("score")),
SCOREQUERY(FunctionName.of("scorequery")),
SCORE_QUERY(FunctionName.of("score_query")),

/**
* Legacy Relevance Function.
*/
Expand Down
Loading