Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Highlight In SQL #717

Merged
merged 24 commits into from
Aug 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
1097d9f
Add support for highlight to parser and AstExpressionBuilder
Jul 1, 2022
1a453e1
Add unit test for highlight in AstExpressionBuilder
Jul 4, 2022
0d5c87b
Add unit test for highlight in AstBuilderTest
Jul 5, 2022
26d0b7e
Support highlight as an Unresolved expression.
Jul 5, 2022
3f10b8b
Represent highlight as UnresolvedExpression.
Jul 5, 2022
543d0d7
Support highlight in Analyzer.
Jul 5, 2022
f47ffe7
Treat highlight as a proper function in AST
Jul 6, 2022
5fdb939
Add support for highlight in Analyzer
Jul 6, 2022
5c8db0a
Add a simple IT test for highlight.
Jul 6, 2022
ac9f080
Register highlight function in the BuiltInFunctionRepository
Jul 6, 2022
b526132
Partial support for highlight in physical plan.
Jul 6, 2022
807c475
Add HighlightOperator.
Jul 6, 2022
74b6492
Highlight alpha complete.
Jul 7, 2022
ad7affc
Initial implementation supporting highlight('*')
forestmvey Jul 25, 2022
bb97dde
Add support for multiple highlight calls in select statement.
forestmvey Jul 25, 2022
163b909
Removed OpenSearchLogicalIndexScan highlightFields and dependencies. …
forestmvey Jul 26, 2022
b2a90d4
Added HighlightExpressionTest
forestmvey Jul 27, 2022
2c04c49
Added javadocs, minor PR revisions, and fixed jacoco errors by improv…
forestmvey Jul 28, 2022
01708e3
Code cleanup, adding parsing failure tests, and adding tests for high…
forestmvey Jul 29, 2022
6b96ac0
Removing HighlightOperator functionality and unnecessary visitHighlig…
forestmvey Jul 29, 2022
232b19f
Adding highlight function to functions.rst and removing unnecessary fu…
forestmvey Aug 3, 2022
9fbea30
Change highlight fields returned format to array list. Changed highli…
forestmvey Aug 4, 2022
23a24e2
Fix bug where invalid schema name was being used for returned highlig…
forestmvey Aug 4, 2022
449a330
Fix failing integration tests due to schema changes for highlight exp…
forestmvey Aug 5, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,12 @@ public LogicalPlan visitProject(Project node, AnalysisContext context) {
child = windowAnalyzer.analyze(expr, context);
}

for (UnresolvedExpression expr : node.getProjectList()) {
HighlightAnalyzer highlightAnalyzer = new HighlightAnalyzer(expressionAnalyzer, child);
child = highlightAnalyzer.analyze(expr, context);

}

List<NamedExpression> namedExpressions =
selectExpressionAnalyzer.analyze(node.getProjectList(), context,
new ExpressionReferenceOptimizer(expressionAnalyzer.getRepository(), child));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,7 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
import lombok.Getter;
Expand All @@ -29,6 +27,7 @@
import org.opensearch.sql.ast.expression.EqualTo;
import org.opensearch.sql.ast.expression.Field;
import org.opensearch.sql.ast.expression.Function;
import org.opensearch.sql.ast.expression.HighlightFunction;
import org.opensearch.sql.ast.expression.In;
import org.opensearch.sql.ast.expression.Interval;
import org.opensearch.sql.ast.expression.Literal;
Expand All @@ -44,12 +43,12 @@
import org.opensearch.sql.ast.expression.WindowFunction;
import org.opensearch.sql.ast.expression.Xor;
import org.opensearch.sql.common.antlr.SyntaxCheckException;
import org.opensearch.sql.data.model.ExprTupleValue;
import org.opensearch.sql.data.model.ExprValueUtils;
import org.opensearch.sql.data.type.ExprType;
import org.opensearch.sql.exception.SemanticCheckException;
import org.opensearch.sql.expression.DSL;
import org.opensearch.sql.expression.Expression;
import org.opensearch.sql.expression.HighlightExpression;
import org.opensearch.sql.expression.LiteralExpression;
import org.opensearch.sql.expression.NamedArgumentExpression;
import org.opensearch.sql.expression.NamedExpression;
Expand Down Expand Up @@ -191,6 +190,12 @@ public Expression visitWindowFunction(WindowFunction node, AnalysisContext conte
return expr;
}

/**
 * Analyze a highlight function node: its field argument is visited with this analyzer
 * and the resulting expression is wrapped in a {@link HighlightExpression}.
 */
@Override
public Expression visitHighlight(HighlightFunction node, AnalysisContext context) {
  return new HighlightExpression(node.getHighlightField().accept(this, context));
}

@Override
public Expression visitIn(In node, AnalysisContext context) {
return visitIn(node.getField(), node.getValueList(), context);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.analysis;

import lombok.RequiredArgsConstructor;
import org.opensearch.sql.ast.AbstractNodeVisitor;
import org.opensearch.sql.ast.expression.Alias;
import org.opensearch.sql.ast.expression.HighlightFunction;
import org.opensearch.sql.ast.expression.UnresolvedExpression;
import org.opensearch.sql.expression.Expression;
import org.opensearch.sql.planner.logical.LogicalHighlight;
import org.opensearch.sql.planner.logical.LogicalPlan;

/**
 * Visitor applied to a single project-list item. When the item is an {@link Alias}
 * delegating to a {@link HighlightFunction}, the current child plan is wrapped in a
 * {@link LogicalHighlight}; otherwise the child plan is handed back untouched.
 */
@RequiredArgsConstructor
public class HighlightAnalyzer extends AbstractNodeVisitor<LogicalPlan, AnalysisContext> {
  private final ExpressionAnalyzer expressionAnalyzer;
  private final LogicalPlan child;

  /**
   * Analyze one item of the project list.
   *
   * @param projectItem unresolved expression taken from the project list
   * @param context analysis context passed through to the visitor
   * @return the plan produced by visiting the item, or the child plan when the visit
   *     produced {@code null}
   */
  public LogicalPlan analyze(UnresolvedExpression projectItem, AnalysisContext context) {
    LogicalPlan visited = projectItem.accept(this, context);
    if (visited != null) {
      return visited;
    }
    return child;
  }

  @Override
  public LogicalPlan visitAlias(Alias node, AnalysisContext context) {
    if (node.getDelegated() instanceof HighlightFunction) {
      HighlightFunction highlightCall = (HighlightFunction) node.getDelegated();
      Expression highlightField =
          expressionAnalyzer.analyze(highlightCall.getHighlightField(), context);
      return new LogicalHighlight(child, highlightField);
    }
    // Not a highlight call: null tells analyze() to keep the existing child plan.
    return null;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import org.opensearch.sql.ast.expression.EqualTo;
import org.opensearch.sql.ast.expression.Field;
import org.opensearch.sql.ast.expression.Function;
import org.opensearch.sql.ast.expression.HighlightFunction;
import org.opensearch.sql.ast.expression.In;
import org.opensearch.sql.ast.expression.Interval;
import org.opensearch.sql.ast.expression.Let;
Expand Down Expand Up @@ -254,4 +255,8 @@ public T visitKmeans(Kmeans node, C context) {
/**
 * Visit an AD node. The default implementation delegates to {@code visitChildren}.
 */
public T visitAD(AD node, C context) {
  return visitChildren(node, context);
}

/**
 * Visit a highlight function node. The default implementation delegates to
 * {@code visitChildren}.
 */
public T visitHighlight(HighlightFunction node, C context) {
  return visitChildren(node, context);
}
}
5 changes: 5 additions & 0 deletions core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.opensearch.sql.ast.expression.EqualTo;
import org.opensearch.sql.ast.expression.Field;
import org.opensearch.sql.ast.expression.Function;
import org.opensearch.sql.ast.expression.HighlightFunction;
import org.opensearch.sql.ast.expression.In;
import org.opensearch.sql.ast.expression.Interval;
import org.opensearch.sql.ast.expression.Let;
Expand Down Expand Up @@ -261,6 +262,10 @@ public When when(UnresolvedExpression condition, UnresolvedExpression result) {
return new When(condition, result);
}

/**
 * Create an unresolved highlight function node around the given field expression.
 */
public UnresolvedExpression highlight(UnresolvedExpression fieldName) {
  return new HighlightFunction(fieldName);
}

public UnresolvedExpression window(UnresolvedExpression function,
List<UnresolvedExpression> partitionByList,
List<Pair<SortOption, UnresolvedExpression>> sortList) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.ast.expression;

import java.util.List;
import lombok.AllArgsConstructor;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.ToString;
import org.opensearch.sql.ast.AbstractNodeVisitor;

/**
 * Unresolved AST node for a call to the highlight function. It carries a single
 * operand: the expression naming the field to highlight.
 */
@Getter
@ToString
@EqualsAndHashCode(callSuper = false)
@AllArgsConstructor
public class HighlightFunction extends UnresolvedExpression {
  private final UnresolvedExpression highlightField;

  /**
   * The highlight field is this node's only child.
   */
  @Override
  public List<UnresolvedExpression> getChild() {
    return List.of(highlightField);
  }

  /**
   * Dispatch to {@link AbstractNodeVisitor#visitHighlight}.
   */
  @Override
  public <T, C> T accept(AbstractNodeVisitor<T, C> nodeVisitor, C context) {
    return nodeVisitor.visitHighlight(this, context);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@ public T visitNamed(NamedExpression node, C context) {
return node.getDelegated().accept(this, context);
}

/**
 * Visit a highlight expression. The default implementation falls back to
 * {@code visitNode}.
 */
public T visitHighlight(HighlightExpression node, C context) {
  return visitNode(node, context);
}

/**
 * Visit a reference expression. The default implementation falls back to
 * {@code visitNode}.
 */
public T visitReference(ReferenceExpression node, C context) {
  return visitNode(node, context);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.expression;

import java.util.List;
import lombok.Getter;
import org.opensearch.sql.common.utils.StringUtils;
import org.opensearch.sql.data.model.ExprValue;
import org.opensearch.sql.data.type.ExprCoreType;
import org.opensearch.sql.data.type.ExprType;
import org.opensearch.sql.expression.env.Environment;
import org.opensearch.sql.expression.function.BuiltinFunctionName;

/**
* Highlight Expression.
*/
@Getter
public class HighlightExpression extends FunctionExpression {
private final Expression highlightField;

/**
* HighlightExpression Constructor.
* @param highlightField : Highlight field for expression.
*/
public HighlightExpression(Expression highlightField) {
super(BuiltinFunctionName.HIGHLIGHT.getName(), List.of(highlightField));
this.highlightField = highlightField;
}

/**
* Return collection value matching highlight field.
* @param valueEnv : Dataset to parse value from.
* @return : collection value of highlight fields.
*/
@Override
public ExprValue valueOf(Environment<Expression, ExprValue> valueEnv) {
dai-chen marked this conversation as resolved.
Show resolved Hide resolved
String refName = "_highlight" + "." + StringUtils.unquoteText(getHighlightField().toString());
return valueEnv.resolve(DSL.ref(refName, ExprCoreType.STRING));
}

/**
* Get type for HighlightExpression.
* @return : String type.
*/
@Override
public ExprType type() {
forestmvey marked this conversation as resolved.
Show resolved Hide resolved
return ExprCoreType.ARRAY;
}

@Override
public <T, C> T accept(ExpressionNodeVisitor<T, C> visitor, C context) {
return visitor.visitHighlight(this, context);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ public enum BuiltinFunctionName {
MATCHPHRASE(FunctionName.of("matchphrase")),
QUERY_STRING(FunctionName.of("query_string")),
MATCH_BOOL_PREFIX(FunctionName.of("match_bool_prefix")),
HIGHLIGHT(FunctionName.of("highlight")),
MATCH_PHRASE_PREFIX(FunctionName.of("match_phrase_prefix")),
/**
* Legacy Relevance Function.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,20 @@
import static org.opensearch.sql.data.type.ExprCoreType.STRING;
import static org.opensearch.sql.data.type.ExprCoreType.STRUCT;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import lombok.experimental.UtilityClass;
import org.opensearch.sql.ast.dsl.AstDSL;
import org.opensearch.sql.data.model.ExprValue;
import org.opensearch.sql.data.type.ExprCoreType;
import org.opensearch.sql.data.type.ExprType;
import org.opensearch.sql.expression.Expression;
import org.opensearch.sql.expression.FunctionExpression;
import org.opensearch.sql.expression.HighlightExpression;
import org.opensearch.sql.expression.NamedArgumentExpression;
import org.opensearch.sql.expression.env.Environment;

Expand Down Expand Up @@ -50,6 +51,14 @@ public void register(BuiltinFunctionRepository repository) {
repository.register(match_phrase(BuiltinFunctionName.MATCH_PHRASE));
repository.register(match_phrase(BuiltinFunctionName.MATCHPHRASE));
repository.register(match_phrase_prefix());
repository.register(highlight());
}

/**
 * Build the resolver for the highlight function: one signature taking a single STRING
 * argument, whose builder wraps the first argument in a {@link HighlightExpression}.
 */
private static FunctionResolver highlight() {
  FunctionName name = BuiltinFunctionName.HIGHLIGHT.getName();
  FunctionBuilder builder = args -> new HighlightExpression(args.get(0));
  return new FunctionResolver(name,
      ImmutableMap.of(new FunctionSignature(name, List.of(STRING)), builder));
}

private static FunctionResolver match_bool_prefix() {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.planner.logical;

import java.util.Collections;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.ToString;
import org.opensearch.sql.expression.Expression;

/**
 * Logical plan node that records the field expression to highlight on top of a single
 * child plan.
 */
@Getter
@ToString
@EqualsAndHashCode(callSuper = true)
public class LogicalHighlight extends LogicalPlan {
  private final Expression highlightField;

  /**
   * Create a highlight node.
   *
   * @param childPlan the only child of this node
   * @param field expression identifying the field to highlight
   */
  public LogicalHighlight(LogicalPlan childPlan, Expression field) {
    super(Collections.singletonList(childPlan));
    this.highlightField = field;
  }

  /**
   * Dispatch to {@link LogicalPlanNodeVisitor#visitHighlight}.
   */
  @Override
  public <R, C> R accept(LogicalPlanNodeVisitor<R, C> visitor, C context) {
    return visitor.visitHighlight(this, context);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ public LogicalPlan window(LogicalPlan input,
return new LogicalWindow(input, windowFunction, windowDefinition);
}

/**
 * Wrap {@code input} in a {@link LogicalHighlight} for the given field expression.
 */
public LogicalPlan highlight(LogicalPlan input, Expression field) {
  return new LogicalHighlight(input, field);
}

/**
 * Wrap {@code input} in a {@link LogicalRemove} over an immutable set copy of the
 * given fields.
 */
public static LogicalPlan remove(LogicalPlan input, ReferenceExpression... fields) {
  return new LogicalRemove(input, ImmutableSet.copyOf(fields));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ public R visitFilter(LogicalFilter plan, C context) {
return visitNode(plan, context);
}

/**
 * Visit a highlight plan node. The default implementation falls back to
 * {@code visitNode}.
 */
public R visitHighlight(LogicalHighlight plan, C context) {
  return visitNode(plan, context);
}

/**
 * Visit an aggregation plan node. The default implementation falls back to
 * {@code visitNode}.
 */
public R visitAggregation(LogicalAggregation plan, C context) {
  return visitNode(plan, context);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,5 +105,4 @@ public ValuesOperator values(List<LiteralExpression>... values) {
/**
 * Create a {@link LimitOperator} over {@code input} with the given limit and offset.
 */
public static LimitOperator limit(PhysicalPlan input, Integer limit, Integer offset) {
  return new LimitOperator(input, limit, offset);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,4 @@ public R visitMLCommons(PhysicalPlan node, C context) {
/**
 * Visit an AD physical plan node. The default implementation falls back to
 * {@code visitNode}.
 */
public R visitAD(PhysicalPlan node, C context) {
  return visitNode(node, context);
}


}
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import static org.opensearch.sql.ast.dsl.AstDSL.qualifiedName;
import static org.opensearch.sql.ast.dsl.AstDSL.relation;
import static org.opensearch.sql.ast.dsl.AstDSL.span;
import static org.opensearch.sql.ast.dsl.AstDSL.stringLiteral;
import static org.opensearch.sql.ast.tree.Sort.NullOrder;
import static org.opensearch.sql.ast.tree.Sort.SortOption;
import static org.opensearch.sql.ast.tree.Sort.SortOption.DEFAULT_ASC;
Expand All @@ -45,13 +46,15 @@
import org.opensearch.sql.ast.dsl.AstDSL;
import org.opensearch.sql.ast.expression.Argument;
import org.opensearch.sql.ast.expression.DataType;
import org.opensearch.sql.ast.expression.HighlightFunction;
import org.opensearch.sql.ast.expression.Literal;
import org.opensearch.sql.ast.expression.SpanUnit;
import org.opensearch.sql.ast.tree.AD;
import org.opensearch.sql.ast.tree.Kmeans;
import org.opensearch.sql.ast.tree.RareTopN.CommandType;
import org.opensearch.sql.exception.SemanticCheckException;
import org.opensearch.sql.expression.DSL;
import org.opensearch.sql.expression.HighlightExpression;
import org.opensearch.sql.expression.config.ExpressionConfig;
import org.opensearch.sql.expression.window.WindowDefinition;
import org.opensearch.sql.planner.logical.LogicalAD;
Expand Down Expand Up @@ -231,6 +234,22 @@ public void project_source() {
AstDSL.alias("double_value", AstDSL.field("double_value"))));
}

@Test
public void project_highlight() {
  // Resolved and unresolved forms of the same highlight(fieldA) call.
  HighlightExpression resolvedHighlight = new HighlightExpression(DSL.literal("fieldA"));
  HighlightFunction unresolvedHighlight = new HighlightFunction(stringLiteral("fieldA"));

  // Expected plan: relation -> highlight -> project of the named highlight expression.
  assertAnalyzeEqual(
      LogicalPlanDSL.project(
          LogicalPlanDSL.highlight(LogicalPlanDSL.relation("schema"), DSL.literal("fieldA")),
          DSL.named("highlight(fieldA)", resolvedHighlight)),
      AstDSL.projectWithArg(
          AstDSL.relation("schema"),
          AstDSL.defaultFieldsArgs(),
          AstDSL.alias("highlight(fieldA)", unresolvedHighlight)));
}

@Test
public void remove_source() {
assertAnalyzeEqual(
Expand Down
Loading