Skip to content

Commit

Permalink
Pulled out analyzers to opensearch module
Browse files Browse the repository at this point in the history
Signed-off-by: Guian Gumpac <guian.gumpac@improving.com>
  • Loading branch information
GumpacG committed Aug 17, 2023
1 parent 3b818c0 commit a602e30
Show file tree
Hide file tree
Showing 9 changed files with 219 additions and 101 deletions.
13 changes: 13 additions & 0 deletions core/src/main/java/org/opensearch/sql/analysis/Analyzer.java
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,19 @@ public Analyzer(
this.repository = repository;
}

/**
 * Creates an analyzer from explicitly supplied collaborators, letting the caller choose the
 * expression, select-expression and named-expression analyzers instead of having them built
 * here.
 *
 * @param expressionAnalyzer analyzer stored for expression handling
 * @param selectExpressionAnalyzer analyzer stored for select-list handling
 * @param namedExpressionAnalyzer analyzer stored for named-expression handling
 * @param dataSourceService data source service stored on this analyzer
 * @param repository built-in function repository stored on this analyzer
 */
public Analyzer(
    ExpressionAnalyzer expressionAnalyzer,
    SelectExpressionAnalyzer selectExpressionAnalyzer,
    NamedExpressionAnalyzer namedExpressionAnalyzer,
    DataSourceService dataSourceService,
    BuiltinFunctionRepository repository) {
  // The three analyzers, in parameter order.
  this.expressionAnalyzer = expressionAnalyzer;
  this.selectExpressionAnalyzer = selectExpressionAnalyzer;
  this.namedExpressionAnalyzer = namedExpressionAnalyzer;

  // Supporting services.
  this.dataSourceService = dataSourceService;
  this.repository = repository;
}

/**
 * Analyzes an unresolved plan by dispatching it to this analyzer as a visitor.
 *
 * @param unresolved plan to analyze
 * @param context analysis context handed to every visit call
 * @return whatever the plan's {@code accept} call produces for this visitor
 */
public LogicalPlan analyze(UnresolvedPlan unresolved, AnalysisContext context) {
  final LogicalPlan analyzed = unresolved.accept(this, context);
  return analyzed;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,12 +177,6 @@ public Expression visitAggregateFunction(AggregateFunction node, AnalysisContext
}
}

/**
 * Turns a relevance field list into a literal expression.
 *
 * @param node relevance field list node
 * @param context analysis context (not used by this method)
 * @return literal expression holding a tuple value built from the node's field list
 */
@Override
public Expression visitRelevanceFieldList(RelevanceFieldList node, AnalysisContext context) {
// Take an immutable copy of the field list and wrap it as a tuple-valued literal.
return new LiteralExpression(
ExprValueUtils.tupleValue(ImmutableMap.copyOf(node.getFieldList())));
}

@Override
public Expression visitFunction(Function node, AnalysisContext context) {
FunctionName functionName = FunctionName.of(node.getFuncName());
Expand Down Expand Up @@ -220,66 +214,6 @@ public Expression visitHighlightFunction(HighlightFunction node, AnalysisContext
return new HighlightExpression(expr);
}

/**
 * Resolves a score function by dropping the wrapper and analyzing the relevance function it
 * encloses. The score function's weight is folded into that function's {@code boost} argument:
 * an existing boost is multiplied by the weight, otherwise a new boost argument is appended.
 *
 * @param node score function node
 * @param context analysis context for the query
 * @return resolved relevance function, flagged as score-tracked
 */
public Expression visitScoreFunction(ScoreFunction node, AnalysisContext context) {
  Literal weightLiteral = node.getRelevanceFieldWeight();
  if (!weightLiteral.getType().equals(DataType.DOUBLE)) {
    throw new SemanticCheckException(
        String.format(
            "Expected boost type '%s' but got '%s'",
            DataType.DOUBLE.name(), weightLiteral.getType().name()));
  }
  Double weight = (Double) weightLiteral.getValue();

  Function wrappedQuery = (Function) node.getRelevanceQuery();
  List<UnresolvedExpression> originalArgs = wrappedQuery.getFuncArgs();

  // Copy every argument across, rewriting a boost argument (if any) on the way.
  List<UnresolvedExpression> rewrittenArgs = new ArrayList<>();
  boolean boostSeen = false;
  for (UnresolvedExpression arg : originalArgs) {
    UnresolvedArgument namedArg = (UnresolvedArgument) arg;
    String argName = namedArg.getArgName();
    if (!argName.equalsIgnoreCase("boost")) {
      rewrittenArgs.add(arg);
      continue;
    }

    boostSeen = true;
    Literal existingBoost = (Literal) namedArg.getValue();
    // Boost values travel as string literals, so parse, scale and re-encode as a string.
    Double combined = Double.parseDouble((String) existingBoost.getValue()) * weight;
    rewrittenArgs.add(
        new UnresolvedArgument(argName, new Literal(combined.toString(), DataType.STRING)));
  }

  // No boost argument was present: the weight itself becomes the boost.
  if (!boostSeen) {
    rewrittenArgs.add(
        new UnresolvedArgument(
            "boost", new Literal(Double.toString(weight), DataType.STRING)));
  }

  // Rebuild the relevance function with the updated arguments and resolve it.
  Function boostedQuery = new Function(wrappedQuery.getFuncName(), rewrittenArgs);
  OpenSearchFunctions.OpenSearchFunction resolved =
      (OpenSearchFunctions.OpenSearchFunction) boostedQuery.accept(this, context);
  resolved.setScoreTracked(true);
  return resolved;
}

@Override
public Expression visitIn(In node, AnalysisContext context) {
return visitIn(node.getField(), node.getValueList(), context);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
@RequiredArgsConstructor
public class SelectExpressionAnalyzer
extends AbstractNodeVisitor<List<NamedExpression>, AnalysisContext> {
private final ExpressionAnalyzer expressionAnalyzer;
protected final ExpressionAnalyzer expressionAnalyzer;

private ExpressionReferenceOptimizer optimizer;

Expand All @@ -59,11 +59,6 @@ public List<NamedExpression> visitField(Field node, AnalysisContext context) {

@Override
public List<NamedExpression> visitAlias(Alias node, AnalysisContext context) {
// Expand all nested fields if used in SELECT clause
if (node.getDelegated() instanceof NestedAllTupleFields) {
return node.getDelegated().accept(this, context);
}

Expression expr = referenceIfSymbolDefined(node, context);
return Collections.singletonList(
DSL.named(unqualifiedNameIfFieldOnly(node, context), expr, node.getAlias()));
Expand All @@ -82,7 +77,7 @@ public List<NamedExpression> visitAlias(Alias node, AnalysisContext context) {
* groupExpr))
* </ol>
*/
private Expression referenceIfSymbolDefined(Alias expr, AnalysisContext context) {
protected Expression referenceIfSymbolDefined(Alias expr, AnalysisContext context) {
UnresolvedExpression delegatedExpr = expr.getDelegated();

// Pass named expression because expression like window function loses full name
Expand All @@ -105,30 +100,6 @@ public List<NamedExpression> visitAllFields(AllFields node, AnalysisContext cont
.collect(Collectors.toList());
}

/**
 * Expands a {@link NestedAllTupleFields} node into one named {@code nested(...)} expression per
 * field that sits directly under the node's path.
 *
 * @param node node carrying the path whose child fields should be expanded
 * @param context analysis context; its current type environment supplies the known fields
 * @return one named expression per matching field, each named {@code nested(<field>)}
 */
@Override
public List<NamedExpression> visitNestedAllTupleFields(
    NestedAllTupleFields node, AnalysisContext context) {
  TypeEnvironment environment = context.peek();
  Map<String, ExprType> lookupAllTupleFields =
      environment.lookupAllTupleFields(Namespace.FIELD_NAME);
  // NOTE(review): presumably this fails when the path is not a known field - confirm.
  environment.resolve(new Symbol(Namespace.FIELD_NAME, node.getPath()));

  // Match only the direct children of the path. The path is quoted because it is plain text,
  // not a regex (its dots must not match arbitrary characters), and the match is anchored at
  // the start so a field that merely ends with the path (e.g. "other.<path>.x") is excluded.
  Pattern p = Pattern.compile("^" + Pattern.quote(node.getPath()) + "\\.[^.]+$");
  return lookupAllTupleFields.entrySet().stream()
      .filter(field -> p.matcher(field.getKey()).find())
      .map(
          entry -> {
            // Build nested(<field>) from the dotted field name and resolve it.
            Expression nestedFunc =
                new Function(
                        "nested",
                        List.of(new QualifiedName(List.of(entry.getKey().split("\\.")))))
                    .accept(expressionAnalyzer, context);
            return DSL.named("nested(" + entry.getKey() + ")", nestedFunc);
          })
      .collect(Collectors.toList());
}

/**
* Get unqualified name if select item is just a field. For example, suppose an index named
* "accounts", return "age" for "SELECT accounts.age". But do nothing for expression in "SELECT
Expand All @@ -137,7 +108,7 @@ public List<NamedExpression> visitNestedAllTupleFields(
* this. Otherwise, what unqualified() returns will override Alias's name as NamedExpression's
* name even though the QualifiedName doesn't have qualifier.
*/
private String unqualifiedNameIfFieldOnly(Alias node, AnalysisContext context) {
protected String unqualifiedNameIfFieldOnly(Alias node, AnalysisContext context) {
UnresolvedExpression selectItem = node.getDelegated();
if (selectItem instanceof QualifiedName) {
QualifierAnalyzer qualifierAnalyzer = new QualifierAnalyzer(context);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@
import java.util.Collection;
import java.util.Collections;
import org.opensearch.sql.DataSourceSchemaName;
import org.opensearch.sql.analysis.Analyzer;
import org.opensearch.sql.analysis.ExpressionAnalyzer;
import org.opensearch.sql.datasource.DataSourceService;
import org.opensearch.sql.expression.function.BuiltinFunctionRepository;
import org.opensearch.sql.expression.function.FunctionResolver;

/** Storage engine for different storage to provide data access API implementation. */
Expand All @@ -24,4 +28,8 @@ public interface StorageEngine {
default Collection<FunctionResolver> getFunctions() {
// Default: no functions are contributed unless an implementation overrides this method.
return Collections.emptyList();
}

/**
 * Builds the analyzer to use with this storage engine. The default is a plain {@link Analyzer}
 * backed by a plain {@link ExpressionAnalyzer}; implementations may override it.
 *
 * @param dataSourceService data source service handed to the analyzer
 * @param repository function repository handed to the expression analyzer and the analyzer
 * @return a new analyzer instance
 */
default Analyzer getAnalyzer(
    DataSourceService dataSourceService, BuiltinFunctionRepository repository) {
  ExpressionAnalyzer expressionAnalyzer = new ExpressionAnalyzer(repository);
  return new Analyzer(expressionAnalyzer, dataSourceService, repository);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package org.opensearch.sql.opensearch.analysis;

import org.opensearch.sql.analysis.Analyzer;
import org.opensearch.sql.analysis.ExpressionAnalyzer;
import org.opensearch.sql.analysis.NamedExpressionAnalyzer;
import org.opensearch.sql.datasource.DataSourceService;
import org.opensearch.sql.expression.function.BuiltinFunctionRepository;

/**
 * {@link Analyzer} wired with the OpenSearch-specific expression and select-expression
 * analyzers.
 */
public class OpenSearchAnalyzer extends Analyzer {
  /**
   * Constructor.
   *
   * @param dataSourceService data source service passed on to the base analyzer
   * @param repository function repository used to build the expression analyzer and passed on
   *     to the base analyzer
   */
  public OpenSearchAnalyzer(
      DataSourceService dataSourceService, BuiltinFunctionRepository repository) {
    this(new OpenSearchExpressionAnalyzer(repository), dataSourceService, repository);
  }

  /**
   * Delegation target that lets one expression analyzer instance be shared by the select and
   * named-expression analyzers, instead of constructing a separate copy for each of them.
   *
   * @param expressionAnalyzer the single expression analyzer shared by all collaborators
   * @param dataSourceService data source service passed on to the base analyzer
   * @param repository function repository passed on to the base analyzer
   */
  private OpenSearchAnalyzer(
      ExpressionAnalyzer expressionAnalyzer,
      DataSourceService dataSourceService,
      BuiltinFunctionRepository repository) {
    super(
        expressionAnalyzer,
        new OpenSearchSelectExpressionAnalyzer(expressionAnalyzer),
        new NamedExpressionAnalyzer(expressionAnalyzer),
        dataSourceService,
        repository);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
package org.opensearch.sql.opensearch.analysis;

import com.google.common.collect.ImmutableMap;
import org.opensearch.sql.analysis.AnalysisContext;
import org.opensearch.sql.analysis.ExpressionAnalyzer;
import org.opensearch.sql.ast.expression.DataType;
import org.opensearch.sql.ast.expression.Function;
import org.opensearch.sql.ast.expression.Literal;
import org.opensearch.sql.ast.expression.RelevanceFieldList;
import org.opensearch.sql.ast.expression.ScoreFunction;
import org.opensearch.sql.ast.expression.UnresolvedArgument;
import org.opensearch.sql.ast.expression.UnresolvedExpression;
import org.opensearch.sql.data.model.ExprValueUtils;
import org.opensearch.sql.exception.SemanticCheckException;
import org.opensearch.sql.expression.Expression;
import org.opensearch.sql.expression.LiteralExpression;
import org.opensearch.sql.expression.function.BuiltinFunctionRepository;
import org.opensearch.sql.expression.function.OpenSearchFunctions;

import java.util.ArrayList;
import java.util.List;

public class OpenSearchExpressionAnalyzer extends ExpressionAnalyzer {
public OpenSearchExpressionAnalyzer(BuiltinFunctionRepository repository) {
super(repository);
}

/**
* visitScoreFunction removes the score function from the AST and replaces it with the child
* relevance function node. If the optional boost variable is provided, the boost argument of the
* relevance function is combined.
*
* @param node score function node
* @param context analysis context for the query
* @return resolved relevance function
*/
public Expression visitScoreFunction(ScoreFunction node, AnalysisContext context) {
Literal boostArg = node.getRelevanceFieldWeight();
if (!boostArg.getType().equals(DataType.DOUBLE)) {
throw new SemanticCheckException(
String.format(
"Expected boost type '%s' but got '%s'",
DataType.DOUBLE.name(), boostArg.getType().name()));
}
Double thisBoostValue = ((Double) boostArg.getValue());

// update the existing unresolved expression to add a boost argument if it doesn't exist
// OR multiply the existing boost argument
Function relevanceQueryUnresolvedExpr = (Function) node.getRelevanceQuery();
List<UnresolvedExpression> relevanceFuncArgs = relevanceQueryUnresolvedExpr.getFuncArgs();

boolean doesFunctionContainBoostArgument = false;
List<UnresolvedExpression> updatedFuncArgs = new ArrayList<>();
for (UnresolvedExpression expr : relevanceFuncArgs) {
String argumentName = ((UnresolvedArgument) expr).getArgName();
if (argumentName.equalsIgnoreCase("boost")) {
doesFunctionContainBoostArgument = true;
Literal boostArgLiteral = (Literal) ((UnresolvedArgument) expr).getValue();
Double boostValue =
Double.parseDouble((String) boostArgLiteral.getValue()) * thisBoostValue;
UnresolvedArgument newBoostArg =
new UnresolvedArgument(
argumentName, new Literal(boostValue.toString(), DataType.STRING));
updatedFuncArgs.add(newBoostArg);
} else {
updatedFuncArgs.add(expr);
}
}

// since nothing was found, add an argument
if (!doesFunctionContainBoostArgument) {
UnresolvedArgument newBoostArg =
new UnresolvedArgument(
"boost", new Literal(Double.toString(thisBoostValue), DataType.STRING));
updatedFuncArgs.add(newBoostArg);
}

// create a new function expression with boost argument and resolve it
Function updatedRelevanceQueryUnresolvedExpr =
new Function(relevanceQueryUnresolvedExpr.getFuncName(), updatedFuncArgs);
OpenSearchFunctions.OpenSearchFunction relevanceQueryExpr =
(OpenSearchFunctions.OpenSearchFunction)
updatedRelevanceQueryUnresolvedExpr.accept(this, context);
relevanceQueryExpr.setScoreTracked(true);
return relevanceQueryExpr;
}

@Override
public Expression visitRelevanceFieldList(RelevanceFieldList node, AnalysisContext context) {
return new LiteralExpression(
ExprValueUtils.tupleValue(ImmutableMap.copyOf(node.getFieldList())));
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package org.opensearch.sql.opensearch.analysis;

import org.opensearch.sql.analysis.AnalysisContext;
import org.opensearch.sql.analysis.ExpressionAnalyzer;
import org.opensearch.sql.analysis.SelectExpressionAnalyzer;
import org.opensearch.sql.analysis.TypeEnvironment;
import org.opensearch.sql.analysis.symbol.Namespace;
import org.opensearch.sql.analysis.symbol.Symbol;
import org.opensearch.sql.ast.expression.Alias;
import org.opensearch.sql.ast.expression.Function;
import org.opensearch.sql.ast.expression.NestedAllTupleFields;
import org.opensearch.sql.ast.expression.QualifiedName;
import org.opensearch.sql.data.type.ExprType;
import org.opensearch.sql.expression.DSL;
import org.opensearch.sql.expression.Expression;
import org.opensearch.sql.expression.NamedExpression;

import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

/**
 * {@link SelectExpressionAnalyzer} extended with expansion of {@link NestedAllTupleFields}
 * select items.
 */
public class OpenSearchSelectExpressionAnalyzer extends SelectExpressionAnalyzer {
  /**
   * Constructor.
   *
   * @param expressionAnalyzer expression analyzer passed on to the base select analyzer
   */
  public OpenSearchSelectExpressionAnalyzer(ExpressionAnalyzer expressionAnalyzer) {
    super(expressionAnalyzer);
  }

  /**
   * Analyzes an aliased select item. An alias wrapping a {@link NestedAllTupleFields} node is
   * expanded into its fields; any other alias yields a single named expression.
   *
   * @param node alias node from the select list
   * @param context analysis context
   * @return the named expressions produced for this select item
   */
  @Override
  public List<NamedExpression> visitAlias(Alias node, AnalysisContext context) {
    // Expand all nested fields if used in SELECT clause
    if (node.getDelegated() instanceof NestedAllTupleFields) {
      return node.getDelegated().accept(this, context);
    }

    Expression expr = referenceIfSymbolDefined(node, context);
    return Collections.singletonList(
        DSL.named(unqualifiedNameIfFieldOnly(node, context), expr, node.getAlias()));
  }

  /**
   * Expands a {@link NestedAllTupleFields} node into one named {@code nested(...)} expression
   * per field that sits directly under the node's path.
   *
   * @param node node carrying the path whose child fields should be expanded
   * @param context analysis context; its current type environment supplies the known fields
   * @return one named expression per matching field, each named {@code nested(<field>)}
   */
  @Override
  public List<NamedExpression> visitNestedAllTupleFields(
      NestedAllTupleFields node, AnalysisContext context) {
    TypeEnvironment environment = context.peek();
    Map<String, ExprType> lookupAllTupleFields =
        environment.lookupAllTupleFields(Namespace.FIELD_NAME);
    // NOTE(review): presumably this fails when the path is not a known field - confirm.
    environment.resolve(new Symbol(Namespace.FIELD_NAME, node.getPath()));

    // Match only the direct children of the path. The path is quoted because it is plain text,
    // not a regex (its dots must not match arbitrary characters), and the match is anchored at
    // the start so a field that merely ends with the path (e.g. "other.<path>.x") is excluded.
    Pattern p = Pattern.compile("^" + Pattern.quote(node.getPath()) + "\\.[^.]+$");
    return lookupAllTupleFields.entrySet().stream()
        .filter(field -> p.matcher(field.getKey()).find())
        .map(
            entry -> {
              // Build nested(<field>) from the dotted field name and resolve it.
              Expression nestedFunc =
                  new Function(
                          "nested",
                          List.of(new QualifiedName(List.of(entry.getKey().split("\\.")))))
                      .accept(expressionAnalyzer, context);
              return DSL.named("nested(" + entry.getKey() + ")", nestedFunc);
            })
        .collect(Collectors.toList());
  }
}
Loading

0 comments on commit a602e30

Please sign in to comment.