Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pagination Phase 2: Support WHERE clause, column list in SELECT clause and for functions and expressions in the query. #1500

Merged
merged 5 commits into from
Jun 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ public class Cast extends UnresolvedExpression {
private final UnresolvedExpression expression;

/**
* Expression that represents ELSE statement result.
* Expression that represents name of the target type.
*/
private final UnresolvedExpression convertedType;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,53 @@

import org.opensearch.sql.ast.AbstractNodeVisitor;
import org.opensearch.sql.ast.Node;
import org.opensearch.sql.ast.expression.Alias;
import org.opensearch.sql.ast.expression.AllFields;
import org.opensearch.sql.ast.expression.And;
import org.opensearch.sql.ast.expression.Argument;
import org.opensearch.sql.ast.expression.Between;
import org.opensearch.sql.ast.expression.Case;
import org.opensearch.sql.ast.expression.Cast;
import org.opensearch.sql.ast.expression.Compare;
import org.opensearch.sql.ast.expression.EqualTo;
import org.opensearch.sql.ast.expression.Field;
import org.opensearch.sql.ast.expression.Function;
import org.opensearch.sql.ast.expression.HighlightFunction;
import org.opensearch.sql.ast.expression.In;
import org.opensearch.sql.ast.expression.Interval;
import org.opensearch.sql.ast.expression.Literal;
import org.opensearch.sql.ast.expression.Not;
import org.opensearch.sql.ast.expression.Or;
import org.opensearch.sql.ast.expression.QualifiedName;
import org.opensearch.sql.ast.expression.RelevanceFieldList;
import org.opensearch.sql.ast.expression.UnresolvedArgument;
import org.opensearch.sql.ast.expression.UnresolvedAttribute;
import org.opensearch.sql.ast.expression.When;
import org.opensearch.sql.ast.expression.WindowFunction;
import org.opensearch.sql.ast.expression.Xor;
import org.opensearch.sql.ast.tree.Aggregation;
import org.opensearch.sql.ast.tree.Filter;
import org.opensearch.sql.ast.tree.Limit;
import org.opensearch.sql.ast.tree.Project;
import org.opensearch.sql.ast.tree.Relation;
import org.opensearch.sql.ast.tree.Sort;
import org.opensearch.sql.ast.tree.Values;
import org.opensearch.sql.expression.function.BuiltinFunctionName;

/**
* Use this unresolved plan visitor to check if a plan can be serialized by PaginatedPlanCache.
* If plan.accept(new CanPaginateVisitor(...)) returns true,
* If <pre>plan.accept(new CanPaginateVisitor(...))</pre> returns <em>true</em>,
* then PaginatedPlanCache.convertToCursor will succeed. Otherwise, it will fail.
* The purpose of this visitor is to activate legacy engine fallback mechanism.
* Currently, the conditions are:
* - only projection of a relation is supported.
* - projection only has * (a.k.a. allFields).
* - Relation only scans one table
* - The table is an open search index.
* So it accepts only queries like `select * from $index`
* Currently, V2 engine does not support queries with:
* - aggregation (GROUP BY clause or aggregation functions like min/max)
* - in memory aggregation (window function)
* - ORDER BY clause
* - LIMIT/OFFSET clause(s)
* - without FROM clause
* - JOIN
* - a subquery
* V2 also requires that the table being queried should be an OpenSearch index.
* See PaginatedPlanCache.canConvertToCursor for usage.
*/
public class CanPaginateVisitor extends AbstractNodeVisitor<Boolean, Object> {
Expand All @@ -36,22 +68,182 @@ public Boolean visitRelation(Relation node, Object context) {
return Boolean.TRUE;
}

private Boolean canPaginate(Node node, Object context) {
var childList = node.getChild();
if (childList != null) {
return childList.stream().allMatch(n -> n.accept(this, context));
}
return Boolean.TRUE;
}

// For queries with WHERE clause:
@Override
public Boolean visitChildren(Node node, Object context) {
public Boolean visitFilter(Filter node, Object context) {
return canPaginate(node, context) && node.getCondition().accept(this, context);
}

// Queries with GROUP BY clause are not supported
@Override
public Boolean visitAggregation(Aggregation node, Object context) {
return Boolean.FALSE;
}

// Queries with ORDER BY clause are not supported
@Override
public Boolean visitProject(Project node, Object context) {
// Allow queries with 'SELECT *' only. Those restriction could be removed, but consider
// in-memory aggregation performed by window function (see WindowOperator).
// SELECT max(age) OVER (PARTITION BY city) ...
var projections = node.getProjectList();
if (projections.size() != 1) {
public Boolean visitSort(Sort node, Object context) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be better to override the defaultResult instead of returning Boolean.FALSE for all unsupported nodes?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or on the flip side, you could make the defaultResult Boolean.TRUE so that you don't have make duplicated canPaginate(node, context) calls. Doing this can eliminate canPaginate and would use the default visitor of the AbstractNodeVisitor as it traverses through it's children anyways.

return Boolean.FALSE;
}

// Queries without FROM clause are not supported
@Override
public Boolean visitValues(Values node, Object context) {
return Boolean.FALSE;
}

// Queries with LIMIT clause are not supported
@Override
public Boolean visitLimit(Limit node, Object context) {
Copy link
Collaborator

@forestmvey forestmvey Jun 8, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the point of having these explicit cases for what isn't supported if it will return false without the override. Perhaps we should log something stating the feature is unsupported. Maybe add a TODO comment for future work.

return Boolean.FALSE;
}

@Override
public Boolean visitLiteral(Literal node, Object context) {
return canPaginate(node, context);
}

@Override
public Boolean visitField(Field node, Object context) {
return canPaginate(node, context) && node.getFieldArgs().stream()
.allMatch(n -> n.accept(this, context));
}

@Override
public Boolean visitAlias(Alias node, Object context) {
return canPaginate(node, context) && node.getDelegated().accept(this, context);
}

@Override
public Boolean visitAllFields(AllFields node, Object context) {
return canPaginate(node, context);
}

@Override
public Boolean visitQualifiedName(QualifiedName node, Object context) {
return canPaginate(node, context);
}

@Override
public Boolean visitEqualTo(EqualTo node, Object context) {
return canPaginate(node, context);
}

@Override
public Boolean visitRelevanceFieldList(RelevanceFieldList node, Object context) {
return canPaginate(node, context);
}

@Override
public Boolean visitInterval(Interval node, Object context) {
return canPaginate(node, context);
}

@Override
public Boolean visitCompare(Compare node, Object context) {
return canPaginate(node, context);
}

@Override
public Boolean visitNot(Not node, Object context) {
return canPaginate(node, context);
}

@Override
public Boolean visitOr(Or node, Object context) {
return canPaginate(node, context);
}

@Override
public Boolean visitAnd(And node, Object context) {
return canPaginate(node, context);
}

@Override
public Boolean visitArgument(Argument node, Object context) {
return canPaginate(node, context);
}

@Override
public Boolean visitXor(Xor node, Object context) {
return canPaginate(node, context);
}

@Override
public Boolean visitFunction(Function node, Object context) {
// https://github.com/opensearch-project/sql/issues/1718
if (node.getFuncName()
.equalsIgnoreCase(BuiltinFunctionName.NESTED.getName().getFunctionName())) {
return Boolean.FALSE;
}
return canPaginate(node, context);
}

@Override
public Boolean visitIn(In node, Object context) {
return canPaginate(node, context) && node.getValueList().stream()
.allMatch(n -> n.accept(this, context));
}

@Override
public Boolean visitBetween(Between node, Object context) {
return canPaginate(node, context);
}

@Override
public Boolean visitCase(Case node, Object context) {
return canPaginate(node, context);
}

@Override
public Boolean visitWhen(When node, Object context) {
return canPaginate(node, context);
}

@Override
public Boolean visitCast(Cast node, Object context) {
return canPaginate(node, context) && node.getConvertedType().accept(this, context);
}

@Override
public Boolean visitHighlightFunction(HighlightFunction node, Object context) {
return canPaginate(node, context);
}

@Override
public Boolean visitUnresolvedArgument(UnresolvedArgument node, Object context) {
return canPaginate(node, context);
}

@Override
public Boolean visitUnresolvedAttribute(UnresolvedAttribute node, Object context) {
return canPaginate(node, context);
}

if (!(projections.get(0) instanceof AllFields)) {
@Override
public Boolean visitChildren(Node node, Object context) {
// for all not listed (= unchecked) - false
return Boolean.FALSE;
}

@Override
public Boolean visitWindowFunction(WindowFunction node, Object context) {
// don't support in-memory aggregation
// SELECT max(age) OVER (PARTITION BY city) ...
return Boolean.FALSE;
}

@Override
public Boolean visitProject(Project node, Object context) {
if (!node.getProjectList().stream().allMatch(n -> n.accept(this, context))) {
return Boolean.FALSE;
}

Expand Down
Loading